def getDrillDown(self, df=None, orgs=None, dests=None, flights=None, cabins=None, bcs=None, date_ranges=None):
    """
    Return the rows of a dataframe that satisfy every supplied filter.

    One boolean mask is built per filter via self._mask and the masks are
    AND-ed together. For instance, orgs=['DXB', 'DMM'], bcs=['B'] returns
    the rows for flights that departed from DXB or DMM AND that were made
    in the 'B' booking class.

    args:
        df: pd.DataFrame to filter; when it is not a pd.DataFrame (e.g.
            left as None) a copy of self.entities is filtered instead
        orgs: list of strings describing origin airports (column ORG)
        dests: list of strings describing destination airports (column DES)
        flights: list of ints describing flight numbers (column FLT)
        cabins: list of strings describing cabins ('Y' or 'J'); each cabin
            is expanded to its booking classes with
            Utils.mapCabinToBookingClasses and matched against column BC
        bcs: list of strings describing booking classes (column BC)
        date_ranges: list of tuples of strings describing desired dates
            (column DATE)

    returns:
        pd.DataFrame instance holding the rows that passed every mask
    """
    if not isinstance(df, pd.DataFrame):
        df = self.entities.copy()

    row_labels = list(df.index)
    none_selected = pd.Series(False, row_labels)
    keep = pd.Series(True, row_labels)

    # (requested values, dataframe column) pairs, applied in this order.
    # How a filter left as None is treated is decided inside self._mask.
    column_filters = (
        (orgs, "ORG"),
        (dests, "DES"),
        (flights, "FLT"),
        (bcs, "BC"),
        (date_ranges, "DATE"),
    )
    for wanted, column in column_filters:
        keep = keep & self._mask(none_selected, wanted, getattr(df, column))

    if cabins is not None:
        # A cabin is filtered through the booking classes it maps to.
        cabin_classes = []
        for cabin in cabins:
            pairs = Utils.mapCabinToBookingClasses(cabin)
            cabin_classes.extend(bc for bc, rank in pairs)
        keep = keep & self._mask(none_selected, cabin_classes, df.BC)

    return df[keep]
def bookingClassTicketFrequencies(f, data, cabin):
    """
    Plot each booking class's share of the total bookings as a bar chart.

    For every (flight, flight_df) pair produced by
    f.getUniqueFlightsAndBookings(data), the last BKD value (after ordering
    by negated KEYDAY through Utils.sortByIndex) is added to the tally of
    the booking class found in the last element of `flight`. The tallies
    are then normalised by their sum and drawn with matplotlib.

    args:
        f: object exposing getUniqueFlightsAndBookings(data)
        data: passed unchanged to f.getUniqueFlightsAndBookings
        cabin: cabin code handed to Utils.mapCabinToBookingClasses to get
            the booking classes that are tallied

    returns:
        None; the figure is displayed with plt.show()
    """
    # A parenthesised single-argument print behaves exactly like the
    # Python 2 print statement and is also valid Python 3.
    print("Grouping into unique flight/booking class combinations")
    flight_data = f.getUniqueFlightsAndBookings(data)

    bcs = {bc: 0 for (bc, r) in Utils.mapCabinToBookingClasses(cabin)}

    print("Iterating through all booking classes")
    for flight, flight_df in flight_data:
        bc = flight[-1]
        keyday = -1 * flight_df["KEYDAY"]
        bkd = flight_df["BKD"]
        keyday, bkd = Utils.sortByIndex(keyday, bkd)
        bcs[bc] += bkd[-1]

    # Float start value keeps the division below a true division.
    total_bkd = sum(bcs.values(), 0.0)
    # With no bookings at all there is nothing to normalise; leave the
    # tallies at zero instead of dividing by zero.
    if total_bkd != 0:
        for bc in bcs:
            bcs[bc] /= total_bkd

    ordered = sorted(bcs.items(), key=lambda item: Utils.compareBCs(item[0]))
    ks, vs = zip(*ordered)

    indices = np.arange(len(ks))
    width = 0.75

    fig, ax = plt.subplots()
    ax.bar(indices, vs, width)

    ax.set_ylabel("Percent of Total Booked")
    # NOTE(review): the title is fixed to "Economy Cabin" even though the
    # cabin is a parameter - confirm whether it should follow `cabin`.
    ax.set_title("Booking Class Ticketing Distribution - Economy Cabin")
    ax.set_xticks(indices + width / 2.0)
    ax.set_xticklabels(ks)

    plt.grid()
    plt.show()
def bc_bars_base(y_cumsum):
    """
    Compute each economy booking class's share of total bookings.

    The last column of y_cumsum is taken as the total booked per row. Each
    total is added to the booking class found in the last element of the
    matching entry of ids_test, and the tallies are divided by the overall
    sum.

    args:
        y_cumsum: a cumsum deltabkd vector (either predict or actual),
            indexed as y_cumsum[:, -1]; must have one row per entry of
            ids_test (ids_test is not defined in this block and is read
            from the surrounding scope)

    returns:
        (ks, vs, indices): booking classes ordered by Utils.compareBCs,
        their shares in the same order, and np.arange(len(ks))
    """
    totalbkd_vector = y_cumsum[:, -1]
    assert len(ids_test) == len(totalbkd_vector)

    bcs = {bc: 0 for (bc, r) in Utils.mapCabinToBookingClasses("Y")}
    for totalbkd, ids in zip(totalbkd_vector, ids_test):
        bcs[ids[-1]] += totalbkd

    # float() forces true division: with integral totals Python 2 would
    # otherwise floor-divide every share down to 0.
    denom = float(sum(totalbkd_vector))
    # Empty or all-zero input has nothing to normalise; keep the zero
    # tallies instead of dividing by zero.
    if denom != 0:
        for bc in bcs:
            bcs[bc] /= denom

    ordered = sorted(bcs.items(), key=lambda item: Utils.compareBCs(item[0]))
    ks, vs = zip(*ordered)
    indices = np.arange(len(ks))
    return ks, vs, indices