コード例 #1
0
    def getDrillDown(self, df=None, orgs=None, dests=None, flights=None,
                     cabins=None, bcs=None, date_ranges=None):
        """
        Return the rows of a dataframe that satisfy every supplied filter.

        The values inside one filter list are alternatives, while the
        different filters are combined with AND.  For example,
        orgs=['DXB', 'DMM'], bcs=['B'] keeps the rows of flights that
        departed from DXB or DMM and were made in the 'B' booking class.

        args:
            df: the pd.DataFrame to filter; when it is not a pd.DataFrame
                (e.g. left as None) a copy of self.entities is used
            orgs: list of strings describing origin airports (ORG column)
            dests: list of strings describing destination airports
                   (DES column)
            flights: list of ints describing flight numbers (FLT column)
            cabins: list of strings describing cabins ('Y' or 'J'); each
                    cabin is expanded into its booking classes with
                    Utils.mapCabinToBookingClasses and matched against the
                    BC column
            bcs: list of strings describing booking classes (BC column)
            date_ranges: list of tuples of strings describing desired dates
                         (DATE column)

        returns:
            pd.DataFrame instance
        """
        if not isinstance(df, pd.DataFrame):
            df = self.entities.copy()

        row_labels = list(df.index)
        false_mask = pd.Series(False, row_labels)
        m = pd.Series(True, row_labels)

        # (requested values, column to compare against), applied in order.
        column_filters = (
            (orgs, "ORG"),
            (dests, "DES"),
            (flights, "FLT"),
            (bcs, "BC"),
            (date_ranges, "DATE"),
        )
        for wanted, column_name in column_filters:
            m = m & self._mask(false_mask, wanted, getattr(df, column_name))

        if cabins is not None:
            # A cabin is filtered through the booking classes it maps to.
            cabin_bcs = [
                bc
                for cabin in cabins
                for bc, _rank in Utils.mapCabinToBookingClasses(cabin)
            ]
            m = m & self._mask(false_mask, cabin_bcs, df.BC)

        return df[m]
コード例 #2
0
def bookingClassTicketFrequencies(f, data, cabin):
    """
    Plot each booking class's share of the total bookings as a bar chart.

    For every (flight, flight_df) pair produced by
    f.getUniqueFlightsAndBookings(data), the last "BKD" value after
    Utils.sortByIndex is added to the tally of the booking class found in
    flight[-1].  The tallies are divided by their sum and drawn as bars
    ordered with Utils.compareBCs as the sort key.

    args:
        f: object providing getUniqueFlightsAndBookings(data)
        data: passed unchanged to f.getUniqueFlightsAndBookings
        cabin: cabin handed to Utils.mapCabinToBookingClasses to obtain the
               booking classes that are charted

    returns:
        None; the chart is displayed with plt.show()

    raises:
        KeyError: if a flight's booking class is not one of the classes
                  returned by Utils.mapCabinToBookingClasses(cabin)
    """
    # A single-argument print(...) prints the same text as the Python 2
    # print statement and is also valid Python 3.
    print("Grouping into unique flight/booking class combinations")
    flight_data = f.getUniqueFlightsAndBookings(data)

    totals = {bc: 0 for (bc, _rank) in Utils.mapCabinToBookingClasses(cabin)}

    print("Iterating through all booking classes")
    for flight, flight_df in flight_data:
        bc = flight[-1]
        keyday = -1 * flight_df["KEYDAY"]
        bkd = flight_df["BKD"]

        # NOTE(review): presumably this orders bkd by the negated keyday so
        # that bkd[-1] is the final booked count -- confirm against
        # Utils.sortByIndex.
        _keyday, bkd = Utils.sortByIndex(keyday, bkd)

        totals[bc] += bkd[-1]

    # Starting the sum at 0.0 keeps the division below a float division.
    total_bkd = sum(totals.values(), 0.0)
    if total_bkd:
        shares = [(bc, booked / total_bkd) for bc, booked in totals.items()]
    else:
        # Nothing booked at all: chart zero-height bars instead of dividing
        # by zero.
        shares = [(bc, 0.0) for bc in totals]

    shares.sort(key=lambda pair: Utils.compareBCs(pair[0]))
    ks, vs = zip(*shares)
    indices = np.arange(len(ks))
    width = 0.75

    _fig, ax = plt.subplots()
    ax.bar(indices, vs, width)
    # NOTE(review): the plotted values are fractions of the total, and the
    # title names the economy cabin regardless of the `cabin` argument.
    ax.set_ylabel("Percent of Total Booked")
    ax.set_title("Booking Class Ticketing Distribution - Economy Cabin")
    ax.set_xticks(indices + width / 2.0)
    ax.set_xticklabels(ks)

    plt.grid()
    plt.show()
コード例 #3
0
def bc_bars_base(y_cumsum, ids=None, cabin="Y"):
    """
    Compute each booking class's share of the total bookings.

    args:
        y_cumsum: a cumsum deltabkd vector (either predict or actual); it is
                  indexed as y_cumsum[:, -1], i.e. the last column supplies
                  the total booked for each row
        ids: one identifier sequence per row of y_cumsum, whose last element
             is the booking class of that row; defaults to the module-level
             ids_test
        cabin: cabin whose booking classes are obtained from
               Utils.mapCabinToBookingClasses; defaults to "Y"

    returns:
        (ks, vs, indices): the booking classes ordered with
        Utils.compareBCs as the sort key, their shares of the summed
        totals (all 0.0 when that sum is zero), and np.arange(len(ks))

    raises:
        KeyError: if a row's booking class is not one of the classes
                  returned by Utils.mapCabinToBookingClasses(cabin)
    """
    if ids is None:
        # Keep the historical behaviour of reading the module-level ids.
        ids = ids_test

    totalbkd_vector = y_cumsum[:, -1]
    assert len(ids) == len(totalbkd_vector), \
        "ids and y_cumsum must have the same number of rows"

    bcs = {bc: 0 for (bc, _rank) in Utils.mapCabinToBookingClasses(cabin)}

    for totalbkd, row_ids in zip(totalbkd_vector, ids):
        bcs[row_ids[-1]] += totalbkd

    # Starting the sum at 0.0 guarantees float division even for integer
    # totals (matters on Python 2, where int / int floors).
    denom = sum(totalbkd_vector, 0.0)

    if denom:
        shares = [(bc, booked / denom) for bc, booked in bcs.items()]
    else:
        # No bookings in total: report zero shares instead of dividing by
        # zero.
        shares = [(bc, 0.0) for bc in bcs]

    shares.sort(key=lambda pair: Utils.compareBCs(pair[0]))
    ks, vs = zip(*shares)
    indices = np.arange(len(ks))

    return ks, vs, indices