Esempio n. 1
0
 def event_days_since_publication(self):
     """Map each event source to the day offsets of its events from publication.

     Returns:
         A dict with the same keys as ``self.event_dates``.  Each value is a
         list holding, for every date string of that source, the result of
         ``days_between(event_date_string, <pubdate as ISO string>)``.
         An empty dict is returned when ``self.event_dates`` or
         ``self.pubdate`` is falsy.
     """
     if not self.event_dates or not self.pubdate:
         return {}
     # The publication date is identical for every event, so it is formatted
     # once here instead of once per event date.
     pubdate_iso = self.pubdate.isoformat()
     resp = {}
     # ``items()`` is available on both Python 2 and Python 3 mappings,
     # unlike ``iteritems()``.
     for source, date_list in self.event_dates.items():
         resp[source] = [
             days_between(event_date_string, pubdate_iso)
             for event_date_string in date_list
         ]
     return resp
Esempio n. 2
0
 def event_days_since_publication(self):
     """Return, per event source, how many days each event is from publication.

     Returns:
         A dict keyed like ``self.event_dates``; every value is the list of
         ``days_between(event_date, <pubdate as ISO string>)`` results for
         that source's date strings.  When either ``self.event_dates`` or
         ``self.pubdate`` is falsy the result is an empty dict.
     """
     if not self.event_dates or not self.pubdate:
         return {}
     # Loop-invariant: convert the publication date to its ISO string once.
     pubdate_iso = self.pubdate.isoformat()
     # ``items()`` (rather than the Python-2-only ``iteritems()``) keeps the
     # method usable on both Python 2 and Python 3.
     return {
         source: [days_between(event_date, pubdate_iso) for event_date in dates]
         for source, dates in self.event_dates.items()
     }
Esempio n. 3
0
def load_data_with_daysBeforeTakeoff_and_sameFlightNum(days, filePrefix="BCN_BUD", dataset="Specific"):
    """
    Select the records of one route that were observed `days` days before takeoff.

    Such records form one equivalence class (same flight number, same days
    before takeoff); in our dataset one route means one flight number.
    :param days: the days before takeoff
    :param filePrefix: choose which route
    :param dataset: dataset name('Specific' or 'General')
    :return: list of the records whose
             util.days_between(ObservedDate, Date) equals `days`
    """
    matched = []
    for record in load_data_with_prefix_and_dataset(filePrefix, dataset):
        gap = util.days_between(record["ObservedDate"], record["Date"])
        if gap == days:
            matched.append(record)
    return matched
Esempio n. 4
0
def load_data_with_prefix_and_dataset(filePrefix="BCN_BUD",
                                      dataset="Specific"):
    """
    Load every record of one route from the given dataset.

    The dataset lives in ``<directory of this file>/data/<dataset>/``.  Every
    entry of that directory is treated as an observed-date directory, and
    every file in it whose name contains ``filePrefix`` is decoded as JSON.
    Each decoded record gets two extra keys:

    * ``"ObservedDate"``: the directory name with ``"-"`` removed
    * ``"State"``: ``util.days_between(Date, ObservedDate) - 1``

    Entries that cannot be listed as a directory and files that fail to
    load are reported on stdout and skipped.

    :param filePrefix: choose which route (matched as a substring of the
                       file name)
    :param dataset: dataset name('Specific' or 'General')
    :return: list of decoded records accepted by ``is_not_nullprice``
    """
    currentDir = os.path.dirname(os.path.realpath(__file__))
    datasetDir = os.path.join(currentDir, "data", dataset)

    data_decoded = []  # all the records of files containing "filePrefix"

    # each entry of the dataset directory is expected to be an observed date
    for date in os.listdir(datasetDir):
        currentPath = os.path.join(datasetDir, date)

        try:
            fileNames = os.listdir(currentPath)
        except OSError:
            # stray non-directory entries are skipped, not fatal
            print("Not a directory, MAC OS contains .DS_Store file.")
            continue

        observedDate = date.replace("-", "")
        for fileName in fileNames:
            if filePrefix not in fileName:
                continue
            filePath = os.path.join(currentPath, fileName)

            try:
                # the context manager closes the file even if decoding fails
                with open(filePath, 'r') as fp:
                    datas_with_specific_date = json.load(fp)
                for data in datas_with_specific_date:
                    # "Date" is the departure date, "ObservedDate" is the observed date
                    data["ObservedDate"] = observedDate
                    data["State"] = util.days_between(
                        data["Date"], data["ObservedDate"]) - 1
            except Exception:
                # Best effort: a broken file is skipped as a whole.  Unlike a
                # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
                print("Not a json file")
                continue

            # extend (not append): keep one flat list of records
            data_decoded += datas_with_specific_date

    # filter the null entries; build a real list so callers can iterate it
    # more than once on Python 3 as well
    return [data for data in data_decoded if is_not_nullprice(data)]
Esempio n. 5
0
def load_data_with_daysBeforeTakeoff_and_sameFlightNum(days,
                                                       filePrefix="BCN_BUD",
                                                       dataset="Specific"):
    """
    Return the records of a route that share the same days before takeoff.

    Records with the same flight number and the same days before takeoff
    belong to one equivalence class; in our dataset one route means one
    flight number, so filtering a single route is enough.
    :param days: the days before takeoff
    :param filePrefix: choose which route
    :param dataset: dataset name('Specific' or 'General')
    :return: list of the records for which
             util.days_between(ObservedDate, Date) equals `days`
    """

    def gap_in_days(record):
        # value compared against `days` for one record
        return util.days_between(record["ObservedDate"], record["Date"])

    records = load_data_with_prefix_and_dataset(filePrefix, dataset)
    return [record for record in records if gap_in_days(record) == days]
Esempio n. 6
0
def load_data_with_prefix_and_dataset(filePrefix="BCN_BUD", dataset="Specific"):
    """
    Load the data in the 'dataset' whose file names contain 'filePrefix'.

    Reads ``<directory of this file>/data/<dataset>/<observed date>/<file>``
    for every observed-date directory and every file whose name contains
    ``filePrefix``, decoding each file as JSON.  Every decoded record is
    annotated with:

    * ``"ObservedDate"``: the observed-date directory name without ``"-"``
    * ``"State"``: ``util.days_between(Date, ObservedDate) - 1``

    Directory entries that cannot be listed and files that fail to load are
    reported on stdout and skipped.

    :param filePrefix: choose which route (substring match on the file name)
    :param dataset: dataset name('Specific' or 'General')
    :return: list of decoded records for which ``is_not_nullprice`` is true
    """
    currentDir = os.path.dirname(os.path.realpath(__file__))
    datasetDir = os.path.join(currentDir, "data", dataset)

    data_decoded = []  # keep all the records of files containing "filePrefix"

    for date in os.listdir(datasetDir):  # one entry per observed date
        currentPath = os.path.join(datasetDir, date)

        try:
            fileNames = os.listdir(currentPath)  # file names in current date directory
        except OSError:
            # not a directory: report it and move on to the next entry
            print("Not a directory, MAC OS contains .DS_Store file.")
            continue

        observedDate = date.replace("-", "")
        for fileName in fileNames:
            if filePrefix not in fileName:
                continue
            filePath = os.path.join(currentPath, fileName)

            try:
                # ``with`` guarantees the handle is closed, even on errors
                with open(filePath, 'r') as fp:
                    datas_with_specific_date = json.load(fp)
                # add observed data
                for data in datas_with_specific_date:
                    # "Date" is the departure date, "ObservedDate" is the observed date
                    data["ObservedDate"] = observedDate
                    data["State"] = util.days_between(data["Date"], data["ObservedDate"]) - 1
            except Exception:
                # Best effort: skip the whole file.  ``Exception`` (not a bare
                # ``except``) keeps Ctrl-C and SystemExit working.
                print("Not a json file")
                continue

            data_decoded += datas_with_specific_date  # extend, do not append

    # filter the null entries; return a real list so the result can be
    # iterated repeatedly on Python 3 too
    return [data for data in data_decoded if is_not_nullprice(data)]
Esempio n. 7
0
def load_for_classification_for_General(dataset="General",
                                        routes=routes_general):
    """
    Build and save the raw classification inputs for the General dataset.

    One feature row is built per record of every route:

    * one dummy (one-hot) column per entry of ``routes``
    * ``util.days_between(departure date, "20151109")``
    * the record's ``"State"``
    * ``getMinimumPreviousPrice(...)`` and ``getMaximumPreviousPrice(...)``

    The label is 1 when the record's price equals ``getMinimumPrice`` over
    all records of the route with the same departure date, otherwise 0.

    The arrays are saved under ``inputGeneralRaw/`` together with ``tmp``,
    the row-deduplicated combination ``[X | y | price]``.

    :param dataset: dataset name('Specific' or 'General')
    :param routes: route prefixes to load; their order fixes the order of
                   the dummy columns
    :return: X_train, y_train, y_train_price (not deduplicated)
    """

    def stack(rows, width):
        # One concatenate at the end instead of one per record; starting from
        # an empty float array keeps the original dtype promotion.
        base = np.empty(shape=(0, width))
        if not rows:
            return base
        return np.concatenate((base, rows), axis=0)

    dim = len(routes) + 4
    featureRows = []
    labelRows = []
    priceRows = []

    for filePrefix in routes:
        print(filePrefix)
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        # column of this route's dummy variable; constant for the whole route
        # TODO: the dummy encoding was marked "need to change" by the author
        routeIndex = routes.index(filePrefix)
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(
                filePrefix, data["State"], data["Date"]))

            # feature 1: flight number -> dummy variables
            x_i = [1 if i == routeIndex else 0 for i in range(len(routes))]

            # feature 2: departure date interval from "20151109", because the
            # first observed date is 20151109
            # TODO: maybe need to change the first observed date
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))

            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))

            # output: 1 when this record holds the minimum price of its
            # departure date
            specificDatas = [
                data2 for data2 in datas if data2["Date"] == departureDate
            ]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]

            featureRows.append(x_i)
            labelRows.append(y_i)
            priceRows.append([price])  # keep price info

    X_train = stack(featureRows, dim)
    y_train = stack(labelRows, 1)
    y_train_price = stack(priceRows, 1)

    # Remove duplicate rows.  ``axis=0`` is required: without it np.unique
    # flattens the array and returns unique scalars, not unique rows
    # (needs numpy >= 1.13).
    tmp = np.concatenate((X_train, y_train, y_train_price), axis=1)
    if tmp.shape[0] > 0:
        tmp = np.unique(tmp, axis=0)

    # save the result
    np.save('inputGeneralRaw/X_train', X_train)
    np.save('inputGeneralRaw/y_train', y_train)
    np.save('inputGeneralRaw/y_train_price', y_train_price)
    np.save('inputGeneralRaw/tmp', tmp)

    return X_train, y_train, y_train_price
Esempio n. 8
0
def load_for_classification_for_Specific(dataset="Specific",
                                         routes=routes_specific):
    """
    Build and save the train/test classification inputs for the Specific dataset.

    One feature row is built per record of every route:

    * one dummy (one-hot) column per entry of ``routes``
    * ``util.days_between(departure date, "20151109")``
    * the record's ``"State"``
    * ``getMinimumPreviousPrice(...)`` and ``getMaximumPreviousPrice(...)``

    The label is 1 when the record's price equals ``getMinimumPrice`` over
    all records of the route with the same departure date (several records
    of one departure date may therefore be labelled 1), otherwise 0.

    Records with departure date in [20151129, 20160229) go to the training
    set, those in [20160229, 20160508) to the test set; all others are
    dropped.  Duplicate rows are removed from both sets, and the results
    are saved under ``inputSpecificRaw/``.

    :param dataset: dataset name('Specific' or 'General')
    :param routes: route prefixes to load; their order fixes the order of
                   the dummy columns
    :return: X_train, y_train, X_test, y_test
    """
    dim = len(routes) + 4

    def stack(rows, width):
        # One concatenate at the end instead of one per record; starting from
        # an empty float array keeps the original dtype promotion.
        base = np.empty(shape=(0, width))
        if not rows:
            return base
        return np.concatenate((base, rows), axis=0)

    def unique_rows(features, labels, prices):
        # Deduplicate on the combined [X | y | price] rows.  ``axis=0`` is
        # required: without it np.unique flattens to a 1-D array and the
        # column slicing below raises IndexError (needs numpy >= 1.13).
        combined = np.concatenate((stack(features, dim), stack(labels, 1),
                                   stack(prices, 1)), axis=1)
        if combined.shape[0] > 0:
            combined = np.unique(combined, axis=0)
        # slice by ``dim`` so any number of routes works, not only 8
        return combined[:, :dim], combined[:, dim], combined[:, dim + 1]

    trainX, trainY, trainPrice = [], [], []
    testX, testY, testPrice = [], [], []

    for filePrefix in routes:
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        # column of this route's dummy variable; constant for the whole route
        # TODO: the dummy encoding was marked "need to change" by the author
        routeIndex = routes.index(filePrefix)
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(
                filePrefix, data["State"], data["Date"]))

            # feature 1: flight number -> dummy variables
            x_i = [1 if i == routeIndex else 0 for i in range(len(routes))]

            # feature 2: departure date interval from "20151109", because the
            # first observed date is 20151109
            # TODO: maybe need to change the first observed date
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))

            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))

            # output: multiple entries of one departure date can be "buy"
            specificDatas = [
                data2 for data2 in datas if data2["Date"] == departureDate
            ]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]

            departureDateInt = int(departureDate)
            if 20151129 <= departureDateInt < 20160229:
                # training data: departure date in [20151129, 20160229)
                trainX.append(x_i)
                trainY.append(y_i)
                trainPrice.append([price])
            elif 20160229 <= departureDateInt < 20160508:
                # test data: departure date in [20160229, 20160508)
                testX.append(x_i)
                testY.append(y_i)
                testPrice.append([price])

    # remove duplicate rows for train and for test
    X_train, y_train, y_train_price = unique_rows(trainX, trainY, trainPrice)
    X_test, y_test, y_test_price = unique_rows(testX, testY, testPrice)

    # save the result
    np.save('inputSpecificRaw/X_train', X_train)
    np.save('inputSpecificRaw/y_train', y_train)
    np.save('inputSpecificRaw/y_train_price', y_train_price)
    np.save('inputSpecificRaw/X_test', X_test)
    np.save('inputSpecificRaw/y_test', y_test)
    np.save('inputSpecificRaw/y_test_price', y_test_price)

    return X_train, y_train, X_test, y_test
Esempio n. 9
0
def load_for_classification_for_General(dataset="General", routes=routes_general):
    """
    Load the General dataset as raw classification inputs and save them.

    For each record of each route the feature row holds:

    * one dummy (one-hot) column per entry of ``routes``
    * ``util.days_between(departure date, "20151109")``
    * the record's ``"State"``
    * ``getMinimumPreviousPrice(...)`` and ``getMaximumPreviousPrice(...)``

    A record is labelled 1 when its price equals ``getMinimumPrice`` over
    the route's records with the same departure date, otherwise 0.

    ``X_train``, ``y_train``, ``y_train_price`` and ``tmp`` (the
    row-deduplicated combination ``[X | y | price]``) are saved under
    ``inputGeneralRaw/``.

    :param dataset: dataset name('Specific' or 'General')
    :param routes: route prefixes to load; their order fixes the order of
                   the dummy columns
    :return: X_train, y_train, y_train_price (not deduplicated)
    """

    def stack(rows, width):
        # Build the array once at the end rather than re-copying it for
        # every record; the empty float base keeps the original dtype
        # promotion.
        base = np.empty(shape=(0, width))
        if not rows:
            return base
        return np.concatenate((base, rows), axis=0)

    dim = len(routes) + 4
    featureRows = []
    labelRows = []
    priceRows = []

    for filePrefix in routes:
        print(filePrefix)
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        # index of this route's dummy column; hoisted out of the record loop
        # TODO: the dummy encoding was marked "need to change" by the author
        routeIndex = routes.index(filePrefix)
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(filePrefix, data["State"], data["Date"]))

            # feature 1: flight number -> dummy variables
            x_i = [1 if i == routeIndex else 0 for i in range(len(routes))]

            # feature 2: departure date interval from "20151109", because the first observed date is 20151109
            # TODO: maybe need to change the first observed date
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))

            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))

            # output: 1 when this record holds the minimum price of its departure date
            specificDatas = [data2 for data2 in datas if data2["Date"] == departureDate]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]

            featureRows.append(x_i)
            labelRows.append(y_i)
            priceRows.append([price])  # keep price info

    X_train = stack(featureRows, dim)
    y_train = stack(labelRows, 1)
    y_train_price = stack(priceRows, 1)

    # Remove duplicate rows.  Without ``axis=0`` np.unique flattens its input
    # and yields unique scalars instead of unique rows (needs numpy >= 1.13).
    tmp = np.concatenate((X_train, y_train, y_train_price), axis=1)
    if tmp.shape[0] > 0:
        tmp = np.unique(tmp, axis=0)

    # save the result
    np.save('inputGeneralRaw/X_train', X_train)
    np.save('inputGeneralRaw/y_train', y_train)
    np.save('inputGeneralRaw/y_train_price', y_train_price)
    np.save('inputGeneralRaw/tmp', tmp)

    return X_train, y_train, y_train_price
Esempio n. 10
0
def load_for_classification_for_Specific(dataset="Specific", routes=routes_specific):
    """
    Load the Specific dataset as train/test classification inputs and save them.

    For each record of each route the feature row holds:

    * one dummy (one-hot) column per entry of ``routes``
    * ``util.days_between(departure date, "20151109")``
    * the record's ``"State"``
    * ``getMinimumPreviousPrice(...)`` and ``getMaximumPreviousPrice(...)``

    A record is labelled 1 when its price equals ``getMinimumPrice`` over
    the route's records with the same departure date (so several records of
    one departure date can be labelled 1), otherwise 0.

    Departure dates in [20151129, 20160229) form the training set and those
    in [20160229, 20160508) the test set; other records are dropped.
    Duplicate rows are removed from both sets before they are saved under
    ``inputSpecificRaw/``.

    :param dataset: dataset name('Specific' or 'General')
    :param routes: route prefixes to load; their order fixes the order of
                   the dummy columns
    :return: X_train, y_train, X_test, y_test
    """
    dim = len(routes) + 4

    def stack(rows, width):
        # Build the array once at the end rather than re-copying it for
        # every record; the empty float base keeps the original dtype
        # promotion.
        base = np.empty(shape=(0, width))
        if not rows:
            return base
        return np.concatenate((base, rows), axis=0)

    def unique_rows(features, labels, prices):
        # Deduplicate the combined [X | y | price] rows.  Without ``axis=0``
        # np.unique returns a flattened 1-D array, on which the 2-D column
        # slicing below raises IndexError (needs numpy >= 1.13).
        combined = np.concatenate((stack(features, dim), stack(labels, 1), stack(prices, 1)), axis=1)
        if combined.shape[0] > 0:
            combined = np.unique(combined, axis=0)
        # column positions follow ``dim`` instead of the hard-coded 12/13
        return combined[:, :dim], combined[:, dim], combined[:, dim + 1]

    trainX, trainY, trainPrice = [], [], []
    testX, testY, testPrice = [], [], []

    for filePrefix in routes:
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        # index of this route's dummy column; hoisted out of the record loop
        # TODO: the dummy encoding was marked "need to change" by the author
        routeIndex = routes.index(filePrefix)
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(filePrefix, data["State"], data["Date"]))

            # feature 1: flight number -> dummy variables
            x_i = [1 if i == routeIndex else 0 for i in range(len(routes))]

            # feature 2: departure date interval from "20151109", because the first observed date is 20151109
            # TODO: maybe need to change the first observed date
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))

            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))

            # output: multiple entries of one departure date can be "buy"
            specificDatas = [data2 for data2 in datas if data2["Date"] == departureDate]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]

            departureDateInt = int(departureDate)
            if 20151129 <= departureDateInt < 20160229:
                # training data: departure date in [20151129, 20160229)
                trainX.append(x_i)
                trainY.append(y_i)
                trainPrice.append([price])
            elif 20160229 <= departureDateInt < 20160508:
                # test data: departure date in [20160229, 20160508)
                testX.append(x_i)
                testY.append(y_i)
                testPrice.append([price])

    # remove duplicate rows for train and for test
    X_train, y_train, y_train_price = unique_rows(trainX, trainY, trainPrice)
    X_test, y_test, y_test_price = unique_rows(testX, testY, testPrice)

    # save the result
    np.save('inputSpecificRaw/X_train', X_train)
    np.save('inputSpecificRaw/y_train', y_train)
    np.save('inputSpecificRaw/y_train_price', y_train_price)
    np.save('inputSpecificRaw/X_test', X_test)
    np.save('inputSpecificRaw/y_test', y_test)
    np.save('inputSpecificRaw/y_test_price', y_test_price)

    return X_train, y_train, X_test, y_test