def event_days_since_publication(self):
    """Return, per source, the number of days between each event date and publication.

    :return: dict mapping source -> list of day counts; an empty dict when
             either ``self.event_dates`` or ``self.pubdate`` is missing/empty.
    """
    if not self.event_dates or not self.pubdate:
        return {}
    resp = {}
    # .items() instead of the Python-2-only .iteritems() keeps this portable;
    # behavior is identical under Python 2.
    for source, date_list in self.event_dates.items():
        resp[source] = [days_between(event_date_string, self.pubdate.isoformat())
                        for event_date_string in date_list]
    return resp
def event_days_since_publication(self):
    """Map each event source to the day offsets of its events from publication.

    :return: {source: [days, ...]}; {} if there are no event dates or no pubdate.
    """
    if not self.event_dates or not self.pubdate:
        return {}
    # Hoisted: the publication date is invariant across sources.
    pubdate_iso = self.pubdate.isoformat()
    # .items() replaces .iteritems(), which exists only in Python 2.
    return {
        source: [days_between(event_date_string, pubdate_iso)
                 for event_date_string in date_list]
        for source, date_list in self.event_dates.items()
    }
def load_data_with_daysBeforeTakeoff_and_sameFlightNum(days, filePrefix="BCN_BUD", dataset="Specific"):
    """
    Load data with same flight number and the same days before takeoff.
    i.e. same equivalence class
    But in out dataset, one route means one flight number.
    :param days: the days before takeoff
    :param filePrefix: choose which route
    :param dataset: dataset name('Specific' or 'General')
    :return: data with same flight number and the same days before takeoff
    """
    all_entries = load_data_with_prefix_and_dataset(filePrefix, dataset)
    matching = []
    # Keep only the observations made exactly `days` days before takeoff.
    for entry in all_entries:
        if util.days_between(entry["ObservedDate"], entry["Date"]) == days:
            matching.append(entry)
    return matching
def load_data_with_prefix_and_dataset(filePrefix="BCN_BUD", dataset="Specific"):
    """Load and decode every JSON schedule file in ``dataset`` whose name contains ``filePrefix``.

    The dataset directory holds one sub-directory per observed date. Every
    decoded entry is annotated with:
      * ``ObservedDate`` - the observation directory name with dashes removed
      * ``State``        - days between departure ("Date") and observed date, minus one
    Entries with a null minimum price are filtered out.

    :param filePrefix: choose which route
    :param dataset: dataset name ('Specific' or 'General')
    :return: list of decoded, annotated schedule dicts
    """
    currentDir = os.path.dirname(os.path.realpath(__file__))
    # Path directory of each observed date in the dataset.
    observeDatesDirs = os.listdir(currentDir + "/data/" + dataset)
    filePaths = []     # all file paths whose name contains filePrefix
    data_decoded = []  # all schedules collected from those files
    for date in observeDatesDirs:
        currentPath = currentDir + "/data/" + dataset + "/" + date
        try:
            files = os.listdir(currentPath)  # file names in current date directory
        except OSError:
            # Narrowed from a bare `except:`; e.g. macOS leaves .DS_Store
            # files (not directories) inside the dataset directory.
            print("Not a directory, MAC OS contains .DS_Store file.")
            continue
        for file in files:
            if filePrefix not in file:
                continue
            filePath = os.path.join(currentPath, file)
            filePaths.append(filePath)
            try:
                # `with` guarantees the handle is closed (the original leaked it).
                with open(filePath, 'r') as fp:
                    datas_with_specific_date = json.load(fp)
            except ValueError:
                # json.load raises ValueError (JSONDecodeError subclasses it).
                print("Not a json file")
                continue
            # add observed data
            for data in datas_with_specific_date:
                # "Date" is the departure date, "ObservedDate" is the observed date
                data["ObservedDate"] = date.replace("-", "")
                data["State"] = util.days_between(data["Date"], data["ObservedDate"]) - 1
            data_decoded += datas_with_specific_date  # extend, do not append
    # Filter the null entries; a comprehension returns a list on Python 2 and 3
    # (Py3 `filter` would return a lazy iterator and change the return type).
    data_decoded = [d for d in data_decoded if is_not_nullprice(d)]
    return data_decoded
def load_data_with_daysBeforeTakeoff_and_sameFlightNum(days, filePrefix="BCN_BUD", dataset="Specific"):
    """
    Load data with same flight number and the same days before takeoff.
    i.e. same equivalence class
    But in out dataset, one route means one flight number.
    :param days: the days before takeoff
    :param filePrefix: choose which route
    :param dataset: dataset name('Specific' or 'General')
    :return: data with same flight number and the same days before takeoff
    """
    records = load_data_with_prefix_and_dataset(filePrefix, dataset)

    def observed_at_days(rec):
        # True when this observation happened exactly `days` days before takeoff.
        return util.days_between(rec["ObservedDate"], rec["Date"]) == days

    return [rec for rec in records if observed_at_days(rec)]
def load_data_with_prefix_and_dataset(filePrefix="BCN_BUD", dataset="Specific"):
    """Collect every schedule whose file name contains ``filePrefix`` from ``dataset``.

    The dataset directory holds one sub-directory per observed date; each JSON
    file inside is decoded and every entry is stamped with its observation date
    ("ObservedDate", dashes stripped) and its "State" (days between departure
    date "Date" and the observed date, minus one). Null-price entries are dropped.

    :param filePrefix: choose which route
    :param dataset: dataset name ('Specific' or 'General')
    :return: decoded data
    """
    datasetDir = os.path.dirname(os.path.realpath(__file__)) + "/data/" + dataset
    filePaths = []     # keep all the file paths that contain filePrefix
    data_decoded = []  # keep all the schedules that contain filePrefix
    for date in os.listdir(datasetDir):
        dateDir = datasetDir + "/" + date
        try:
            names = os.listdir(dateDir)
        except OSError:
            # Narrowed from a bare `except:` — raised when `date` is not a
            # directory (macOS drops .DS_Store files into the dataset dir).
            print("Not a directory, MAC OS contains .DS_Store file.")
            continue
        for name in names:
            if filePrefix not in name:
                continue
            path = os.path.join(dateDir, name)
            filePaths.append(path)
            try:
                # Context manager closes the handle the original version leaked.
                with open(path, 'r') as fp:
                    schedules = json.load(fp)
            except ValueError:
                # Invalid JSON: json.load raises ValueError
                # (json.JSONDecodeError is a ValueError subclass).
                print("Not a json file")
                continue
            observed = date.replace("-", "")
            for data in schedules:
                # "Date" is the departure date, "ObservedDate" is the observed date
                data["ObservedDate"] = observed
                data["State"] = util.days_between(data["Date"], data["ObservedDate"]) - 1
            data_decoded += schedules  # extend, not append
    # Drop entries whose minimum price is null; comprehension keeps the
    # list return type on both Python 2 and 3.
    return [d for d in data_decoded if is_not_nullprice(d)]
def load_for_classification_for_General(dataset="General", routes=routes_general):
    """Build the classification training matrices for the 'General' dataset.

    For every observation of every route a feature vector is built:
      * one-hot route indicator (len(routes) columns)
      * departure-date gap from "20151109" (the first observed date)
      * state (observed days before departure)
      * minimum price seen before the observed date
      * maximum price seen before the observed date
    The label is 1 when the entry's minimum price equals the overall minimum
    price of its departure date ("buy"), else 0.

    The raw matrices plus a row-deduplicated copy are saved under
    'inputGeneralRaw/'.

    :param dataset: dataset name ('Specific' or 'General')
    :param routes: route prefixes to load
    :return: X_train, y_train, y_train_price
    """
    # Construct the input data: one-hot route columns + 4 numeric features.
    dim = len(routes) + 4
    X_train = np.empty(shape=(0, dim))
    y_train = np.empty(shape=(0, 1))
    y_train_price = np.empty(shape=(0, 1))
    for filePrefix in routes:
        print(filePrefix)
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        routeIdx = routes.index(filePrefix)  # hoisted: invariant per route
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(
                filePrefix, data["State"], data["Date"]))
            # feature 1: flight number -> dummy (one-hot) variables
            x_i = [1 if i == routeIdx else 0 for i in range(len(routes))]
            # feature 2: departure date interval from "20151109",
            # because the first observed date is 20151109
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))
            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)
            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))
            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))
            # output: 1 iff this entry hits the minimum price of its departure date
            specificDatas = [data2 for data2 in datas if data2["Date"] == departureDate]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]
            # keep price info
            y_price = [price]
            X_train = np.concatenate((X_train, [x_i]), axis=0)
            y_train = np.concatenate((y_train, [y_i]), axis=0)
            y_train_price = np.concatenate((y_train_price, [y_price]), axis=0)
        # end of for datas
    # end of for routes

    # Remove duplicate rows. np.unique(..., axis=0) dedupes whole rows;
    # the original np.unique(list_of_tuples) flattened to sorted 1-D scalars,
    # which is why the column slicing had to stay commented out.
    tmp = np.concatenate((X_train, y_train, y_train_price), axis=1)
    tmp = np.unique(tmp, axis=0)

    # save the result
    np.save('inputGeneralRaw/X_train', X_train)
    np.save('inputGeneralRaw/y_train', y_train)
    np.save('inputGeneralRaw/y_train_price', y_train_price)
    np.save('inputGeneralRaw/tmp', tmp)
    return X_train, y_train, y_train_price
def load_for_classification_for_Specific(dataset="Specific", routes=routes_specific):
    """Build the classification train/test matrices for the 'Specific' dataset.

    Features per observation (dim = len(routes) + 4 columns): one-hot route
    indicator, departure-date gap from "20151109" (first observed date), state
    (observed days before departure), and the minimum/maximum price seen before
    the observed date. The label is 1 when the entry's minimum price equals the
    overall minimum for its departure date (multiple "buy" entries allowed).

    Departure dates in [20151129, 20160229) form the training set and
    [20160229, 20160508) the test set; other dates are skipped. Deduplicated
    arrays are saved under 'inputSpecificRaw/'.

    :param dataset: dataset name ('Specific' or 'General')
    :param routes: route prefixes to load
    :return: X_train, y_train, X_test, y_test
    """
    # Construct the input data: one-hot route columns + 4 numeric features.
    dim = len(routes) + 4
    X_train = np.empty(shape=(0, dim))
    y_train = np.empty(shape=(0, 1))
    y_train_price = np.empty(shape=(0, 1))
    X_test = np.empty(shape=(0, dim))
    y_test = np.empty(shape=(0, 1))
    y_test_price = np.empty(shape=(0, 1))
    for filePrefix in routes:
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        routeIdx = routes.index(filePrefix)  # hoisted: invariant per route
        for data in datas:
            print("Construct route {}, State {}, departureDate {}...".format(
                filePrefix, data["State"], data["Date"]))
            # feature 1: flight number -> dummy (one-hot) variables
            x_i = [1 if i == routeIdx else 0 for i in range(len(routes))]
            # feature 2: departure date interval from "20151109",
            # because the first observed date is 20151109
            departureDate = data["Date"]
            x_i.append(util.days_between(departureDate, "20151109"))
            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)
            # feature 4: minimum price before the observed date
            x_i.append(getMinimumPreviousPrice(data["Date"], state, datas))
            # feature 5: maximum price before the observed date
            x_i.append(getMaximumPreviousPrice(data["Date"], state, datas))
            # output: multiple entries of a departure date may be "buy" entries
            specificDatas = [data2 for data2 in datas if data2["Date"] == departureDate]
            minPrice = getMinimumPrice(specificDatas)
            price = util.getPrice(data["MinimumPrice"])
            y_i = [1] if price == minPrice else [0]
            # keep price info
            y_price = [price]
            dep = int(departureDate)
            if 20151129 <= dep < 20160229:
                # training data: departure dates in "20151129-20160229(20160115)"
                X_train = np.concatenate((X_train, [x_i]), axis=0)
                y_train = np.concatenate((y_train, [y_i]), axis=0)
                y_train_price = np.concatenate((y_train_price, [y_price]), axis=0)
            elif 20160229 <= dep < 20160508:
                # test data: departure dates before "20160508(20160220)"
                X_test = np.concatenate((X_test, [x_i]), axis=0)
                y_test = np.concatenate((y_test, [y_i]), axis=0)
                y_test_price = np.concatenate((y_test_price, [y_price]), axis=0)
            # other departure dates are ignored
        # end of for datas
    # end of for routes

    # Remove duplicate rows for train. np.unique(..., axis=0) keeps whole rows
    # unique and 2-D; the original np.unique(list_of_tuples) flattened to a
    # 1-D array, making the 2-D slices below fail.
    tmp_train = np.concatenate((X_train, y_train, y_train_price), axis=1)
    tmp_train = np.unique(tmp_train, axis=0)
    # Columns: [0, dim) features, dim label, dim+1 price. Using `dim` instead
    # of the original hard-coded 12/13 works for any number of routes.
    X_train = tmp_train[:, 0:dim]
    y_train = tmp_train[:, dim]
    y_train_price = tmp_train[:, dim + 1]

    # remove duplicate rows for test
    tmp_test = np.concatenate((X_test, y_test, y_test_price), axis=1)
    tmp_test = np.unique(tmp_test, axis=0)
    X_test = tmp_test[:, 0:dim]
    y_test = tmp_test[:, dim]
    y_test_price = tmp_test[:, dim + 1]

    # save the result
    np.save('inputSpecificRaw/X_train', X_train)
    np.save('inputSpecificRaw/y_train', y_train)
    np.save('inputSpecificRaw/y_train_price', y_train_price)
    np.save('inputSpecificRaw/X_test', X_test)
    np.save('inputSpecificRaw/y_test', y_test)
    np.save('inputSpecificRaw/y_test_price', y_test_price)
    return X_train, y_train, X_test, y_test
def load_for_classification_for_General(dataset="General", routes=routes_general):
    """
    Load the data for classification
    :param dataset: dataset name('Specific' or 'General')
    :return: X_train, y_train, X_test, y_test
    """
    # NOTE(review): this flag is never read inside this function.
    isOneOptimalState = False

    # Construct the input data
    # dim = one-hot route columns + 4 numeric features
    dim = routes.__len__() + 4
    X_train = np.empty(shape=(0, dim))
    y_train = np.empty(shape=(0,1))
    y_train_price = np.empty(shape=(0,1))

    for filePrefix in routes:
        print filePrefix
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        for data in datas:
            print "Construct route {}, State {}, departureDate {}...".format(filePrefix, data["State"], data["Date"])
            x_i = []
            # feature 1: flight number -> dummy variables
            for i in range(len(routes)):
                """
                !!!need to change!
                """
                if i == routes.index(filePrefix):
                    x_i.append(1)
                else:
                    x_i.append(0)

            # feature 2: departure date interval from "20151109", because the first observed date is 20151109
            departureDate = data["Date"]
            """
            !!!maybe need to change the first observed date
            """
            departureDateGap = util.days_between(departureDate, "20151109")
            x_i.append(departureDateGap)

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            minimumPreviousPrice = getMinimumPreviousPrice(data["Date"], state, datas)
            x_i.append(minimumPreviousPrice)

            # feature 5: maximum price before the observed date
            maximumPreviousPrice = getMaximumPreviousPrice(data["Date"], state, datas)
            x_i.append(maximumPreviousPrice)

            # output: label 1 when this entry's minimum price equals the
            # overall minimum price of its departure date
            y_i = [0]
            specificDatas = []
            specificDatas = [data2 for data2 in datas if data2["Date"]==departureDate]
            minPrice = getMinimumPrice(specificDatas)
            if util.getPrice(data["MinimumPrice"]) == minPrice:
                y_i = [1]

            # keep price info
            y_price = [util.getPrice(data["MinimumPrice"])]

            X_train = np.concatenate((X_train, [x_i]), axis=0)
            y_train = np.concatenate((y_train, [y_i]), axis=0)
            y_train_price = np.concatenate((y_train_price, [y_price]), axis=0)
        # end of for datas
    # end of for routes

    """
    remove duplicate rows
    """
    tmp = np.concatenate((X_train, y_train, y_train_price), axis=1)
    new_array = [tuple(row) for row in tmp]
    # NOTE(review): np.unique on a list of tuples flattens to a sorted 1-D
    # array of scalar values, not unique rows — presumably why the slicing
    # below is commented out. np.unique(..., axis=0) would dedupe whole rows;
    # confirm intent before relying on the saved 'tmp'.
    tmp = np.unique(new_array)

    # # get the result
    # X_train = tmp[:, 0:16]
    # y_train = tmp[:, 16]
    # y_train_price = tmp[:, 17]

    # save the result
    np.save('inputGeneralRaw/X_train', X_train)
    np.save('inputGeneralRaw/y_train', y_train)
    np.save('inputGeneralRaw/y_train_price', y_train_price)
    np.save('inputGeneralRaw/tmp', tmp)

    return X_train, y_train, y_train_price
def load_for_classification_for_Specific(dataset="Specific", routes=routes_specific):
    """
    Load the data for classification
    :param dataset: dataset name('Specific' or 'General')
    :return: X_train, y_train, X_test, y_test
    """
    # NOTE(review): this flag is only referenced inside commented-out code below.
    isOneOptimalState = False

    # Construct the input data
    # dim = one-hot route columns + 4 numeric features
    dim = routes.__len__() + 4
    X_train = np.empty(shape=(0, dim))
    y_train = np.empty(shape=(0,1))
    y_train_price = np.empty(shape=(0,1))
    X_test = np.empty(shape=(0,dim))
    y_test = np.empty(shape=(0,1))
    y_test_price = np.empty(shape=(0,1))

    for filePrefix in routes:
        datas = load_data_with_prefix_and_dataset(filePrefix, dataset)
        for data in datas:
            print "Construct route {}, State {}, departureDate {}...".format(filePrefix, data["State"], data["Date"])
            x_i = []
            # feature 1: flight number -> dummy variables
            for i in range(len(routes)):
                """
                !!!need to change!
                """
                if i == routes.index(filePrefix):
                    x_i.append(1)
                else:
                    x_i.append(0)

            # feature 2: departure date interval from "20151109", because the first observed date is 20151109
            departureDate = data["Date"]
            """
            !!!maybe need to change the first observed date
            """
            departureDateGap = util.days_between(departureDate, "20151109")
            x_i.append(departureDateGap)

            # feature 3: observed days before departure date
            state = data["State"]
            x_i.append(state)

            # feature 4: minimum price before the observed date
            minimumPreviousPrice = getMinimumPreviousPrice(data["Date"], state, datas)
            x_i.append(minimumPreviousPrice)

            # feature 5: maximum price before the observed date
            maximumPreviousPrice = getMaximumPreviousPrice(data["Date"], state, datas)
            x_i.append(maximumPreviousPrice)

            # output
            y_i = [0]
            specificDatas = []
            specificDatas = [data2 for data2 in datas if data2["Date"]==departureDate]
            # if isOneOptimalState:
            #     # Method 1: only 1 entry is buy
            #     optimalState = getOptimalState(specificDatas)
            #     if data["State"] == optimalState:
            #         y_i = [1]
            # else:
            #     # Method 2: multiple entries can be buy
            #     minPrice = getMinimumPrice(specificDatas)
            #     if util.getPrice(data["MinimumPrice"]) == minPrice:
            #         y_i = [1]

            #Method 2: multiple entries can be buy
            minPrice = getMinimumPrice(specificDatas)
            if util.getPrice(data["MinimumPrice"]) == minPrice:
                y_i = [1]

            # keep price info
            y_price = [util.getPrice(data["MinimumPrice"])]

            if int(departureDate) < 20160229 and int(departureDate) >= 20151129: # choose date between "20151129-20160229(20160115)" as training data
                X_train = np.concatenate((X_train, [x_i]), axis=0)
                y_train = np.concatenate((y_train, [y_i]), axis=0)
                y_train_price = np.concatenate((y_train_price, [y_price]), axis=0)
            elif int(departureDate) < 20160508 and int(departureDate) >= 20160229: # choose date before "20160508(20160220)" as test data
                X_test = np.concatenate((X_test, [x_i]), axis=0)
                y_test = np.concatenate((y_test, [y_i]), axis=0)
                y_test_price = np.concatenate((y_test_price, [y_price]), axis=0)
            else:
                pass
                # X_train = np.concatenate((X_train, [x_i]), axis=0)
                # y_train = np.concatenate((y_train, [y_i]), axis=0)
                # y_train_price = np.concatenate((y_train_price, [y_price]), axis=0)
        # end of for datas
    # end of for routes

    """
    remove duplicate rows for train
    """
    tmp_train = np.concatenate((X_train, y_train, y_train_price), axis=1)
    new_array = [tuple(row) for row in tmp_train]
    # NOTE(review): np.unique on a list of tuples flattens to a sorted 1-D
    # array of scalars, so the 2-D slices below look like they would raise
    # IndexError — confirm against the numpy version in use;
    # np.unique(..., axis=0) is the row-wise form.
    tmp_train = np.unique(new_array)

    # get the result; columns 0..11 features, 12 label, 13 price
    # (hard-coded for dim == 12, i.e. 8 routes)
    X_train = tmp_train[:, 0:12]
    y_train = tmp_train[:, 12]
    y_train_price = tmp_train[:, 13]

    """
    remove duplicate rows for test
    """
    tmp_test = np.concatenate((X_test, y_test, y_test_price), axis=1)
    new_array = [tuple(row) for row in tmp_test]
    tmp_test = np.unique(new_array)

    # get the result
    X_test = tmp_test[:, 0:12]
    y_test = tmp_test[:, 12]
    y_test_price = tmp_test[:, 13]

    # save the result
    np.save('inputSpecificRaw/X_train', X_train)
    np.save('inputSpecificRaw/y_train', y_train)
    np.save('inputSpecificRaw/y_train_price', y_train_price)
    np.save('inputSpecificRaw/X_test', X_test)
    np.save('inputSpecificRaw/y_test', y_test)
    np.save('inputSpecificRaw/y_test_price', y_test_price)

    return X_train, y_train, X_test, y_test