def mainRyan():
    """Load, filter and encode flight data, then split it into train/test sets.

    The market is hard-coded to ``AirportCodes.Frankfurt``, so records are
    drilled down to origins/destinations in {Dubai, Frankfurt} and cabin
    ``"Y"``. If ``market`` is edited to ``None``, the drill-down instead
    uses Dubai, London, Bahrain, Frankfurt and Bangkok.

    Returns:
        The 8-tuple ``(X_train, y_train, X_test, y_test, ids_train,
        ids_test, interp_params, cat_encoding)``, where the first six items
        come from ``aggregateTrainTestSplit`` and the last two are the
        encoding parameter tuples passed to ``encodeFlights``.
    """
    # Set parameters for loading the data
    num_records = 'all'
    csvfile = "Data/BKGDAT_ZeroTOTALBKD.txt"

    # Set parameters for filtering the data
    market = AirportCodes.Frankfurt
    cabins = ["Y"]

    # Get the data, filter it, and group it by flight.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Loading " + csvfile)
    f = FeatureFilter(num_records, csvfile)

    print("Filtering")
    if market is None:
        # No single market selected: use every airport in this list.
        airports = [
            AirportCodes.Dubai,
            AirportCodes.London,
            AirportCodes.Bahrain,
            AirportCodes.Frankfurt,
            AirportCodes.Bangkok,
        ]
    else:
        airports = [AirportCodes.Dubai, market]
    # Origins and destinations use the same airports; pass independent
    # copies so the callee never sees one list aliased under both names.
    data = f.getDrillDown(
        orgs=list(airports), dests=list(airports), cabins=cabins)

    print("Grouping by flight")
    unique_flights = f.getUniqueFlights(data)

    # Encode the flights
    print("Encoding flight data")
    # NOTE(review): presumably 31 interpolation points spanning -90..0
    # (days before departure?) -- confirm against encodeFlights.
    start = -90
    stop = 0
    num_points = 31
    interp_params = (start, stop, num_points)

    bin_size = 3
    date_reduction = 0
    cat_encoding = (bin_size, date_reduction)

    X, y, ids = encodeFlights(unique_flights, interp_params, cat_encoding)

    # NOTE(review): 0.75 is presumably the training fraction -- confirm
    # against aggregateTrainTestSplit.
    X_train, y_train, X_test, y_test, ids_train, ids_test = \
        aggregateTrainTestSplit(X, y, ids, 0.75)

    return (X_train, y_train, X_test, y_test, ids_train, ids_test,
            interp_params, cat_encoding)
def mainKyle():
    """Run sequential forward feature selection on Dubai/London flights.

    Loads the booking file, drills down to origins/destinations in
    {Dubai, London} and cabin ``"Y"``, groups the records by flight,
    encodes them, builds a 3-fold split with ``kFoldSplit`` and prints the
    result of ``sequentialForwardFeatureSelection`` for
    ``KNeighborsRegressor``. Returns nothing.
    """
    # Set parameters for loading the data
    num_records = 'all'
    csvfile = "Data/BKGDAT_ZeroTOTALBKD.txt"

    # Set parameters for filtering the data
    market = AirportCodes.London
    orgs = [AirportCodes.Dubai, market]
    dests = [AirportCodes.Dubai, market]
    cabins = ["Y"]

    # Get the data, filter it, and group it by flight.
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Loading " + csvfile)
    f = FeatureFilter(num_records, csvfile)

    print("Filtering")
    data = f.getDrillDown(orgs=orgs, dests=dests, cabins=cabins)

    print("Grouping by flight")
    unique_flights = f.getUniqueFlights(data)

    # Encode the flights
    print("Encoding flight data")
    # NOTE(review): presumably 31 interpolation points spanning -90..0
    # (days before departure?) -- confirm against encodeFlights.
    start = -90
    stop = 0
    num_points = 31
    interp_params = (start, stop, num_points)

    bin_size = 3
    date_reduction = 0
    cat_encoding = (bin_size, date_reduction)

    num_folds = 3

    X, y, ids = encodeFlights(unique_flights, interp_params, cat_encoding)

    # The second entry of the first element's 2-item shape is used as the
    # feature count.
    _, num_features = X[0].shape

    print('Generating k-fold')
    kf = kFoldSplit(X, y, ids, num_folds)

    print('Selecting features')
    # The estimator class itself (not an instance) is handed to the
    # selection routine, exactly as before.
    model = KNeighborsRegressor
    print(sequentialForwardFeatureSelection(model, kf, num_features))