def __init__(self):
    """Load the series from ``DATA_DIR`` and instantiate the models.

    The first CSV row is kept as ``self.products``.  From every later row
    the first ``self.num_queries`` cells are converted to ``int`` and the
    resulting list is appended to ``self.data``.
    """
    self.data = []

    # Presumably fractional (start, end) bounds of each split -- confirm
    # against the methods that consume them.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 1)

    self.data_size = 7  # 24 is the commented-out alternative
    self.num_queries = 2000
    # Notes kept from the original source:
    #   q = 1, epochs = 100
    #   q = 5, epochs = 20
    #   q = 100, epochs = 10
    #   q = 2000, epochs = 3

    with open(DATA_DIR, 'r') as f:
        rows = csv.reader(f, delimiter=',')
        header = next(rows, None)
        if header is not None:
            self.products = header
        for row in rows:
            self.data.append(
                [int(row[col]) for col in range(self.num_queries)])

    self.predictor1 = QLearn(threshold=0.5, regularization=True)
    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()
    self.predictor3 = simple_model((None, self.num_queries + 1), 2)
def __init__(self):
    """Load the randomized retail CSV and instantiate the models.

    The first CSV row is kept as ``self.products``.  From every later row
    the first ``self.num_queries`` cells are converted to ``int`` and the
    resulting list is appended to ``self.data``.
    """
    self.data = []

    # Presumably fractional (start, end) bounds of each split -- confirm
    # against the methods that consume them.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 1)

    self.data_size = 7
    self.num_queries = 200

    # 'timeseriesOnlineRetailCleaned2.csv' is the commented-out alternative.
    with open('../data/randomizedOnlineRetail.csv', 'r') as f:
        rows = csv.reader(f, delimiter=',')
        header = next(rows, None)
        if header is not None:
            self.products = header
        for row in rows:
            self.data.append(
                [int(row[col]) for col in range(self.num_queries)])

    self.predictor1 = QLearn(threshold=0.5, svd=True, regularization=True)
    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()
    self.baseline3 = AverageModel(threshold=0.75, regularization=True)
def __init__(self):
    """Load the series and restore a previously saved RNN model.

    ``self.num_queries`` is taken from the selected ``saved_models`` entry.
    The first CSV row of ``DATA_DIR`` is kept as ``self.products``; from
    every later row the first ``self.num_queries`` cells are converted to
    ``int`` and appended to ``self.data``.
    """
    self.data = []

    # Presumably fractional (start, end) bounds of each split -- confirm
    # against the methods that consume them.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 0.95)

    self.data_size = 7  # 24 is the commented-out alternative

    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()

    # Each entry: (num_queries, rnn_type, optimizer_type, learning_rate,
    #              layers, hidden_size, path of the saved weights).
    saved_models = [(
        10, "LSTM", "sgd", 0.001, 2, 32,
        "../results/10Qs/LSTM/2L/32HS/sgd-0.001lr/20180731-134128-683987/900epochs.h5"
    )]
    model_num = 0
    (query_count, rnn_type, optimizer_type, learning_rate, layers,
     hidden_size, weights_path) = saved_models[model_num]

    # The original first stored 200 here and then overwrote it with the
    # saved model's value; only the final value is kept.
    self.num_queries = query_count
    # Notes kept from the original source:
    #   q = 1, epochs = 100
    #   q = 5, epochs = 20
    #   q = 100, epochs = 10
    #   q = 2000, epochs = 3

    with open(DATA_DIR, 'r') as f:
        rows = csv.reader(f, delimiter=',')
        header = next(rows, None)
        if header is not None:
            self.products = header
        for row in rows:
            self.data.append(
                [int(row[col]) for col in range(self.num_queries)])

    self.predictor3 = RNNModel(query_count,
                               rnn_type=rnn_type,
                               optimizer_type=optimizer_type,
                               learning_rate=learning_rate,
                               layers=layers,
                               hidden_size=hidden_size)
    self.predictor3.load_model(weights_path)
def __init__(self):
    """Load the series from ``DATA_DIR`` and create the earliest-day baseline.

    The first CSV row is kept as ``self.products``.  From every later row
    the first ``self.num_queries`` cells are converted to ``int`` and the
    resulting list is appended to ``self.data``.
    """
    self.data = []

    # Presumably fractional (start, end) bounds of each split -- confirm
    # against the methods that consume them.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 0.95)

    self.data_size = 7  # 24 is the commented-out alternative
    self.num_queries = 200
    # Notes kept from the original source:
    #   q = 1, epochs = 100
    #   q = 5, epochs = 20
    #   q = 100, epochs = 10
    #   q = 2000, epochs = 3

    with open(DATA_DIR, 'r') as f:
        rows = csv.reader(f, delimiter=',')
        header = next(rows, None)
        if header is not None:
            self.products = header
        for row in rows:
            self.data.append(
                [int(row[col]) for col in range(self.num_queries)])

    self.baseline2 = EarliestModel()
def __init__(self):
    """Fetch the electricity data, split it and instantiate the models.

    The array returned by ``Electricity().get_data()`` is reduced to its
    first ``self.num_queries`` columns and then cut row-wise into
    training, validation and testing slices using the fractional
    ``(start, end)`` bounds stored on the instance.
    """
    self.data = []
    self.training = (0, 0.5)
    self.validation = (0.5, 0.8)
    self.testing = (0.8, 0.95)

    self.data_size = 96  # 24 is the commented-out alternative
    self.num_queries = 10
    # Notes kept from the original source:
    #   q = 1, epochs = 100
    #   q = 5, epochs = 20
    #   q = 100, epochs = 10
    #   q = 2000, epochs = 3

    self.ds = Electricity()
    data = self.ds.get_data()
    # Transpose, keep the first num_queries entries, transpose back:
    # i.e. keep only the first num_queries columns.
    data = np.transpose(np.transpose(data)[:self.num_queries])

    def rows_between(bounds):
        # Convert fractional bounds into row indices of ``data``.
        total = len(data)
        return data[int(bounds[0] * total):int(bounds[1] * total)]

    self.training_data = rows_between(self.training)
    self.validation_data = rows_between(self.validation)
    # Fixed: this slice previously reused the *training* bounds, which
    # made the test set an exact copy of the training set.
    self.testing_data = rows_between(self.testing)

    self.predictor1 = QLearn(threshold=0.5, regularization=False)
    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()
    self.predictor3 = RNNModel(self.num_queries,
                               rnn_type="LSTM",
                               optimizer_type="adam",
                               learning_rate=0.001,
                               layers=2,
                               hidden_size=64,
                               recurrent_dropout=0.2)
def __init__(self):
    """Load the series from ``DATA_DIR`` and instantiate the models.

    The first CSV row is kept as ``self.products``.  From every later row
    the first ``self.num_queries`` cells are converted to ``int`` and the
    resulting list is appended to ``self.data``.
    """
    self.data = []

    # Presumably fractional (start, end) bounds of each split -- confirm
    # against the methods that consume them.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 0.95)

    self.data_size = 7  # 24 is the commented-out alternative
    self.num_queries = 10
    # Notes kept from the original source:
    #   q = 1, epochs = 100
    #   q = 5, epochs = 20
    #   q = 100, epochs = 10
    #   q = 2000, epochs = 3

    with open(DATA_DIR, 'r') as f:
        rows = csv.reader(f, delimiter=',')
        header = next(rows, None)
        if header is not None:
            self.products = header
        for row in rows:
            self.data.append(
                [int(row[col]) for col in range(self.num_queries)])

    self.predictor1 = QLearn(threshold=0.5, regularization=True)
    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()
    self.predictor3 = RNNModel(self.num_queries,
                               rnn_type="LSTM",
                               optimizer_type="sgd",
                               learning_rate=0.001,
                               layers=1,
                               hidden_size=128,
                               recurrent_dropout=0.2)
class QOnlineRetail:
    """Compare a QLearn predictor with naive baselines on the retail series.

    Each row of ``self.data`` is one day.  A window of ``self.data_size``
    consecutive days, flattened into a single list, is the model input and
    the day that follows the window is the target.
    """

    # Row count used to turn the fractional split bounds into row indices.
    _SERIES_LENGTH = 360

    def __init__(self):
        """Read the CSV and create the predictor and the three baselines.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data``.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 1)
        self.data_size = 7
        self.num_queries = 200

        # newline='' is what the csv module asks for on files it reads.
        with open('../data/randomizedOnlineRetail.csv', 'r', newline='') as f:
            reader = csv.reader(f, delimiter=',')
            header = next(reader, None)
            if header is not None:
                self.products = header
            for row in reader:
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, svd=True, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.baseline3 = AverageModel(threshold=0.75, regularization=True)

    def _build_windows(self, bounds):
        """Return ``(inputs, outputs)`` sliding windows for one split.

        ``bounds`` is a fractional ``(start, end)`` pair.  Every input is
        ``self.data_size`` consecutive rows concatenated into one list and
        the matching output is the row right after that window.
        """
        first = int(self._SERIES_LENGTH * bounds[0])
        last = int(self._SERIES_LENGTH * bounds[1])
        row_width = len(self.data[0])

        window = []
        for day in range(first, first + self.data_size):
            window = window + self.data[day]

        inputs = []
        outputs = []
        for day in range(first + self.data_size, last):
            today = self.data[day]
            inputs.append(window)
            outputs.append(today)
            # Build a NEW list so the window just stored is not mutated:
            # append today, drop the oldest day.
            window = (window + today)[row_width:]
        return inputs, outputs

    def clean_data(self):
        """Drop low-volume columns and write the rest to a CSV file.

        Column totals are computed over the training rows only.  A column
        is kept when its total is greater than
        ``mean - std_dev + 3`` of the non-zero totals (population standard
        deviation).  The surviving header cells and data cells of *all*
        rows are written to ``timeseriesOnlineRetailCleaned.csv`` in the
        current working directory; ``self.data`` itself is left untouched.

        Raises:
            ZeroDivisionError: if no column has a positive training total.
        """
        first = int(self._SERIES_LENGTH * self.training[0])
        last = int(self._SERIES_LENGTH * self.training[1])
        column_totals = [sum(col) for col in zip(*self.data[first:last])]

        active_totals = [total for total in column_totals if total > 0]
        active_mean = sum(active_totals) / len(active_totals)
        active_variance = sum(
            (total - active_mean) ** 2 for total in active_totals
        ) / len(active_totals)
        active_std_dev = active_variance ** 0.5

        # NOTE: one standard deviation below the non-zero mean, plus a
        # fixed offset of 3 that the original code applies unexplained.
        lower_cutoff = active_mean - active_std_dev + 3
        print("Lower cutoff = " + str(lower_cutoff))

        kept_columns = [
            index for index, total in enumerate(column_totals)
            if total > lower_cutoff
        ]
        new_header = [self.products[index] for index in kept_columns]
        # Indexing by the kept columns replaces the per-cell list
        # membership test; the length guard mirrors the original, which
        # only visited columns that exist in the row.
        new_data = [
            [row[index] for index in kept_columns if index < len(row)]
            for row in self.data
        ]

        print("Writing into CSV")
        # newline='' stops the platform from rewriting the '\r\n' that
        # csv.writer emits (otherwise blank rows appear on Windows).
        with open('timeseriesOnlineRetailCleaned.csv', "w",
                  newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerow(new_header)
            writer.writerows(new_data)

    def validate_predictor(self):
        """Run every model on the validation windows and print section titles."""
        inputs, outputs = self._build_windows(self.validation)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, outputs, verbose=False)
        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, outputs, verbose=False)
        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, outputs, verbose=False)
        print()
        print("Average of Past Days Model")
        self.baseline3.test_model(inputs, outputs, verbose=False)
        # The RNN model (predictor2) evaluation is disabled in this class.

    def train_data(self):
        """Build the training windows and train the predictor and baselines."""
        inputs, outputs = self._build_windows(self.training)
        self.predictor1.set_training_data(inputs, outputs)

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, outputs)
        self.baseline2.train(inputs, outputs)
        self.baseline3.train(inputs, outputs)
        print("... Done Training")

    def print_concepts(self):
        """Delegate to ``self.predictor1.print_concepts()``."""
        self.predictor1.print_concepts()
class QOnlineRetail:
    """Compare QLearn, two naive baselines and an RNN on the retail series.

    Each row of ``self.data`` is one day.  QLearn and the baselines work on
    sliding windows of ``self.data_size`` days; the RNN works on the whole
    split as one sequence whose target at each step is the next day.
    """

    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and create the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data``.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)
        self.data_size = 7  # 24 is the commented-out alternative
        self.num_queries = 10
        # Notes kept from the original source:
        #   q = 1, epochs = 100
        #   q = 5, epochs = 20
        #   q = 100, epochs = 10
        #   q = 2000, epochs = 3

        with open(DATA_DIR, 'r') as f:
            reader = csv.reader(f, delimiter=',')
            header = next(reader, None)
            if header is not None:
                self.products = header
            for row in reader:
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries,
                                   rnn_type="LSTM",
                                   optimizer_type="sgd",
                                   learning_rate=0.001,
                                   layers=1,
                                   hidden_size=128,
                                   recurrent_dropout=0.2)

    def _build_windows(self, bounds):
        """Return ``(inputs, outputs)`` sliding windows for one split.

        ``bounds`` is a fractional ``(start, end)`` pair applied to a
        series length of 360 rows.  Every input is ``self.data_size``
        consecutive rows concatenated into one list and the matching
        output is the row right after that window.
        """
        first = int(360 * bounds[0])
        last = int(360 * bounds[1])
        row_width = len(self.data[0])

        window = []
        for day in range(first, first + self.data_size):
            window = window + self.data[day]

        inputs = []
        outputs = []
        for day in range(first + self.data_size, last):
            today = self.data[day]
            inputs.append(window)
            outputs.append(today)
            # Build a NEW list so the window just stored is not mutated.
            window = (window + today)[row_width:]
        return inputs, outputs

    def _split_span(self, bounds):
        """Return ``(start_row, row_count)`` of a split within ``self.x_data``."""
        total = len(self.x_data)
        start = int(bounds[0] * total)
        return start, int(bounds[1] * total) - start

    def _sequence_inputs(self, start, length):
        """Return a ``(1, length, num_queries)`` array copied from ``self.x_data``."""
        block = np.zeros((1, length, self.num_queries))
        for offset in range(length):
            row = self.x_data[start + offset]
            for q in range(self.num_queries):
                block[0][offset][q] = row[q]
        return block

    def _one_hot_targets(self, start, length):
        """Return one ``(1, length, 2)`` array per query from ``self.y_data``.

        The value stored in ``self.y_data`` is used directly as the index
        of the position set to 1, so it is expected to be 0 or 1.
        """
        targets = []
        for q in range(self.num_queries):
            encoded = np.zeros((1, length, 2))
            for offset in range(length):
                encoded[0][offset][self.y_data[start + offset][q]] = 1
            targets.append(encoded)
        return targets

    def validate_predictor(self):
        """Run every model on the validation data and print section titles.

        The RNN is evaluated on ``self.xval`` / ``self.yval``, which are
        only created by ``train_data``.
        """
        inputs, outputs = self._build_windows(self.validation)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, outputs, verbose=False)
        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, outputs, verbose=False)
        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, outputs, verbose=False)
        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)

    def train_data(self):
        """Build the training/validation tensors and train every model.

        Sets ``self.x_data`` / ``self.y_data`` (the series and the series
        shifted by one day) and ``self.xtrain``, ``self.ytrain``,
        ``self.xval``, ``self.yval``.
        """
        inputs, outputs = self._build_windows(self.training)
        self.predictor1.set_training_data(inputs, outputs)

        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        # Fixed: the training tensors used to be indexed with the absolute
        # row number, which is only correct while the training split
        # starts at row 0.  Rows are now placed relative to the split
        # start, exactly as the validation tensors already were.
        train_start, train_length = self._split_span(self.training)
        x_train = self._sequence_inputs(train_start, train_length)
        y_train = self._one_hot_targets(train_start, train_length)

        val_start, val_length = self._split_span(self.validation)
        x_val = self._sequence_inputs(val_start, val_length)
        y_val = self._one_hot_targets(val_start, val_length)

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, outputs)
        self.baseline2.train(inputs, outputs)
        self.predictor3.train(x_train, y_train, x_val, y_val)
        print("... Done Training")
class QOnlineRetail:
    """Compare QLearn, two naive baselines and a ``simple_model`` network.

    Each row of ``self.data`` is one day.  QLearn and the baselines work on
    sliding windows of ``self.data_size`` days; the network gets one
    sequence per query.
    """

    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and create the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data``.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 1)
        self.data_size = 7  # 24 is the commented-out alternative
        self.num_queries = 2000
        # Notes kept from the original source:
        #   q = 1, epochs = 100
        #   q = 5, epochs = 20
        #   q = 100, epochs = 10
        #   q = 2000, epochs = 3

        with open(DATA_DIR, 'r') as f:
            rows = csv.reader(f, delimiter=',')
            header = next(rows, None)
            if header is not None:
                self.products = header
            for row in rows:
                self.data.append(
                    [int(row[col]) for col in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = simple_model((None, self.num_queries + 1), 2)

    def _windows(self, bounds):
        """Return ``(inputs, outputs)`` sliding windows for one split.

        ``bounds`` is a fractional ``(start, end)`` pair applied to a
        series length of 360 rows.
        """
        first = int(360 * bounds[0])
        last = int(360 * bounds[1])

        window = []
        for day in range(first, first + self.data_size):
            window = window + self.data[day]

        inputs, outputs = [], []
        for day in range(first + self.data_size, last):
            today = self.data[day]
            inputs.append(window)
            outputs.append(today)
            # New list each step: append today, drop the oldest day.
            window = (window + today)[len(self.data[0]):]
        return inputs, outputs

    def _encode_split(self, bounds):
        """Return the ``(features, labels)`` arrays of one split.

        ``features[q][t]`` holds the value of query ``q`` at step ``t`` in
        position 0 and a 1 in position ``q + 1``; ``labels[q][t]`` has a 1
        at the index given by the next day's value of query ``q``.
        """
        total = len(self.x_data)
        first = int(bounds[0] * total)
        steps = int(bounds[1] * total) - first

        features = np.zeros((self.num_queries, steps, 1 + self.num_queries))
        labels = np.zeros((self.num_queries, steps, 2))
        for q in range(self.num_queries):
            for step in range(steps):
                features[q][step][0] = self.x_data[first + step][q]
                features[q][step][q + 1] = 1
                labels[q][step][self.y_data[first + step][q]] = 1
        return features, labels

    def validate_predictor(self):
        """Run every model on the validation data and print section titles.

        The network is evaluated on ``self.xval`` / ``self.yval``, which
        are only created by ``train_data``.
        """
        inputs, outputs = self._windows(self.validation)

        titled_models = (
            ("Linear Algebra Model", self.predictor1),
            ("Previous Day Naive Model", self.baseline1),
            ("Earliest Day Naive Model", self.baseline2),
        )
        for title, model in titled_models:
            print()
            print(title)
            model.test_model(inputs, outputs, verbose=False)

        print()
        print("RNN Model")
        test_model(self.predictor3, self.xval, self.yval)

    def train_data(self):
        """Build the training/validation tensors and train every model.

        Sets ``self.x_data`` / ``self.y_data`` (the series and the series
        shifted by one day) and, once the network has been trained,
        ``self.xtrain``, ``self.ytrain``, ``self.xval``, ``self.yval``.
        """
        inputs, outputs = self._windows(self.training)
        self.predictor1.set_training_data(inputs, outputs)

        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        x_train, y_train = self._encode_split(self.training)
        x_val, y_val = self._encode_split(self.validation)

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, outputs)
        self.baseline2.train(inputs, outputs)
        train(self.predictor3, x_train, y_train, x_val, y_val)

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val
        print("... Done Training")
class ElectricityPrediction:
    """Compare QLearn, two naive baselines and an RNN on electricity data.

    QLearn and the baselines work on sliding windows of ``self.data_size``
    rows; the RNN works on each split as one sequence whose target at each
    step is the following row.
    """

    def __init__(self):
        """Fetch the electricity data, split it and instantiate the models.

        The array returned by ``Electricity().get_data()`` is reduced to
        its first ``self.num_queries`` columns and then cut row-wise into
        training, validation and testing slices using the fractional
        ``(start, end)`` bounds stored on the instance.
        """
        self.data = []
        self.training = (0, 0.5)
        self.validation = (0.5, 0.8)
        self.testing = (0.8, 0.95)
        self.data_size = 96  # 24 is the commented-out alternative
        self.num_queries = 10
        # Notes kept from the original source:
        #   q = 1, epochs = 100
        #   q = 5, epochs = 20
        #   q = 100, epochs = 10
        #   q = 2000, epochs = 3

        self.ds = Electricity()
        data = self.ds.get_data()
        # Transpose, keep the first num_queries entries, transpose back:
        # i.e. keep only the first num_queries columns.
        data = np.transpose(np.transpose(data)[:self.num_queries])

        def rows_between(bounds):
            # Convert fractional bounds into row indices of ``data``.
            total = len(data)
            return data[int(bounds[0] * total):int(bounds[1] * total)]

        self.training_data = rows_between(self.training)
        self.validation_data = rows_between(self.validation)
        # Fixed: this slice previously reused the *training* bounds, which
        # made the test set an exact copy of the training set.
        self.testing_data = rows_between(self.testing)

        self.predictor1 = QLearn(threshold=0.5, regularization=False)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries,
                                   rnn_type="LSTM",
                                   optimizer_type="adam",
                                   learning_rate=0.001,
                                   layers=2,
                                   hidden_size=64,
                                   recurrent_dropout=0.2)

    def _build_windows(self, series):
        """Return ``(inputs, outputs)`` sliding windows over ``series``.

        ``series`` is one of the split arrays.  Every input is
        ``self.data_size`` consecutive rows concatenated into one list and
        the matching output is the row right after that window.
        """
        rows = series.tolist()
        row_width = len(rows[0]) if rows else 0

        window = []
        for index in range(0, self.data_size):
            window = window + rows[index]

        inputs = []
        outputs = []
        for index in range(self.data_size, len(rows)):
            current = rows[index]
            inputs.append(window)
            outputs.append(current)
            # New list each step: append the current row, drop the oldest.
            window = (window + current)[row_width:]
        return inputs, outputs

    def validate_predictor(self):
        """Run every model on the validation data and print section titles.

        The RNN is evaluated on ``self.xval`` / ``self.yval``, which are
        only created by ``train_data``.
        """
        inputs, outputs = self._build_windows(self.validation_data)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, outputs, verbose=False)
        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, outputs, verbose=False)
        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, outputs, verbose=False)
        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)

    def train_data(self):
        """Build the training/validation tensors and train every model.

        Sets ``self.xtrain``, ``self.ytrain``, ``self.xval`` and
        ``self.yval``; each is the split wrapped in a leading axis of
        length 1, with the targets shifted one row ahead of the inputs.
        """
        inputs, outputs = self._build_windows(self.training_data)
        self.predictor1.set_training_data(inputs, outputs)

        # self.data is never filled by this class, so these stay empty;
        # they are kept because they are attributes the original set.
        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        x_train = np.array([self.training_data[:-1]])
        y_train = np.array([self.training_data[1:]])
        x_val = np.array([self.validation_data[:-1]])
        y_val = np.array([self.validation_data[1:]])

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        print("")
        print("Window size: " + str(self.data_size))
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, outputs)
        self.baseline2.train(inputs, outputs)
        self.predictor3.train(x_train, y_train, x_val, y_val)
        print("... Done Training")
class QOnlineRetail:
    """Evaluate a previously saved RNN against two naive baselines.

    Each row of ``self.data`` is one day.  The baselines work on sliding
    windows of ``self.data_size`` days; the RNN tensors hold a whole split
    as one sequence whose target at each step is the next day.
    """

    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and restore the saved RNN model.

        ``self.num_queries`` is taken from the selected ``saved_models``
        entry.  The first CSV row is kept as ``self.products``; from every
        later row the first ``self.num_queries`` cells are converted to
        ``int`` and appended to ``self.data``.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)
        self.data_size = 7  # 24 is the commented-out alternative

        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()

        # Each entry: (num_queries, rnn_type, optimizer_type,
        #              learning_rate, layers, hidden_size, weights path).
        saved_models = [(
            10, "LSTM", "sgd", 0.001, 2, 32,
            "../results/10Qs/LSTM/2L/32HS/sgd-0.001lr/20180731-134128-683987/900epochs.h5"
        )]
        model_num = 0
        (query_count, rnn_type, optimizer_type, learning_rate, layers,
         hidden_size, weights_path) = saved_models[model_num]

        # The original first stored 200 here and immediately overwrote it
        # with the saved model's value; the dead store is removed.
        self.num_queries = query_count
        # Notes kept from the original source:
        #   q = 1, epochs = 100
        #   q = 5, epochs = 20
        #   q = 100, epochs = 10
        #   q = 2000, epochs = 3

        with open(DATA_DIR, 'r') as f:
            reader = csv.reader(f, delimiter=',')
            header = next(reader, None)
            if header is not None:
                self.products = header
            for row in reader:
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor3 = RNNModel(query_count,
                                   rnn_type=rnn_type,
                                   optimizer_type=optimizer_type,
                                   learning_rate=learning_rate,
                                   layers=layers,
                                   hidden_size=hidden_size)
        self.predictor3.load_model(weights_path)

    def _build_windows(self, bounds):
        """Return ``(inputs, outputs)`` sliding windows for one split.

        ``bounds`` is a fractional ``(start, end)`` pair applied to a
        series length of 360 rows.  Every input is ``self.data_size``
        consecutive rows concatenated into one list and the matching
        output is the row right after that window.
        """
        first = int(360 * bounds[0])
        last = int(360 * bounds[1])
        row_width = len(self.data[0])

        window = []
        for day in range(first, first + self.data_size):
            window = window + self.data[day]

        inputs = []
        outputs = []
        for day in range(first + self.data_size, last):
            today = self.data[day]
            inputs.append(window)
            outputs.append(today)
            # Build a NEW list so the window just stored is not mutated.
            window = (window + today)[row_width:]
        return inputs, outputs

    def _sequence_block(self, rows, bounds):
        """Return a ``(1, steps, num_queries)`` array for one split of ``rows``.

        ``rows`` is ``self.x_data`` or ``self.y_data``; ``bounds`` is the
        fractional ``(start, end)`` pair of the split.
        """
        total = len(rows)
        start = int(bounds[0] * total)
        steps = int(bounds[1] * total) - start

        block = np.zeros((1, steps, self.num_queries))
        for offset in range(steps):
            row = rows[start + offset]
            for q in range(self.num_queries):
                block[0][offset][q] = row[q]
        return block

    def validate_predictor(self):
        """Run the baselines and the RNN on the validation data.

        The RNN is evaluated on ``self.xval`` / ``self.yval``, which are
        only created by ``train_data``.  The QLearn predictor is disabled
        in this class.
        """
        inputs, outputs = self._build_windows(self.validation)

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, outputs, verbose=False)
        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, outputs, verbose=False)
        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)

    def train_data(self):
        """Build the RNN tensors and train the two baselines.

        Sets ``self.x_data`` / ``self.y_data`` (the series and the series
        shifted by one day) and ``self.xtrain``, ``self.ytrain``,
        ``self.xval``, ``self.yval``.  The RNN itself is not trained here;
        its weights are loaded in ``__init__``.
        """
        inputs, outputs = self._build_windows(self.training)

        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        # Fixed: the training tensors used to be indexed with the absolute
        # row number, which is only correct while the training split
        # starts at row 0.  Rows are now placed relative to the split
        # start, exactly as the validation tensors already were.
        x_train = self._sequence_block(self.x_data, self.training)
        y_train = self._sequence_block(self.y_data, self.training)
        x_val = self._sequence_block(self.x_data, self.validation)
        y_val = self._sequence_block(self.y_data, self.validation)

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        self.baseline1.train(inputs, outputs)
        self.baseline2.train(inputs, outputs)