def __init__(self):
    """Read the retail CSV and instantiate the predictor and baselines.

    The first CSV row is kept as ``self.products``.  From every later row
    the first ``self.num_queries`` cells are converted to ``int`` and
    appended to ``self.data`` as one list.
    """
    self.data = []
    # NOTE(review): presumably (start, end) fractions of the dataset for
    # each split -- nothing in this initializer reads them; confirm.
    self.training = (0, 0.6)
    self.validation = (0.6, 0.8)
    self.testing = (0.8, 1)
    # NOTE(review): presumably the look-back window length -- confirm.
    self.data_size = 7

    # Number of leading CSV columns that are loaded from each row.
    self.num_queries = 200
    with open('../data/randomizedOnlineRetail.csv', 'r') as f:
        for line_number, cells in enumerate(csv.reader(f, delimiter=',')):
            if line_number == 0:
                # Header line: keep the column titles.
                self.products = cells
                continue
            self.data.append(
                [int(cells[column]) for column in range(self.num_queries)])

    self.predictor1 = QLearn(threshold=0.5, svd=True, regularization=True)
    self.baseline1 = NaiveModel()
    self.baseline2 = EarliestModel()
    self.baseline3 = AverageModel(threshold=0.75, regularization=True)
    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and instantiate the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data`` as one list.
        """
        self.data = []
        # NOTE(review): presumably (start, end) fractions of the dataset
        # for each split -- nothing in this initializer reads them.
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 1)
        # NOTE(review): presumably the look-back window length -- confirm.
        self.data_size = 7

        # Number of leading CSV columns loaded from each row.  Epoch counts
        # the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 2000

        with open(DATA_DIR, 'r') as f:
            for line_number, cells in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    # Header line: keep the column titles.
                    self.products = cells
                    continue
                self.data.append(
                    [int(cells[column]) for column in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = simple_model((None, self.num_queries + 1), 2)
# Example #3
# 0
    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and restore a previously saved RNN.

        The number of CSV columns loaded per row (``self.num_queries``) is
        taken from the selected saved-model record, so the loaded data has
        the query count that record specifies.
        """
        self.data = []
        # NOTE(review): presumably (start, end) fractions of the dataset
        # for each split -- nothing in this initializer reads them.
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)
        # NOTE(review): presumably the look-back window length -- confirm.
        self.data_size = 7

        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()

        # One record per saved model: (query count, RNN type, optimizer,
        # learning rate, layers, hidden size, path passed to load_model).
        saved_models = [(
            10, "LSTM", "sgd", 0.001, 2, 32,
            "../results/10Qs/LSTM/2L/32HS/sgd-0.001lr/20180731-134128-683987/900epochs.h5"
        )]
        model_num = 0
        (query_count, rnn_type, optimizer_type, learning_rate, layers,
         hidden_size, model_path) = saved_models[model_num]

        # Epoch counts the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = query_count

        with open(DATA_DIR, 'r') as f:
            for line_number, cells in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    # Header line: keep the column titles.
                    self.products = cells
                    continue
                self.data.append(
                    [int(cells[column]) for column in range(self.num_queries)])

        self.predictor3 = RNNModel(query_count,
                                   rnn_type=rnn_type,
                                   optimizer_type=optimizer_type,
                                   learning_rate=learning_rate,
                                   layers=layers,
                                   hidden_size=hidden_size)
        self.predictor3.load_model(model_path)
# Example #4
# 0
    def __init__(self):
        """Split the electricity dataset and instantiate the models.

        Only the first ``self.num_queries`` columns of the dataset returned
        by ``Electricity().get_data()`` are kept.  The rows are then cut
        into ``training_data``, ``validation_data`` and ``testing_data``
        using the (start, end) fractions stored on the instance.
        """
        self.data = []
        # (start, end) fractions of the row count used for each split.
        self.training = (0, 0.5)
        self.validation = (0.5, 0.8)
        self.testing = (0.8, 0.95)
        # NOTE(review): presumably the look-back window length -- confirm.
        self.data_size = 96

        # Epoch counts the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 10

        self.ds = Electricity()
        data = self.ds.get_data()
        # Keep the first num_queries columns: transpose, take the leading
        # rows, transpose back.
        data = np.transpose(np.transpose(data)[:self.num_queries])

        def split(bounds):
            """Return the rows between the two fractional bounds."""
            total = len(data)
            return data[int(bounds[0] * total):int(bounds[1] * total)]

        self.training_data = split(self.training)
        self.validation_data = split(self.validation)
        # Bug fix: this slice previously reused the *training* bounds, so
        # testing_data was just a second copy of training_data.
        self.testing_data = split(self.testing)

        self.predictor1 = QLearn(threshold=0.5, regularization=False)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries,
                                   rnn_type="LSTM",
                                   optimizer_type="adam",
                                   learning_rate=0.001,
                                   layers=2,
                                   hidden_size=64,
                                   recurrent_dropout=0.2)
    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and instantiate the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data`` as one list.
        """
        self.data = []
        # NOTE(review): presumably (start, end) fractions of the dataset
        # for each split -- nothing in this initializer reads them.
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)
        # NOTE(review): presumably the look-back window length -- confirm.
        self.data_size = 7

        # Number of leading CSV columns loaded from each row.  Epoch counts
        # the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 10

        with open(DATA_DIR, 'r') as f:
            for line_number, cells in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    # Header line: keep the column titles.
                    self.products = cells
                    continue
                self.data.append(
                    [int(cells[column]) for column in range(self.num_queries)])

        rnn_settings = dict(rnn_type="LSTM",
                            optimizer_type="sgd",
                            learning_rate=0.001,
                            layers=1,
                            hidden_size=128,
                            recurrent_dropout=0.2)

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries, **rnn_settings)
class QOnlineRetail:
    """Online-retail experiment: a QLearn predictor against three baselines.

    ``self.data`` holds one list of ints per CSV data row.  The split
    tuples (``training``, ``validation``, ``testing``) are (start, end)
    fractions that are multiplied by ``_NUM_DAYS`` to obtain row ranges.
    """

    # Row count the split fractions are applied to.
    # NOTE(review): hard-coded in the original; presumably the number of
    # daily rows in the CSV -- confirm against the data file.
    _NUM_DAYS = 360

    def __init__(self):
        """Read the retail CSV and instantiate the predictor and baselines.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data`` as one list.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 1)  # not used by the methods of this class
        # Number of consecutive rows concatenated into one model input.
        self.data_size = 7

        # Number of leading CSV columns loaded from each row.
        self.num_queries = 200
        # newline='' is what the csv module asks for on files it reads.
        with open('../data/randomizedOnlineRetail.csv', 'r', newline='') as f:
            for line_number, row in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    self.products = row
                    continue
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, svd=True, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.baseline3 = AverageModel(threshold=0.75, regularization=True)

    def _build_windows(self, bounds):
        """Return ``(inputs, targets)`` sliding-window pairs for one split.

        Args:
            bounds: (start, end) fractions of ``_NUM_DAYS``.

        Each target is one row of ``self.data``; its input is the flat
        concatenation of the ``self.data_size`` rows that precede it.
        """
        first_day = int(self._NUM_DAYS * bounds[0])
        last_day = int(self._NUM_DAYS * bounds[1])

        # Seed the window with the first data_size rows of the split.
        window = []
        for day in range(first_day, first_day + self.data_size):
            window = window + self.data[day]

        inputs = []
        targets = []
        for day in range(first_day + self.data_size, last_day):
            today = self.data[day]
            inputs.append(window)
            targets.append(today)
            # Slide forward by one row.  A single slice replaces the
            # original row-width many pop(0) calls (each one O(len)).
            window = (window + today)[len(self.data[0]):]
        return inputs, targets

    def clean_data(self):
        """Write a copy of the data restricted to frequently-hit columns.

        Column sums are taken over the training rows only.  A column is
        kept when its sum exceeds ``mean - std_dev + 3`` computed over the
        columns whose sum is positive.  The kept columns of *all* rows,
        preceded by the matching header cells, are written to
        ``timeseriesOnlineRetailCleaned.csv`` in the working directory.
        ``self.data`` itself is left untouched.

        Raises:
            ZeroDivisionError: if no column has a positive sum.
        """
        training_rows = self.data[int(self._NUM_DAYS * self.training[0]):
                                  int(self._NUM_DAYS * self.training[1])]
        column_sums = [sum(column) for column in zip(*training_rows)]

        nonzero_sums = [total for total in column_sums if total > 0]
        nonzero_mean = sum(nonzero_sums) / len(nonzero_sums)
        nonzero_variance = sum(
            (total - nonzero_mean) ** 2 for total in nonzero_sums
        ) / len(nonzero_sums)
        nonzero_std_dev = nonzero_variance ** 0.5

        # One standard deviation below the non-zero mean, plus 3.
        # NOTE(review): the original gives no reason for the +3 offset.
        lower_cutoff = nonzero_mean - nonzero_std_dev + 3
        print("Lower cutoff = " + str(lower_cutoff))

        kept_columns = [
            index for index, total in enumerate(column_sums)
            if total > lower_cutoff
        ]
        new_header = [self.products[index] for index in kept_columns]

        # Set membership keeps the per-cell test O(1).
        kept_lookup = set(kept_columns)
        new_data = [
            [value for index, value in enumerate(row) if index in kept_lookup]
            for row in self.data
        ]

        print("Writing into CSV")
        # newline='' stops the csv writer's own "\r\n" from being expanded
        # a second time on platforms that translate line endings.
        with open('timeseriesOnlineRetailCleaned.csv', "w", newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerow(new_header)
            writer.writerows(new_data)

    def validate_predictor(self):
        """Run every model's ``test_model`` on the validation windows."""
        inputs, targets = self._build_windows(self.validation)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, targets, verbose=False)

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, targets, verbose=False)

        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, targets, verbose=False)

        print()
        print("Average of Past Days Model")
        self.baseline3.test_model(inputs, targets, verbose=False)

    def train_data(self):
        """Train the predictor and all baselines on the training windows."""
        inputs, targets = self._build_windows(self.training)
        self.predictor1.set_training_data(inputs, targets)

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, targets)
        self.baseline2.train(inputs, targets)
        self.baseline3.train(inputs, targets)
        print("... Done Training")

    def print_concepts(self):
        """Delegate to ``predictor1.print_concepts``."""
        self.predictor1.print_concepts()
class QOnlineRetail:
    """Online-retail experiment: QLearn, two naive baselines and an RNN.

    ``self.data`` holds one list of ints per CSV data row.  The split
    tuples (``training``, ``validation``, ``testing``) are (start, end)
    fractions.  The sliding-window models apply them to ``_NUM_DAYS``; the
    RNN tensors apply them to the number of (row, next-row) pairs.
    """

    # Row count the window splits are applied to.
    # NOTE(review): hard-coded in the original; presumably the number of
    # daily rows in the CSV -- confirm against the data file.
    _NUM_DAYS = 360

    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and instantiate the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data`` as one list.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)  # not used by the methods of this class
        # Number of consecutive rows concatenated into one window input.
        self.data_size = 7

        # Number of leading CSV columns loaded from each row.  Epoch counts
        # the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 10

        # newline='' is what the csv module asks for on files it reads.
        with open(DATA_DIR, 'r', newline='') as f:
            for line_number, row in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    self.products = row
                    continue
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries,
                                   rnn_type="LSTM",
                                   optimizer_type="sgd",
                                   learning_rate=0.001,
                                   layers=1,
                                   hidden_size=128,
                                   recurrent_dropout=0.2)

    def _build_windows(self, bounds):
        """Return ``(inputs, targets)`` sliding-window pairs for one split.

        Args:
            bounds: (start, end) fractions of ``_NUM_DAYS``.

        Each target is one row of ``self.data``; its input is the flat
        concatenation of the ``self.data_size`` rows that precede it.
        """
        first_day = int(self._NUM_DAYS * bounds[0])
        last_day = int(self._NUM_DAYS * bounds[1])

        # Seed the window with the first data_size rows of the split.
        window = []
        for day in range(first_day, first_day + self.data_size):
            window = window + self.data[day]

        inputs = []
        targets = []
        for day in range(first_day + self.data_size, last_day):
            today = self.data[day]
            inputs.append(window)
            targets.append(today)
            # Slide forward by one row.  A single slice replaces the
            # original row-width many pop(0) calls (each one O(len)).
            window = (window + today)[len(self.data[0]):]
        return inputs, targets

    def _split_range(self, bounds):
        """Map fractional bounds to (start, stop) indices into x_data."""
        total = len(self.x_data)
        return int(bounds[0] * total), int(bounds[1] * total)

    def _encode_inputs(self, bounds):
        """Return a ``(1, steps, num_queries)`` array of x_data rows."""
        start, stop = self._split_range(bounds)
        encoded = np.zeros((1, stop - start, self.num_queries))
        # Index by position inside the split, not by absolute row number.
        for step, row in enumerate(self.x_data[start:stop]):
            encoded[0, step, :] = row[:self.num_queries]
        return encoded

    def _encode_targets(self, bounds):
        """Return one ``(1, steps, 2)`` one-hot array per query column.

        The y_data cell value (used directly as the index, so it must be
        0 or 1) selects which of the two slots is set to 1.
        """
        start, stop = self._split_range(bounds)
        encoded = []
        for q in range(self.num_queries):
            one_hot = np.zeros((1, stop - start, 2))
            for step, row in enumerate(self.y_data[start:stop]):
                one_hot[0, step, row[q]] = 1
            encoded.append(one_hot)
        return encoded

    def validate_predictor(self):
        """Run every model's ``test_model`` on the validation data.

        The RNN is evaluated on ``self.xval`` / ``self.yval``, which are
        only set by ``train_data``; call that first.
        """
        inputs, targets = self._build_windows(self.validation)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, targets, verbose=False)

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, targets, verbose=False)

        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, targets, verbose=False)

        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)

    def train_data(self):
        """Build the training/validation sets and train every model.

        Stores the RNN tensors as ``xtrain``, ``ytrain``, ``xval`` and
        ``yval`` on the instance for later use by ``validate_predictor``.
        """
        inputs, targets = self._build_windows(self.training)
        self.predictor1.set_training_data(inputs, targets)

        # Next-step prediction pairs: x_data[i] is followed by y_data[i].
        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        # Bug fix: the training tensors used to be indexed with the
        # absolute row index, which overruns the array whenever
        # training[0] != 0.  They now use the offset inside the split,
        # exactly as the validation tensors already did.
        x_train = self._encode_inputs(self.training)
        y_train = self._encode_targets(self.training)
        x_val = self._encode_inputs(self.validation)
        y_val = self._encode_targets(self.validation)

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, targets)
        self.baseline2.train(inputs, targets)
        self.predictor3.train(x_train, y_train, x_val, y_val)
        print("... Done Training")
class QOnlineRetail:
    """Online-retail experiment: QLearn, two naive baselines and a
    per-query model created by ``simple_model``.

    ``self.data`` holds one list of ints per CSV data row.  The split
    tuples (``training``, ``validation``, ``testing``) are (start, end)
    fractions.  The sliding-window models apply them to ``_NUM_DAYS``; the
    per-query tensors apply them to the number of (row, next-row) pairs.
    """

    # Row count the window splits are applied to.
    # NOTE(review): hard-coded in the original; presumably the number of
    # daily rows in the CSV -- confirm against the data file.
    _NUM_DAYS = 360

    def __init__(self):
        """Read the CSV at ``DATA_DIR`` and instantiate the models.

        The first CSV row is kept as ``self.products``.  From every later
        row the first ``self.num_queries`` cells are converted to ``int``
        and appended to ``self.data`` as one list.
        """
        self.data = []
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 1)  # not used by the methods of this class
        # Number of consecutive rows concatenated into one window input.
        self.data_size = 7

        # Number of leading CSV columns loaded from each row.  Epoch counts
        # the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 2000

        # newline='' is what the csv module asks for on files it reads.
        with open(DATA_DIR, 'r', newline='') as f:
            for line_number, row in enumerate(csv.reader(f, delimiter=',')):
                if line_number == 0:
                    self.products = row
                    continue
                self.data.append(
                    [int(row[i]) for i in range(self.num_queries)])

        self.predictor1 = QLearn(threshold=0.5, regularization=True)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = simple_model((None, self.num_queries + 1), 2)

    def _build_windows(self, bounds):
        """Return ``(inputs, targets)`` sliding-window pairs for one split.

        Args:
            bounds: (start, end) fractions of ``_NUM_DAYS``.

        Each target is one row of ``self.data``; its input is the flat
        concatenation of the ``self.data_size`` rows that precede it.
        """
        first_day = int(self._NUM_DAYS * bounds[0])
        last_day = int(self._NUM_DAYS * bounds[1])

        # Seed the window with the first data_size rows of the split.
        window = []
        for day in range(first_day, first_day + self.data_size):
            window = window + self.data[day]

        inputs = []
        targets = []
        for day in range(first_day + self.data_size, last_day):
            today = self.data[day]
            inputs.append(window)
            targets.append(today)
            # Slide forward by one row.  A single slice replaces the
            # original row-width many pop(0) calls, which were O(len)
            # each and dominated the run time at 2000 columns.
            window = (window + today)[len(self.data[0]):]
        return inputs, targets

    def _split_range(self, bounds):
        """Map fractional bounds to (start, stop) indices into x_data."""
        total = len(self.x_data)
        return int(bounds[0] * total), int(bounds[1] * total)

    def _encode_inputs(self, bounds):
        """Return a ``(num_queries, steps, 1 + num_queries)`` input array.

        For query ``q``, feature 0 carries the x_data value of that query
        at each step and feature ``q + 1`` is set to 1 at every step (a
        one-hot tag identifying the query).
        """
        start, stop = self._split_range(bounds)
        encoded = np.zeros(
            (self.num_queries, stop - start, 1 + self.num_queries))
        rows = self.x_data[start:stop]
        for q in range(self.num_queries):
            for step, row in enumerate(rows):
                encoded[q, step, 0] = row[q]
                encoded[q, step, q + 1] = 1
        return encoded

    def _encode_targets(self, bounds):
        """Return a ``(num_queries, steps, 2)`` one-hot target array.

        The y_data cell value (used directly as the index, so it must be
        0 or 1) selects which of the two slots is set to 1.
        """
        start, stop = self._split_range(bounds)
        encoded = np.zeros((self.num_queries, stop - start, 2))
        rows = self.y_data[start:stop]
        for q in range(self.num_queries):
            for step, row in enumerate(rows):
                encoded[q, step, row[q]] = 1
        return encoded

    def validate_predictor(self):
        """Run every model's evaluation on the validation data.

        The third predictor is evaluated on ``self.xval`` / ``self.yval``,
        which are only set by ``train_data``; call that first.
        """
        inputs, targets = self._build_windows(self.validation)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, targets, verbose=False)

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, targets, verbose=False)

        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, targets, verbose=False)

        print()
        print("RNN Model")
        test_model(self.predictor3, self.xval, self.yval)

    def train_data(self):
        """Build the training/validation sets and train every model.

        Stores the per-query tensors as ``xtrain``, ``ytrain``, ``xval``
        and ``yval`` on the instance once training has finished.
        """
        inputs, targets = self._build_windows(self.training)
        self.predictor1.set_training_data(inputs, targets)

        # Next-step prediction pairs: x_data[i] is followed by y_data[i].
        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        x_train = self._encode_inputs(self.training)
        y_train = self._encode_targets(self.training)
        x_val = self._encode_inputs(self.validation)
        y_val = self._encode_targets(self.validation)

        print("")
        print("Window size " + str(self.data_size) + " days")
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, targets)
        self.baseline2.train(inputs, targets)
        train(self.predictor3, x_train, y_train, x_val, y_val)

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        print("... Done Training")
# Example #9
# 0
class ElectricityPrediction:
    """Electricity experiment: QLearn, two naive baselines and an RNN.

    The dataset rows are cut into ``training_data``, ``validation_data``
    and ``testing_data`` by the (start, end) fractions stored on the
    instance.
    """

    def __init__(self):
        """Split the electricity dataset and instantiate the models.

        Only the first ``self.num_queries`` columns of the dataset returned
        by ``Electricity().get_data()`` are kept.
        """
        # Never filled by this class; train_data only derives the (empty)
        # x_data / y_data attributes from it.
        self.data = []
        # (start, end) fractions of the row count used for each split.
        self.training = (0, 0.5)
        self.validation = (0.5, 0.8)
        self.testing = (0.8, 0.95)
        # Number of consecutive rows concatenated into one window input.
        self.data_size = 96

        # Epoch counts the original author noted per query count:
        #   q = 1 -> 100, q = 5 -> 20, q = 100 -> 10, q = 2000 -> 3
        self.num_queries = 10

        self.ds = Electricity()
        data = self.ds.get_data()
        # Keep the first num_queries columns: transpose, take the leading
        # rows, transpose back.
        data = np.transpose(np.transpose(data)[:self.num_queries])

        def split(bounds):
            """Return the rows between the two fractional bounds."""
            total = len(data)
            return data[int(bounds[0] * total):int(bounds[1] * total)]

        self.training_data = split(self.training)
        self.validation_data = split(self.validation)
        # Bug fix: this slice previously reused the *training* bounds, so
        # testing_data was just a second copy of training_data.
        self.testing_data = split(self.testing)

        self.predictor1 = QLearn(threshold=0.5, regularization=False)
        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()
        self.predictor3 = RNNModel(self.num_queries,
                                   rnn_type="LSTM",
                                   optimizer_type="adam",
                                   learning_rate=0.001,
                                   layers=2,
                                   hidden_size=64,
                                   recurrent_dropout=0.2)

    def _build_windows(self, rows):
        """Return ``(inputs, targets)`` sliding-window pairs for ``rows``.

        Args:
            rows: array whose ``tolist()`` yields one list per time step.

        Each target is one row; its input is the flat concatenation of
        the ``self.data_size`` rows that precede it.
        """
        as_lists = rows.tolist()

        # Seed the window with the first data_size rows.
        window = []
        for step in range(0, self.data_size):
            window = window + as_lists[step]

        inputs = []
        targets = []
        for step in range(self.data_size, len(rows)):
            today = as_lists[step]
            inputs.append(window)
            targets.append(today)
            # Slide forward by one row: append today, drop the oldest row.
            window = (window + today)[len(rows[0]):]
        return inputs, targets

    def validate_predictor(self):
        """Run every model's ``test_model`` on the validation data.

        The RNN is evaluated on ``self.xval`` / ``self.yval``, which are
        only set by ``train_data``; call that first.
        """
        inputs, targets = self._build_windows(self.validation_data)

        print()
        print("Linear Algebra Model")
        self.predictor1.test_model(inputs, targets, verbose=False)

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(inputs, targets, verbose=False)

        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(inputs, targets, verbose=False)

        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)

    def train_data(self):
        """Build the training/validation sets and train every model.

        Stores the RNN tensors as ``xtrain``, ``ytrain``, ``xval`` and
        ``yval`` on the instance for later use by ``validate_predictor``.
        """
        inputs, targets = self._build_windows(self.training_data)
        self.predictor1.set_training_data(inputs, targets)

        # Kept for attribute compatibility; self.data is empty in this
        # class, so both are empty lists.
        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        # Next-step prediction: each row is paired with the row after it,
        # wrapped in a leading axis of length 1.
        x_train = np.array([self.training_data[:-1]])
        y_train = np.array([self.training_data[1:]])
        x_val = np.array([self.validation_data[:-1]])
        y_val = np.array([self.validation_data[1:]])

        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        print("")
        print("Window size: " + str(self.data_size))
        print("")
        print("Training Model...")
        self.predictor1.train()
        self.baseline1.train(inputs, targets)
        self.baseline2.train(inputs, targets)
        self.predictor3.train(x_train, y_train, x_val, y_val)
        print("... Done Training")
# Example #10
# 0
class QOnlineRetail:
    def __init__(self):
        """Set up split bounds, baselines, query data and a saved RNN.

        Reads ``DATA_DIR`` as CSV: the first row is stored as
        ``self.products``; for every later row the first
        ``self.num_queries`` cells are converted to ``int`` and the
        resulting list is appended to ``self.data``. The RNN is constructed
        from the selected ``saved_models`` entry and ``load_model`` is then
        called with that entry's file path.
        """
        self.data = []
        # (start, end) fractions of the data used for each split.
        self.training = (0, 0.6)
        self.validation = (0.6, 0.8)
        self.testing = (0.8, 0.95)
        # Number of consecutive rows flattened into one baseline sample.
        self.data_size = 7

        self.baseline1 = NaiveModel()
        self.baseline2 = EarliestModel()

        # Each entry: (query count, RNN type, optimizer, learning rate,
        # layers, hidden size, saved-model path).
        saved_models = [(
            10, "LSTM", "sgd", 0.001, 2, 32,
            "../results/10Qs/LSTM/2L/32HS/sgd-0.001lr/20180731-134128-683987/900epochs.h5"
        )]
        model_num = 0
        (query_count, rnn_type, optimizer_type, learning_rate, layers,
         hidden_size, model_path) = saved_models[model_num]

        # The query count is dictated by the saved model being loaded.
        # q = 1, epochs = 100
        # q = 5, epochs = 20
        # q = 100, epochs = 10
        # q = 2000, epochs = 3
        self.num_queries = query_count

        with open(DATA_DIR, 'r') as f:
            for row_number, row in enumerate(csv.reader(f, delimiter=',')):
                if row_number == 0:
                    # Header row: kept as-is.
                    self.products = row
                    continue
                self.data.append(
                    [int(row[col]) for col in range(self.num_queries)])

        self.predictor3 = RNNModel(query_count,
                                   rnn_type=rnn_type,
                                   optimizer_type=optimizer_type,
                                   learning_rate=learning_rate,
                                   layers=layers,
                                   hidden_size=hidden_size)
        self.predictor3.load_model(model_path)

    def validate_predictor(self):
        input = []
        output = []

        week_data = []
        for days in range(int(360 * self.validation[0]),
                          int(360 * self.validation[0]) + self.data_size):
            today = self.data[days]
            week_data = week_data + today

        for days in range(
                int(360 * self.validation[0]) + self.data_size,
                int(360 * self.validation[1])):
            today = self.data[days]
            input.append(week_data)
            output.append(today)
            week_data = week_data + today
            for i in range(len(self.data[0])):
                week_data.pop(0)

        #print ()
        #print ("Linear Algebra Model")
        #self.predictor1.try_ktruncations (input,output)
        #self.predictor1.test_model (input, output, verbose=False)
        #self.predictor1.print_concepts()

        print()
        print("Previous Day Naive Model")
        self.baseline1.test_model(input, output, verbose=False)

        print()
        print("Earliest Day Naive Model")
        self.baseline2.test_model(input, output, verbose=False)

        #print ()
        #print ("Average of Past Days Model")
        #self.baseline3.test_model (input, output, verbose=False)

        print()
        print("RNN Model")
        self.predictor3.test_model(self.xval, self.yval)
        #self.predictor2.test_model_keras (input, output)
        #self.predictor2.test_model(input, output, verbose=False)

    def train_data(self):
        """Build the baseline window samples and the RNN train/validation arrays.

        Sliding-window samples (``self.data_size`` consecutive rows of
        ``self.data`` flattened into one list, with the following row as the
        target) are cut from the training day range and passed to
        ``baseline1.train`` and ``baseline2.train``.

        Next-step arrays of shape ``(1, steps, self.num_queries)`` are filled
        from ``self.data[:-1]`` (inputs) and ``self.data[1:]`` (targets) for
        the training and validation fractions, and stored on ``self.xtrain``,
        ``self.ytrain``, ``self.xval`` and ``self.yval``. ``self.x_data`` and
        ``self.y_data`` are set as well.
        """

        input = []
        output = []

        # Seed the window with the first ``data_size`` days of the training
        # range.
        # NOTE(review): the day count 360 is hard-coded here, while the RNN
        # arrays below are sized from len(self.x_data) / len(self.y_data).
        week_data = []
        for days in range(int(360 * self.training[0]),
                          int(360 * self.training[0]) + self.data_size):
            today = self.data[days]
            week_data = week_data + today

        # Each remaining training day becomes a target; the window that
        # precedes it becomes the sample.
        for days in range(
                int(360 * self.training[0]) + self.data_size,
                int(360 * self.training[1])):
            today = self.data[days]
            input.append(week_data)
            output.append(today)
            # Concatenation creates a new list, so the sample just stored is
            # not affected by the pops below.
            week_data = week_data + today
            # Drop the oldest day's values from the front of the window.
            for i in range(len(self.data[0])):
                week_data.pop(0)
        #self.predictor1.set_training_data (input, output)

        # Next-step pairs: row t of x_data is paired with row t of y_data,
        # which is row t + 1 of self.data.
        self.x_data = self.data[:-1]
        self.y_data = self.data[1:]

        train_length = int(self.training[1] * len(self.y_data)) - int(
            self.training[0] * len(self.y_data))
        x_train = np.zeros(
            (1, train_length, self.num_queries
             ))  # 1 example, number of time steps, number of queries
        # NOTE(review): rows are written at the absolute index i, whereas the
        # validation loops below subtract the split start. This stays in
        # bounds only while self.training[0] is 0 (the value __init__ sets).
        for i in range(int(self.training[0] * len(self.x_data)),
                       int(self.training[0] * len(self.x_data)) +
                       train_length):
            for q in range(self.num_queries):
                x_train[0][i][q] = self.x_data[i][
                    q]  # sets the query at the time step to either 0 or 1

        y_train = np.zeros(
            (1, train_length, self.num_queries
             ))  # 1 example, number of time steps, number of queries
        # Same absolute-index caveat as for x_train above.
        for i in range(int(self.training[0] * len(self.y_data)),
                       int(self.training[0] * len(self.y_data)) +
                       train_length):
            for q in range(self.num_queries):
                y_train[0][i][q] = self.y_data[i][q]
        # The bare string below is a disabled alternative encoding of
        # y_train; as a string expression it has no effect at runtime.
        """
            y_train = []
            for q in range (self.num_queries):
            y_train.append(np.zeros ((1, train_length, 2)))
            for i in range (int(self.training[0]*len (self.y_data)), int(self.training[0] * len (self.x_data)) + train_length):
            y_train[q][0][i][self.y_data[i][q]] = 1
        """

        #Validation
        val_length = int(self.validation[1] * len(self.x_data)) - int(
            self.validation[0] * len(self.x_data))

        # Validation rows are re-based so the first row of the split lands at
        # index 0 of the array.
        x_val = np.zeros((1, val_length, self.num_queries))
        for i in range(int(self.validation[0] * len(self.x_data)),
                       int(self.validation[0] * len(self.x_data)) +
                       val_length):
            for q in range(self.num_queries):
                x_val[0][i - int(self.validation[0] *
                                 len(self.x_data))][q] = self.x_data[i][q]

        y_val = np.zeros((1, val_length, self.num_queries))
        for i in range(int(self.validation[0] * len(self.y_data)),
                       int(self.validation[0] * len(self.y_data)) +
                       val_length):
            for q in range(self.num_queries):
                y_val[0][i - int(self.validation[0] *
                                 len(self.y_data))][q] = self.y_data[i][q]

        # Kept on the instance; validate_predictor reads xval and yval.
        self.xtrain = x_train
        self.ytrain = y_train
        self.xval = x_val
        self.yval = y_val

        # The baselines train on the sliding-window samples built at the top.
        self.baseline1.train(input, output)

        self.baseline2.train(input, output)