def train(self):
        """
        Trains the LSTM on short windows of consecutive timesteps.

        Every outer iteration:
          1. rejection-samples a timestep such that it and its two successors
             do not exceed entry [1] of the schedule it belongs to in
             ``self.schedule_array`` (presumably the schedule's last timestep
             -- TODO confirm),
          2. re-initializes the model's hidden state and loads the embedding
             of that schedule,
          3. runs one forward pass and (conditionally) one optimizer step for
             each of the three timesteps, carrying over ``self.model.hidden``,
          4. stores the embedding back into ``self.embedding_list``.

        The target distribution ``P`` puts ``self.distribution_epsilon`` on
        each of the 20 entries and ``1 - 19 * epsilon`` on the true index.
        Per-timestep losses are appended to ``self.total_loss_array``.
        The loop ends once more than 11000 iterations have run and the mean of
        the last 100 losses minus the mean of the last 500 losses is below
        ``self.convergence_epsilon``.

        Note: loops forever if no timestep satisfies the window condition.

        :return: None
        """
        # Run on the GPU when present, otherwise stay on the CPU. Hidden
        # state, inputs and targets are all placed on this one device.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loss_func = AlphaLoss()
        training_done = False

        while not training_done:
            # Rejection-sample the first timestep of a 3-step window.
            while True:
                first_timestep = np.random.randint(len(self.X))
                which_schedule = find_which_schedule_this_belongs_to(
                    self.schedule_array, first_timestep)
                if first_timestep + 2 <= self.schedule_array[which_schedule][1]:
                    break

            timesteps = [first_timestep, first_timestep + 1, first_timestep + 2]
            self.model.reinitialize_hidden_to_random()
            load_in_embedding_bnn(self.model, self.embedding_list,
                                  which_schedule)

            for timestep in timesteps:
                # Detach so gradients do not flow into earlier timesteps.
                previous_hidden_state = tuple(
                    t.detach().to(device) for t in self.model.hidden)

                input_nn = torch.Tensor(
                    np.asarray(self.X[timestep]).reshape(1, 242)).to(device)
                truth_nn = self.Y[timestep]

                # Smoothed one-hot target over the 20 outputs.
                P = (torch.Tensor(np.ones((1, 20))) *
                     self.distribution_epsilon).to(device)
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon

                self.opt.zero_grad()
                output = self.model.forward(input_nn, previous_hidden_state)

                loss = loss_func.forward(P, output, self.alpha)
                loss_value = loss.item()
                # Skip the update for losses outside [.05, 5]; the loss is
                # still recorded below.
                if not (loss_value < .05 or loss_value > 5):
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   0.5)
                    self.opt.step()
                self.total_loss_array.append(loss_value)

            self.embedding_list = store_embedding_back_bnn(
                self.model, self.embedding_list, which_schedule)
            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)',
                      np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('lstm_small' + str(self.num_schedules))

            if self.total_iterations > 11000 and np.mean(
                    self.total_loss_array[-100:]) - np.mean(
                        self.total_loss_array[-500:]
                    ) < self.convergence_epsilon:
                training_done = True
    def evaluate_on_test_data(self, load_in_model=False):
        """
        Evaluate the network on the 75-schedule "naive" test set, tuning only
        the embedding with the alpha-divergence loss while evaluating.

        For every schedule the hidden state is re-initialized and a fresh
        embedding (``torch.ones(1, 8) / 3``) is loaded. Each timestep in
        ``schedule[0] .. schedule[1]`` (inclusive) is pushed through the
        model; top-1 and top-3 hits are counted, and the embedding optimizer
        takes a step when the loss lies within [.05, 5]. Per-schedule hit
        counts are divided by 20 and passed to
        ``self.save_performance_results``.

        :param load_in_model: when True, first load model weights from the
            hard-coded ``NN_homog.tar`` checkpoint.
        :return: None
        """
        num_schedules = 75
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loss_func = AlphaLoss()
        # load in new data
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_naive.pkl'

        # NOTE: pickle can execute arbitrary code on load; only point this at
        # trusted dataset files.
        with open(load_directory, "rb") as dataset_file:
            data = pickle.load(dataset_file)
        X, Y, schedule_array = create_new_dataset(num_schedules=num_schedules,
                                                  data=data)
        # Append the values 0..19 to every feature vector (the result is
        # reshaped to (1, 242) below).
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        embedding_optimizer = torch.optim.Adam(
            self.model.EmbeddingList.parameters(), lr=.001)

        embedding_list = [
            torch.ones(1, 8) * 1 / 3 for _ in range(num_schedules)
        ]

        if load_in_model:  # TODO: somehow get the string when the update_model flag is true
            self.model.load_state_dict(
                torch.load(
                    '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/NN_homog.tar'
                )['nn_state_dict'])

        for i, schedule in enumerate(schedule_array):
            prediction_accuracy = [0, 0]  # [top-1 hits, top-3 hits]
            self.model.reinitialize_hidden_to_random()
            load_in_embedding_bnn(self.model, embedding_list, i)
            for count in range(schedule[0], schedule[1] + 1):
                previous_hidden_state = tuple(
                    t.detach().to(device) for t in self.model.hidden)

                input_nn = torch.Tensor(
                    np.asarray(X[count]).reshape(1, 242)).to(device)
                # Must be an integer tensor: it is used as an index into P.
                truth = torch.Tensor(
                    np.asarray(Y[count]).reshape(1)).long().to(device)
                P = (torch.Tensor(np.ones((1, 20))) *
                     self.distribution_epsilon).to(device)
                P[0][truth] = 1 - 19 * self.distribution_epsilon

                #####forward#####
                output = self.model.forward(input_nn, previous_hidden_state)

                loss = loss_func.forward(P, output, self.alpha)
                loss_value = loss.item()
                if not (loss_value < .05 or loss_value > 5):
                    # Clear gradients left over from the previous timestep;
                    # without this they accumulate across steps.
                    embedding_optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   0.5)
                    embedding_optimizer.step()

                index = torch.argmax(output).item()

                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1

                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            store_embedding_back_bnn(self.model, embedding_list, i)
            # schedule finished
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)

            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)

        self.save_performance_results(
            percentage_accuracy_top1, percentage_accuracy_top3,
            'inf_blstm_small_' + str(self.num_schedules))
    def evaluate_on_test_data(self, load_in_model=False):
        """
        Evaluate the pairwise network on the 75-schedule pairwise test set,
        tuning only the embedding with the alpha-divergence loss.

        The data is walked in sets of 20 consecutive rows per schedule. For
        each set:
          * every ordered pair (i, j), i != j, of rows is scored by feeding
            ``X[i] - X[j]`` through the model; entry 0 of the output is stored
            in a 20x20 matrix whose row sums rank the 20 candidates,
          * top-1 / top-3 hits against ``Y[step]`` are counted,
          * the embedding is tuned on the 19 pairs (truth - other) with target
            ``[1 - eps, eps]`` and then on the 19 reversed pairs with target
            ``[eps, 1 - eps]``; an update is skipped when the loss is outside
            [.001, 50].
        Per-schedule hit counts are divided by 20 and passed to
        ``self.save_performance_results``.

        :param load_in_model: when True, first load model weights from the
            hard-coded ``NN_homog.tar`` checkpoint.
        :return: None
        """
        num_schedules = 75
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # load in new data
        loss_func = AlphaLoss()
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_pairwise.pkl'

        # NOTE: pickle can execute arbitrary code on load; only point this at
        # trusted dataset files.
        with open(load_directory, "rb") as dataset_file:
            data = pickle.load(dataset_file)
        X, Y, schedule_array = create_new_data(num_schedules, data)
        # NOTE(review): the result is not used in this method; the call is
        # kept in case the helper validates X -- confirm and remove.
        create_sets_of_20_from_x_for_pairwise_comparisions(X)

        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        embedding_optimizer = torch.optim.Adam(
            self.model.EmbeddingList.parameters(), lr=.001)

        embedding_list = [
            torch.ones(1, 8) * 1 / 3 for _ in range(num_schedules)
        ]

        if load_in_model:  # TODO: somehow get the string when the update_model flag is true
            self.model.load_state_dict(
                torch.load(
                    '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/NN_homog.tar'
                )['nn_state_dict'])

        def to_model_input(feature_difference):
            """Turn a 13-element feature difference into a (1, 13) tensor."""
            return torch.Tensor(feature_difference.reshape(1, 13)).to(device)

        def tune_embedding(feature_difference, target, hidden_state):
            """One forward pass plus a conditional embedding-only update."""
            feature_input = to_model_input(feature_difference)
            P = torch.Tensor(target).to(device)
            output = self.model(feature_input, hidden_state)
            loss = loss_func.forward(P, output, self.alpha)
            loss_value = loss.item()
            # Skip updates for losses outside [.001, 50].
            if loss_value < .001 or loss_value > 50:
                return
            embedding_optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            embedding_optimizer.step()

        epsilon = self.distribution_epsilon

        for j in range(num_schedules):
            schedule_bounds = schedule_array[j]
            prediction_accuracy = [0, 0]  # [top-1 hits, top-3 hits]
            load_in_embedding_bnn(self.model, embedding_list, j)
            self.model.reinitialize_hidden_to_random()

            for step in range(schedule_bounds[0], schedule_bounds[1], 20):
                probability_matrix = np.zeros((20, 20))
                # The same detached hidden state is used for every forward
                # pass of this set.
                previous_hidden_state = tuple(
                    t.detach().to(device) for t in self.model.hidden)
                set_features = [
                    np.asarray(X[row]) for row in range(step, step + 20)
                ]

                # Score every ordered pair; the diagonal stays zero.
                for m, phi_i_numpy in enumerate(set_features):
                    for n, phi_j_numpy in enumerate(set_features):
                        if m == n:
                            continue
                        preference_prob = self.model.forward(
                            to_model_input(phi_i_numpy - phi_j_numpy),
                            previous_hidden_state)
                        probability_matrix[m][n] = preference_prob[
                            0].data.detach()[0].item()

                # Set of twenty is completed
                column_vec = np.sum(probability_matrix, axis=1)

                # top 1
                choice = np.argmax(column_vec)

                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                truth = Y[step]

                if choice == truth:
                    prediction_accuracy[0] += 1

                if truth in top_three:
                    prediction_accuracy[1] += 1

                # Row of the candidate that was actually chosen.
                phi_i_num = truth + step
                phi_i_numpy = np.asarray(X[phi_i_num])

                # First pass: (truth - other), truth should be preferred.
                for offset, phi_j_numpy in enumerate(set_features):
                    if step + offset == phi_i_num:
                        continue
                    tune_embedding(phi_i_numpy - phi_j_numpy,
                                   [1 - epsilon, epsilon],
                                   previous_hidden_state)

                # Second pass: (other - truth), mirrored target.
                for offset, phi_j_numpy in enumerate(set_features):
                    if step + offset == phi_i_num:
                        continue
                    tune_embedding(phi_j_numpy - phi_i_numpy,
                                   [epsilon, 1 - epsilon],
                                   previous_hidden_state)

                store_embedding_back_bnn(self.model, embedding_list, j)

            # schedule finished
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)

            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)

        self.save_performance_results(
            percentage_accuracy_top1, percentage_accuracy_top3,
            'inf_blstm_small_' + str(self.num_schedules))
    def train(self):
        """
        Trains the pairwise network on windows of three consecutive sets of
        twenty rows.

        Every outer iteration rejection-samples a set start from
        ``self.start_of_each_set_twenty`` such that ``start + 59`` does not
        exceed entry [1] of its schedule in ``self.schedule_array``,
        re-initializes the hidden state and loads that schedule's embedding.
        For each of the three sets, the row of the true choice
        (``Y[start] + start``) is compared against the other 19 rows twice:
        ``truth - other`` with target ``[1 - eps, eps]`` and
        ``other - truth`` with target ``[eps, 1 - eps]``. Both optimizers step
        unless the loss is below .001 or above 55. The mean loss over the 38
        pairs is appended to ``self.total_loss_array`` and the embedding is
        stored back after every set.

        Training stops once more than 2000 iterations have run and the mean of
        the last 100 losses minus the mean of the last 500 is below
        ``self.convergence_epsilon``.

        Note: loops forever if no set start satisfies the window condition.

        :return: None
        """
        # Run on the GPU when present, otherwise stay on the CPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loss_func = AlphaLoss()
        training_done = False
        epsilon = self.distribution_epsilon

        def pairwise_step(feature_difference, target, hidden_state,
                          *nan_details):
            """Forward one pair, update both optimizers, return the loss."""
            feature_input = torch.Tensor(
                feature_difference.reshape(1, 13)).to(device)
            P = torch.Tensor(target).to(device)

            output = self.model.forward(feature_input, hidden_state)
            self.opt.zero_grad()
            self.embedding_optimizer.zero_grad()
            loss = loss_func.forward(P, output, self.alpha)
            if torch.isnan(loss):
                print(self.alpha, ' :nan occurred at iteration ',
                      self.total_iterations, *nan_details)

            loss_value = loss.item()
            # Skip the update for losses outside [.001, 55].
            if not (loss_value < .001 or loss_value > 55):
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()
                self.embedding_optimizer.step()
            return loss_value

        while not training_done:
            # Rejection-sample the first of three consecutive sets of twenty.
            while True:
                rand_index = np.random.randint(
                    len(self.start_of_each_set_twenty))
                first_set = self.start_of_each_set_twenty[rand_index]
                which_schedule = find_which_schedule_this_belongs_to(
                    self.schedule_array, first_set)
                if first_set + 59 <= self.schedule_array[which_schedule][1]:
                    break

            sets_of_twenty = [first_set, first_set + 20, first_set + 40]
            self.model.reinitialize_hidden_to_random()
            load_in_embedding_bnn(self.model, self.embedding_list,
                                  which_schedule)

            for set_of_twenty in sets_of_twenty:
                truth = self.Y[set_of_twenty]

                # find feature vector of true action taken
                phi_i_num = truth + set_of_twenty
                phi_i_numpy = np.asarray(self.X[phi_i_num])
                # Detached once per set; reused for all pairs of this set.
                previous_hidden_state = tuple(
                    t.detach().to(device) for t in self.model.hidden)

                # loss bookkeeping for this set
                running_loss_predict_tasks = 0
                num_iterations_predict_task = 0

                # First pass: (truth - other), truth should be preferred.
                for counter in range(set_of_twenty, set_of_twenty + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(self.X[counter])
                    running_loss_predict_tasks += pairwise_step(
                        phi_i_numpy - phi_j_numpy, [1 - epsilon, epsilon],
                        previous_hidden_state)
                    num_iterations_predict_task += 1

                # Second pass: (other - truth), mirrored target. The NaN
                # message here also reports the pair counter.
                for counter in range(set_of_twenty, set_of_twenty + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(self.X[counter])
                    running_loss_predict_tasks += pairwise_step(
                        phi_j_numpy - phi_i_numpy, [epsilon, 1 - epsilon],
                        previous_hidden_state, ' at',
                        num_iterations_predict_task)
                    num_iterations_predict_task += 1

                self.embedding_list = store_embedding_back_bnn(
                    self.model, self.embedding_list, which_schedule)
                self.total_loss_array.append(running_loss_predict_tasks /
                                             num_iterations_predict_task)

            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)',
                      np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('bnn_small' + str(self.num_schedules))

            if self.total_iterations > 2000 and np.mean(
                    self.total_loss_array[-100:]) - np.mean(
                        self.total_loss_array[-500:]
                    ) < self.convergence_epsilon:
                training_done = True
Exemple #5
0
    def evaluate_on_test_data(self):
        """
        Evaluate the network on the 75-schedule "naive" test set, tuning only
        the embedding with the alpha-divergence loss while evaluating.

        For every schedule a fresh embedding (``torch.ones(1, 8) / 3``) is
        loaded. Each timestep in ``schedule[0] .. schedule[1]`` (inclusive) is
        pushed through the model; top-1 and top-3 hits are counted, and the
        embedding optimizer (SGD, lr=.001) takes a step when the loss lies
        within [.05, 5]. Per-schedule hit counts are divided by 20, the mean
        top-1 value is printed, and both lists are passed to
        ``self.save_performance_results``.

        :return: None
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loss_func = AlphaLoss()
        num_schedules = 75
        # load in new data
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_naive.pkl'

        # NOTE: pickle can execute arbitrary code on load; only point this at
        # trusted dataset files.
        with open(load_directory, "rb") as dataset_file:
            data = pickle.load(dataset_file)
        X, Y, schedule_array = create_new_dataset(data, num_schedules)
        # Append the values 0..19 to every feature vector (the result is
        # reshaped to (1, 242) below).
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        embedding_optimizer = torch.optim.SGD(
            self.model.EmbeddingList.parameters(), lr=.001)

        embedding_list = [
            torch.ones(1, 8) * 1 / 3 for _ in range(num_schedules)
        ]

        for i, schedule in enumerate(schedule_array):
            prediction_accuracy = [0, 0]  # [top-1 hits, top-3 hits]
            load_in_embedding_bnn(self.model, embedding_list, i)
            for count in range(schedule[0], schedule[1] + 1):
                input_nn = torch.Tensor(
                    np.asarray(X[count]).reshape(1, 242)).to(device)
                # Must be an integer tensor: it is used as an index into P.
                truth = torch.Tensor(
                    np.asarray(Y[count]).reshape(1)).long().to(device)
                P = (torch.Tensor(np.ones((1, 20))) *
                     self.distribution_epsilon).to(device)
                P[0][truth] = 1 - 19 * self.distribution_epsilon

                #####forward#####
                output = self.model.forward(input_nn)
                loss = loss_func.forward(P, output, self.alpha)
                loss_value = loss.item()
                # Skip the embedding update for losses outside [.05, 5].
                if not (loss_value < .05 or loss_value > 5):
                    embedding_optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   0.5)
                    embedding_optimizer.step()
                index = torch.argmax(output).item()

                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1

                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)

            print('schedule num:', i)

            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            store_embedding_back_bnn(self.model, embedding_list, i)
        print(np.mean(percentage_accuracy_top1))
        self.save_performance_results(
            percentage_accuracy_top1, percentage_accuracy_top3,
            'inf_bnn_small_' + str(self.num_schedules))
Exemple #6
0
    def train(self):
        """
        Trains BDT.

        Each iteration samples one random timestep, loads the embedding of the
        schedule it belongs to, and feeds the (1, 242) input through the
        model. The target distribution ``P`` puts
        ``self.distribution_epsilon`` on each of the 20 entries and
        ``1 - 19 * epsilon`` on the true index. Both ``self.opt`` and
        ``self.embedding_optimizer`` step unless the loss is below .05 or
        above 5. Every loss is appended to ``self.total_loss_array`` and the
        embedding is stored back into ``self.embedding_list``.

        Training stops once ``self.total_loss_array`` holds more than 20000
        entries (entries already present before the call count as well).

        :return: None
        """
        # Run on the GPU when present, otherwise stay on the CPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        loss_func = AlphaLoss()
        training_done = False

        while not training_done:
            # sample a timestep before the cutoff for cross_validation
            rand_timestep_within_sched = np.random.randint(len(self.X))
            truth_nn = self.Y[rand_timestep_within_sched]

            which_schedule = find_which_schedule_this_belongs_to(
                self.schedule_array, rand_timestep_within_sched)
            load_in_embedding_bnn(self.model, self.embedding_list,
                                  which_schedule)

            input_nn = torch.Tensor(
                np.asarray(self.X[rand_timestep_within_sched]).reshape(
                    1, 242)).to(device)  # change to 5 to increase batch size
            # Smoothed one-hot target over the 20 outputs.
            P = (torch.Tensor(np.ones((1, 20))) *
                 self.distribution_epsilon).to(device)
            P[0][truth_nn] = 1 - 19 * self.distribution_epsilon

            self.opt.zero_grad()
            self.embedding_optimizer.zero_grad()
            output = self.model.forward(input_nn)

            loss = loss_func.forward(P, output, self.alpha)
            loss_value = loss.item()
            # Skip the update for losses outside [.05, 5]; the loss is still
            # recorded below.
            if not (loss_value < .05 or loss_value > 5):
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()
                self.embedding_optimizer.step()

            self.total_loss_array.append(loss_value)

            total_iterations = len(self.total_loss_array)

            self.embedding_list = store_embedding_back_bnn(
                self.model, self.embedding_list, which_schedule)

            if total_iterations % 50 == 49:
                print('loss at', total_iterations,
                      ', total loss (average for each 40, averaged)',
                      np.mean(self.total_loss_array[-40:]))

            if total_iterations > 20000:
                training_done = True
Exemple #7
0
def tune_both():
    """Train a ``NeuronBNN`` together with per-schedule embeddings, then evaluate.

    Training alternates between two optimizers: ``opt`` updates every network
    parameter except index 6, and ``embedding_opt`` updates
    ``bnn.EmbeddingList[0]``. Each epoch runs five rounds on randomly chosen
    even-indexed schedules followed by five rounds on randomly chosen
    odd-indexed schedules; within each group of five, rounds 0, 2 and 4 step
    the network optimizer and rounds 1 and 3 step the embedding optimizer.

    Evaluation draws a fresh dataset, starts every test schedule from a fresh
    0.5 embedding, and for each sample whose L1 error exceeds 0.05 fine-tunes
    only the embedding (at most 101 steps, stopping early once the error drops
    below 0.1). The error recorded for a sample is the one measured *before*
    that fine-tuning.

    Prints the model state dict, per-sample output/label/error, both embedding
    lists, the average loss and the fraction of samples with error < 0.05.

    :return: None
    """
    samples_per_schedule = 20
    num_train_schedules = 100
    num_test_schedules = 20
    rounds_per_parity = 5
    epochs = 1000
    max_tuning_steps = 101  # original loop stopped once its counter exceeded 100

    bnn = NeuronBNN()
    # now we let network tune both
    # NOTE(review): re-running the constructor on a freshly built instance
    # looks redundant; kept so the sequence of random draws is unchanged.
    bnn.__init__()
    data, labels = create_dual_neuron_dataset(num_train_schedules)

    params = list(bnn.parameters())
    # presumably index 6 is the embedding parameter, which is trained by
    # embedding_opt instead -- TODO confirm against NeuronBNN
    del params[6]
    opt = torch.optim.SGD(params, lr=.0001)
    embedding_opt = torch.optim.SGD(bnn.EmbeddingList[0].parameters(), lr=.01)

    embedding_list = [
        torch.ones(1, 1) / 2 for _ in range(num_train_schedules)
    ]
    loss_fn = nn.L1Loss()

    # even sets of twenty are lam = 1
    # odd sets of twenty are lam = 0
    # Start indices of the even-numbered schedules: 0, 40, ..., 1960.
    even_starts = np.linspace(
        0, (num_train_schedules - 2) * samples_per_schedule,
        num=num_train_schedules // 2)

    for _ in range(epochs):
        # offset 0 -> even schedules, offset samples_per_schedule -> the odd
        # schedule that directly follows the sampled even one
        for offset in (0, samples_per_schedule):
            for round_idx in range(rounds_per_parity):
                first_index = int(np.random.choice(even_starts)) + offset
                optimizer = opt if round_idx % 2 == 0 else embedding_opt
                embedding_list = _train_on_schedule(
                    bnn, data, labels, loss_fn, optimizer, embedding_list,
                    first_index, samples_per_schedule)

    test_data, test_labels = create_dual_neuron_dataset(num_test_schedules)
    print(bnn.state_dict())
    avg_loss = 0
    test_embedding_list = [
        torch.ones(1, 1) / 2 for _ in range(num_test_schedules)
    ]
    # larger learning rate for the test-time embedding adaptation
    embedding_opt = torch.optim.SGD(bnn.EmbeddingList[0].parameters(), lr=.1)
    counter = 0
    num_test_samples = num_test_schedules * samples_per_schedule
    for i in range(num_test_samples):
        schedule = i // samples_per_schedule
        load_in_embedding_bnn(bnn, test_embedding_list, schedule)
        x, label = _sample_as_tensors(test_data, test_labels, i)
        output = bnn.forward(x)

        error = loss_fn(output, label)
        print('output is ', output)
        print('label is ', label)
        print('error is ', error.item())
        avg_loss += error.item()
        if error.item() < .05:
            counter += 1
        if error.item() > .05:
            # Adapt only the embedding until the sample is fit well enough
            # or the step budget is exhausted.
            for _ in range(max_tuning_steps):
                embedding_opt.zero_grad()
                error.backward()
                embedding_opt.step()
                output = bnn.forward(x)
                error = loss_fn(output, label)
                if error.item() < .1:
                    break
        test_embedding_list = store_embedding_back_bnn(
            bnn, test_embedding_list, schedule)

    print(test_embedding_list)
    print(embedding_list)
    avg_loss /= num_test_samples
    print('avg loss is', avg_loss)
    print('accuracy', counter / num_test_samples)


def _sample_as_tensors(data, labels, index):
    """Return sample ``index`` as a shape-(2) input and a shape-(1, 1) label."""
    x = torch.Tensor([data[index][0:2]]).reshape((2))
    label = torch.Tensor([labels[index]]).reshape((1, 1))
    return x, label


def _train_on_schedule(bnn, data, labels, loss_fn, optimizer, embedding_list,
                       first_index, samples_per_schedule):
    """Run one pass over a single schedule's samples with ``optimizer``.

    Loads the schedule's embedding into ``bnn``, performs one optimizer step
    per sample in ``[first_index, first_index + samples_per_schedule)``, and
    returns the embedding list produced by ``store_embedding_back_bnn``.
    """
    schedule = first_index // samples_per_schedule
    load_in_embedding_bnn(bnn, embedding_list, schedule)
    for i in range(first_index, first_index + samples_per_schedule):
        x, label = _sample_as_tensors(data, labels, i)
        output = bnn.forward(x)
        # Only the chosen optimizer's gradients are cleared and stepped; the
        # other parameter group is left untouched during this round.
        optimizer.zero_grad()
        error = loss_fn(output, label)
        error.backward()
        optimizer.step()
    return store_embedding_back_bnn(bnn, embedding_list, schedule)