import os
import pickle
import re

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

# Repo-local helpers used throughout this file; the module paths below are
# assumptions -- adjust them to this repository's actual layout:
# from utils.global_utils import save_pickle, Logger
# from utils.pairwise_utils import (create_new_data, create_new_dataset,
#                                   create_sets_of_20_from_x_for_pairwise_comparisions,
#                                   find_which_schedule_this_belongs_to)
# from base_testing_environment.prolonet import ProLoNet


class ProLoTrain:
    """
    Trains a BDT with a fixed alpha on the pointwise scheduling task.
    Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_pairwise.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=16,
                              output_dim=1,
                              bayesian_embedding_dim=None,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        self.opt = torch.optim.RMSprop(self.model.parameters())

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains the BDT. Randomly samples a schedule and a timestep within that
        schedule, builds one pointwise example per task in the corresponding
        set of twenty, and trains on each.
        """
        training_done = False
        criterion = torch.nn.BCELoss()
        sig = torch.nn.Sigmoid()

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            set_of_twenty = np.random.choice(self.start_of_each_set_twenty)
            truth = self.Y[set_of_twenty]

            # index of the feature vector of the action actually taken
            phi_i_num = truth + set_of_twenty

            running_loss_predict_tasks = 0
            num_iterations_predict_task = 0

            # iterate over the set of twenty tasks
            for counter in range(set_of_twenty, set_of_twenty + 20):
                # the chosen task is the single positive example
                if counter == phi_i_num:
                    label = torch.ones((1, 1))
                else:
                    label = torch.zeros((1, 1))
                feature_input = np.asarray(self.X[counter])

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(label.cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(label)

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                if counter == phi_i_num:
                    # upweight the lone positive example in each set of twenty
                    loss *= 25
                if torch.isnan(loss):
                    print('nan occurred at iteration ', self.total_iterations,
                          ' at', num_iterations_predict_task)

                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # record the average loss over this set of twenty
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            self.total_iterations += 1

            if self.total_iterations % 500 == 499:
                print('total loss (average for each 40, averaged) at iteration ',
                      self.total_iterations, ' is ', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 10000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, model, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules,
        roughly 20% of the data) and saves the results to disk. Called after
        training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_pairwise.pkl'
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_data(num_schedules, data)

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []
        sig = torch.nn.Sigmoid()

        if load_in_model:
            model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar')['nn_state_dict'])

        for j in range(0, num_schedules):
            schedule_bounds = schedule_array[j]
            step = schedule_bounds[0]
            while step < schedule_bounds[1]:
                probability_vector = np.zeros((1, 20))

                # score each task in the set of twenty
                for m, counter in enumerate(range(step, step + 20)):
                    feature_input = np.asarray(X[counter])

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through the net
                    preference_prob = sig(model.forward(feature_input))
                    probability_vector[0][m] = preference_prob[0].data.detach()[0].item()

                print(probability_vector)

                # top 1: break ties between equally likely tasks at random
                highest_val = max(probability_vector[0])
                all_indexes_that_have_highest_val = [
                    i for i, e in enumerate(list(probability_vector[0])) if e == highest_val]
                choice = np.random.choice(all_indexes_that_have_highest_val)

                # top 3
                _, top_three = torch.topk(torch.Tensor(probability_vector), 3)

                truth = Y[step]
                if choice == truth:
                    prediction_accuracy[0] += 1
                if truth in top_three:
                    prediction_accuracy[1] += 1

                step += 20

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'DDT_pointwise' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=special_string)
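# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): one way to
# drive this trainer end to end. The schedule count of 150 is an illustrative
# assumption, not a value taken from this file.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    trainer = ProLoTrain(num_schedules=150)
    trainer.train()
    trainer.save_trained_nets('DDT_pointwise' + str(trainer.num_schedules))
    # the 100-schedule test split is loaded inside evaluate_on_test_data
    trainer.evaluate_on_test_data(trainer.model)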
class ProLoTrain:
    """
    Trains a Bayesian DDT (PDDT) with a fixed alpha on the pairwise scheduling
    task. A per-schedule Bayesian embedding is learned jointly with the tree
    parameters. Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_pairwise.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=32,
                              output_dim=1,
                              bayesian_embedding_dim=8,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        # separate learning rate for the Bayesian embedding parameters
        self.opt = torch.optim.RMSprop(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}],
            lr=.01)

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.embedding_list = [torch.ones(8) * 1 / 3 for _ in range(self.num_schedules)]

    def train(self):
        """
        Trains the PDDT. Randomly samples a schedule and a timestep within that
        schedule, produces pairwise training data using x_i - x_j, and trains
        on it.
        """
        sig = torch.nn.Sigmoid()
        criterion = torch.nn.BCELoss()
        training_done = False

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            set_of_twenty = np.random.choice(self.start_of_each_set_twenty)
            truth = self.Y[set_of_twenty]
            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, set_of_twenty)
            # load this schedule's embedding into the network
            self.model.set_bayesian_embedding(self.embedding_list[which_schedule])

            # feature vector of the action actually taken
            phi_i_num = truth + set_of_twenty
            phi_i_numpy = np.asarray(self.X[phi_i_num])

            running_loss_predict_tasks = 0
            num_iterations_predict_task = 0

            # positive examples: phi_i - phi_j should map to 1
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j_numpy = np.asarray(self.X[counter])
                feature_input = phi_i_numpy - phi_j_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.ones((1, 1)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.ones((1, 1)))

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # negative examples: phi_j - phi_i should map to 0
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                phi_j_numpy = np.asarray(self.X[counter])
                feature_input = phi_j_numpy - phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.zeros((1, 1)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.zeros((1, 1)))

                output = sig(self.model.forward(feature_input))

                self.opt.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

                running_loss_predict_tasks += loss.item()
                num_iterations_predict_task += 1

            # record the average loss over this set of twenty
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            # copy the updated embedding back out of the network
            self.embedding_list[which_schedule] = torch.Tensor(
                self.model.get_bayesian_embedding().detach().cpu().numpy())

            self.total_iterations += 1

            if self.total_iterations % 500 == 499:
                print('total loss (average for each 40, averaged) at iteration ',
                      self.total_iterations, ' is ', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 10000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, model, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules,
        roughly 20% of the data) and saves the results to disk. Only the
        Bayesian embedding is updated at test time; the tree parameters stay
        fixed. Called after training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_pairwise.pkl'
        sig = torch.nn.Sigmoid()
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_data(num_schedules, data)

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        # optimizer over the embedding parameters only
        embedding_optimizer = torch.optim.SGD(
            [{'params': self.model.bayesian_embedding.parameters()}], lr=.01)
        criterion = torch.nn.BCELoss()
        # one fresh embedding per test schedule; must match bayesian_embedding_dim=8
        embedding_list = [torch.ones(8) * 1 / 3 for _ in range(num_schedules)]

        for j in range(0, num_schedules):
            schedule_bounds = schedule_array[j]
            step = schedule_bounds[0]
            model.set_bayesian_embedding(embedding_list[j])
            while step < schedule_bounds[1]:
                probability_matrix = np.zeros((20, 20))

                for m, counter in enumerate(range(step, step + 20)):
                    phi_i_numpy = np.asarray(X[counter])
                    # compare task m against every other task in the set of
                    # twenty; the diagonal (m == n) stays zero
                    for n, second_counter in enumerate(range(step, step + 20)):
                        if second_counter == counter:
                            continue
                        phi_j_numpy = np.asarray(X[second_counter])
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                        # push through the net
                        preference_prob = sig(model.forward(feature_input))
                        probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()

                # set of twenty completed: rank tasks by summed win probability
                column_vec = np.sum(probability_matrix, axis=1)
                embedding_list[j] = torch.Tensor(
                    self.model.get_bayesian_embedding().detach().cpu().numpy())

                # top 1: break ties between equally likely tasks at random
                highest_val = max(column_vec)
                all_indexes_that_have_highest_val = [
                    i for i, e in enumerate(list(column_vec)) if e == highest_val]
                if len(all_indexes_that_have_highest_val) > 1:
                    print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
                choice = np.random.choice(all_indexes_that_have_highest_val)

                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                truth = Y[step]
                if choice == truth:
                    prediction_accuracy[0] += 1
                if truth in top_three:
                    prediction_accuracy[1] += 1

                # online update of this schedule's embedding, mirroring training
                phi_i_num = truth + step
                phi_i_numpy = np.asarray(X[phi_i_num])

                # positive examples: phi_i - phi_j should map to 1
                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(X[counter])
                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        label = Variable(torch.ones((1, 1)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        label = Variable(torch.ones((1, 1)))

                    output = sig(model(feature_input))
                    loss = criterion(output, label)

                    embedding_optimizer.zero_grad()
                    loss.backward()
                    embedding_optimizer.step()

                # negative examples: phi_j - phi_i should map to 0
                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    phi_j_numpy = np.asarray(X[counter])
                    feature_input = phi_j_numpy - phi_i_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        label = Variable(torch.zeros((1, 1)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        label = Variable(torch.zeros((1, 1)))

                    output = sig(model.forward(feature_input))

                    embedding_optimizer.zero_grad()
                    loss = criterion(output, label)
                    loss.backward()
                    embedding_optimizer.step()

                step += 20

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'PDDT_pairwise' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=special_string)
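# ---------------------------------------------------------------------------
# Illustration (not part of the original source): the evaluation above turns
# pairwise win probabilities into a single choice by row-summing the
# preference matrix and taking the argmax -- a Copeland-style score. A
# self-contained sketch of that reduction on a toy 4-task matrix:
# ---------------------------------------------------------------------------
def _copeland_ranking_demo():
    # entry [i][j]: predicted probability that task i precedes task j
    # (diagonal left at zero, as in the evaluation loop above)
    probability_matrix = np.array([[0.0, 0.8, 0.6, 0.9],
                                   [0.2, 0.0, 0.3, 0.7],
                                   [0.4, 0.7, 0.0, 0.5],
                                   [0.1, 0.3, 0.5, 0.0]])
    # each task's score is its summed probability of "winning" its comparisons
    scores = probability_matrix.sum(axis=1)
    top1 = int(np.argmax(scores))        # -> 0
    top3 = np.argsort(scores)[::-1][:3]  # -> [0, 2, 1]
    return top1, top3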
class Trainer:
    """
    Trains and evaluates a pairwise PDDT on the StarCraft II action-prediction
    task, with a per-player Bayesian embedding.
    """

    def __init__(self):
        self.loss_array = []
        checkpoint = torch.load(
            '/home/ghost/PycharmProjects/scheduling_environment/learn_action_embeddings.pkl')
        self.action_embedding_list = checkpoint['embedding_list']

        self.all_data_train_dir = '/home/ghost/PycharmProjects/scheduling_environment' + '/training_encoding_states_starcraft'
        self.mmr = '/home/ghost/PycharmProjects/scheduling_environment' + '/games_that_have_an_win_loss.pkl'
        self.list_of_games_mmr_train = pickle.load(open(self.mmr, "rb"))
        self.size_of_training_set = len(self.list_of_games_mmr_train)
        self.all_data_test_dir = '/home/ghost/PycharmProjects/scheduling_environment/testing_encoding_states_starcraft'

        self.gamma = .9
        self.criterion = torch.nn.BCELoss()
        self.not_converged = True
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.embedding_list_Bayesian_neur_net = [torch.ones(12) * 1 / 2 for _ in range(4000)]

        # input: 256 state features + 36 action-embedding features
        self.model = ProLoNet(input_dim=256 + 36,
                              weights=None,
                              comparators=None,
                              leaves=64,
                              output_dim=1,
                              bayesian_embedding_dim=12,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=True)

        # separate learning rate for the Bayesian embedding parameters
        self.optimizer_main_net = torch.optim.Adam(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}],
            lr=.001)
        # test-time optimizer that updates only the embedding
        self.embedding_optimizer_bnn = torch.optim.SGD(
            self.model.bayesian_embedding.parameters(), lr=.1)

    # noinspection PyArgumentList
    def train(self, num_steps_for_batch):
        sig = torch.nn.Sigmoid()
        iteration = 0
        while self.not_converged:
            # choose a batch file from the training directory and open it
            files_in_training_directory = os.listdir(self.all_data_train_dir)
            batch = np.random.choice(files_in_training_directory)
            set_of_games = pickle.load(open(os.path.join(self.all_data_train_dir, batch), 'rb'))
            games = set_of_games['state_embeddings']  # state embedding dict
            actions = set_of_games['actions_at_each_frame']
            players = set_of_games['player_per_game']
            big_loss = set_of_games['big_loss']  # games to skip

            for _ in range(num_steps_for_batch):
                # loop to find a game with low loss (fairly inefficient)
                file_with_low_loss_found = False
                filename = None
                while not file_with_low_loss_found:
                    filename = np.random.choice(list(games.keys()))
                    if len(big_loss[filename]) < 50:
                        file_with_low_loss_found = True

                states_filename = games[filename]
                actions_filename = actions[filename]
                player_filename = players[filename]
                length_of_game = len(states_filename)

                # choose a random frame and load this player's embedding
                frame = np.random.randint(1, length_of_game)
                player_id = int(re.findall(r'\d+', batch)[0]) + list(games.keys()).index(filename)
                self.model.set_bayesian_embedding(self.embedding_list_Bayesian_neur_net[player_id])

                X = states_filename[frame]  # state input
                actions_taken_at_frame = actions_filename[frame]  # multi-hot set of actions taken
                actions_taken_list = [i for i, e in enumerate(actions_taken_at_frame) if e != 0]
                set_of_non_actions = self.compute_set_of_non_actions(actions_taken_list)

                # positive examples: taken action minus non-action maps to 1
                Y = torch.ones((1, 1))
                if torch.cuda.is_available():
                    X = Variable(torch.Tensor(X).cuda())
                    Y = Variable(Y.cuda())
                else:
                    X = Variable(torch.Tensor(X))
                    Y = Variable(Y)

                running_loss = []
                for a in actions_taken_list:
                    action_embedding_a = self.action_embedding_list[int(a)]
                    for non_action in set_of_non_actions:
                        action_embedding_a_prime = self.action_embedding_list[non_action]
                        subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()

                        prediction = self.model(
                            torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                        prediction = sig(prediction)

                        prediction_loss = self.criterion(prediction, Y)
                        self.optimizer_main_net.zero_grad()
                        prediction_loss.backward()
                        self.optimizer_main_net.step()
                        running_loss.append(prediction_loss.item())

                # negative examples: non-action minus taken action maps to 0
                Y = torch.zeros((1, 1))
                if torch.cuda.is_available():
                    Y = Variable(Y.cuda())
                else:
                    Y = Variable(Y)

                for a in actions_taken_list:
                    action_embedding_a = self.action_embedding_list[int(a)]
                    for non_action in set_of_non_actions:
                        action_embedding_a_prime = self.action_embedding_list[non_action]
                        subtracted_input = action_embedding_a_prime.cuda() - action_embedding_a.cuda()

                        prediction = self.model(
                            torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                        prediction = sig(prediction)

                        prediction_loss = self.criterion(prediction, Y)
                        self.optimizer_main_net.zero_grad()
                        prediction_loss.backward()
                        self.optimizer_main_net.step()
                        running_loss.append(prediction_loss.item())

                if iteration > 55 and iteration % 50 == 49:
                    print('Mean loss for iteration: ', iteration, ' is ', np.mean(running_loss[-50:]))

                self.loss_array.append(np.mean(running_loss))
                # copy the updated embedding back out of the network
                self.embedding_list_Bayesian_neur_net[player_id] = torch.Tensor(
                    self.model.get_bayesian_embedding().detach().cpu().numpy())
                iteration += 1

                if iteration % 1000 == 999:
                    self.save()

                if iteration > 6000 and np.mean(self.loss_array[-500:]) - np.mean(
                        self.loss_array[-1000:]) < .001:
                    self.not_converged = False

    # noinspection PyArgumentList
    def test(self, load_in_model=False):
        iteration = 0
        tot_test_games = 0
        max_frames = 0
        loss_lists_per_timestep_nn = []
        sig = torch.nn.Sigmoid()
        game_accuracies = []
        embedding_list = [torch.ones(12) * 1 / 2 for _ in range(4000)]

        if load_in_model:
            checkpoint = torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT.pkl')
            self.model.load_state_dict(checkpoint['state_dict'])

        files_in_testing_directory = os.listdir(self.all_data_test_dir)
        for each_batch in files_in_testing_directory:
            print(each_batch)
            set_of_games = pickle.load(open(os.path.join(self.all_data_test_dir, each_batch), 'rb'))
            games = set_of_games['state_embeddings']  # state embedding dict
            actions = set_of_games['actions_at_each_frame']
            players = set_of_games['player_per_game']
            big_loss = set_of_games['big_loss']  # games to skip
            tot_test_games += len(games.keys())

            for filename in games.keys():
                if len(big_loss[filename]) > 50:
                    continue
                states_filename = games[filename]
                actions_filename = actions[filename]
                player_filename = players[filename]
                player_id = int(re.findall(r'\d+', each_batch)[0]) + list(games.keys()).index(filename)
                self.model.set_bayesian_embedding(embedding_list[player_id])

                print("iteration ", iteration)
                frame = 0
                iteration += 1
                length_of_game = len(states_filename)

                while frame < length_of_game - 2:
                    self.probability_matrix_nn = np.zeros((40, 40))
                    print('reached frame ', frame)
                    X = states_filename[frame]
                    actions_taken_at_frame = actions_filename[frame]
                    actions_taken_list = [i for i, e in enumerate(actions_taken_at_frame) if e != 0]
                    set_of_non_actions = self.compute_set_of_non_actions(actions_taken_list)

                    if torch.cuda.is_available():
                        X = Variable(torch.Tensor(X).cuda())
                    else:
                        X = Variable(torch.Tensor(X))

                    # fill the 40x40 pairwise preference matrix (diagonal stays zero)
                    for i in range(40):
                        action_embedding_a = self.action_embedding_list[int(i)]
                        for j in range(40):
                            if i == j:
                                continue
                            action_embedding_a_prime = self.action_embedding_list[int(j)]
                            subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()
                            prediction = self.model.forward(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            self.probability_matrix_nn[i][j] = prediction.item()

                    # rank actions by summed win probability and score against the truth
                    column_vec_nn = np.sum(self.probability_matrix_nn, axis=1)
                    bce = torch.nn.BCELoss()
                    soft = nn.Softmax(dim=0)
                    column_vec_nn = soft(torch.Tensor(column_vec_nn))
                    loss_nn = bce(column_vec_nn, Variable(torch.Tensor(actions_taken_at_frame)))
                    loss_lists_per_timestep_nn.append(loss_nn.item())

                    # online update of this player's embedding, mirroring training
                    # positive examples
                    Y = torch.ones((1, 1))
                    if torch.cuda.is_available():
                        Y = Variable(Y.cuda())
                    else:
                        Y = Variable(Y)

                    for a in actions_taken_list:
                        action_embedding_a = self.action_embedding_list[int(a)]
                        for non_action in set_of_non_actions:
                            action_embedding_a_prime = self.action_embedding_list[non_action]
                            subtracted_input = action_embedding_a.cuda() - action_embedding_a_prime.cuda()
                            prediction = self.model(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            prediction_loss = self.criterion(prediction, Y)
                            self.embedding_optimizer_bnn.zero_grad()
                            prediction_loss.backward()
                            self.embedding_optimizer_bnn.step()

                    # negative examples
                    Y = torch.zeros((1, 1))
                    if torch.cuda.is_available():
                        Y = Variable(Y.cuda())
                    else:
                        Y = Variable(Y)

                    for a in actions_taken_list:
                        action_embedding_a = self.action_embedding_list[int(a)]
                        for non_action in set_of_non_actions:
                            action_embedding_a_prime = self.action_embedding_list[non_action]
                            subtracted_input = action_embedding_a_prime.cuda() - action_embedding_a.cuda()
                            prediction = self.model(
                                torch.cat([X, subtracted_input.reshape(36)], dim=0).reshape(1, 292))
                            prediction = sig(prediction)
                            prediction_loss = self.criterion(prediction, Y)
                            self.embedding_optimizer_bnn.zero_grad()
                            prediction_loss.backward()
                            self.embedding_optimizer_bnn.step()

                    frame += 1

                # game has ended
                game_accuracies.append(np.mean(loss_lists_per_timestep_nn))
                loss_lists_per_timestep_nn = []
                self.print_and_store_accs(game_accuracies)
                if len(game_accuracies) == 5:
                    self.save_embeddings(embedding_list)
                if len(game_accuracies) == 15:
                    self.print_and_store_accs(game_accuracies)
                    self.save_embeddings(embedding_list)
                    exit()
            # batch has ended

        # all batches finished
        print(tot_test_games)
        print(max_frames)

    def print_and_store_accs(self, game_accs):
        print('Loss: {}'.format(np.mean(game_accs)))
        file = open('starcraft_learning_results.txt', 'a')
        file.write('PDDT_pairwise: mean: ' + str(np.mean(game_accs)) +
                   ', std: ' + str(np.std(game_accs)) + '\n')
        file.close()

    def save(self):
        torch.save({'state_dict': self.model.state_dict()},
                   '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT.pkl')

    def save_embeddings(self, embeddings):
        torch.save({'state_dict': self.model.state_dict(),
                    'embeddings': embeddings},
                   '/home/ghost/PycharmProjects/bayesian_prolo/starcraft/pairwise_sc2_PDDT_embeddings.pkl')

    def compute_set_of_non_actions(self, actions):
        set_of_non_actions = list(range(40))
        for each_action in actions:
            set_of_non_actions.remove(each_action)
        return set_of_non_actions
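# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original source): one way to
# drive the StarCraft trainer. num_steps_for_batch=50 is an illustrative
# assumption, and the checkpoint/dataset paths in __init__ must already exist.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    sc2_trainer = Trainer()
    sc2_trainer.train(num_steps_for_batch=50)
    sc2_trainer.save()
    sc2_trainer.test(load_in_model=True)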
class ProLoTrain:
    """
    Trains a Bayesian DDT (PDDT) with a fixed alpha on the naive scheduling
    task. A per-schedule Bayesian embedding is learned jointly with the tree
    parameters. Handles training, evaluating, and saving the BDT.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + 'dist_early_hili_naive.pkl'
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(num_schedules=self.num_schedules, data=self.data)
        # append the 20 task indices to each state, giving input_dim = 242
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=64,
                              output_dim=20,
                              bayesian_embedding_dim=8,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        # separate learning rate for the Bayesian embedding parameters
        self.opt = torch.optim.RMSprop(
            [{'params': list(self.model.parameters())[:-1]},
             {'params': self.model.bayesian_embedding.parameters(), 'lr': .01}])

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.max_depth = 10
        self.embedding_list = [torch.ones(8) * 1 / 3 for _ in range(self.num_schedules)]

    def train(self):
        """
        Trains the PDDT. Randomly samples a timestep, loads the corresponding
        schedule's embedding, and trains on the 20-way classification loss.
        """
        training_done = False

        while not training_done:
            # sample a timestep before the cutoff for the cross-validation set
            rand_timestep_within_sched = np.random.randint(len(self.X))
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, rand_timestep_within_sched)
            self.model.set_bayesian_embedding(self.embedding_list[which_schedule])

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn)
            loss = F.cross_entropy(output, truth)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            self.opt.step()

            # copy the updated embedding back out of the network
            self.embedding_list[which_schedule] = torch.Tensor(
                self.model.get_bayesian_embedding().detach().cpu().numpy())

            self.total_loss_array.append(loss.item())
            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('PDDT' + str(self.num_schedules))

            if self.total_iterations > 100000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, load_in_model=False):
        """
        Evaluates a trained network on the held-out test split (100 schedules)
        and saves the results to disk. Only the Bayesian embedding is updated
        at test time. Called after training converges.
        """
        num_schedules = 100
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            num_schedules) + 'test_dist_early_hili_naive.pkl'
        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_dataset(num_schedules=num_schedules, data=data)
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        # optimizer over the embedding parameters only
        embedding_optimizer = torch.optim.SGD([{'params': self.model.bayesian_embedding.parameters()}], lr=.1)
        # one fresh embedding per test schedule
        embedding_list = [torch.ones(8) * 1 / 3 for _ in range(num_schedules)]

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            self.model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar')['nn_state_dict'])

        for i, schedule in enumerate(schedule_array):
            # start each test schedule from its own fresh embedding
            self.model.set_bayesian_embedding(embedding_list[i])

            for count in range(schedule[0], schedule[1] + 1):
                net_input = X[count]
                truth = Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).long())

                # forward pass, then embedding-only update
                output = self.model.forward(input_nn)
                embedding_optimizer.zero_grad()
                loss = F.cross_entropy(output, truth)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                embedding_optimizer.step()

                index = torch.argmax(output).item()
                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1
                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            # copy the updated embedding back out of the network
            embedding_list[i] = torch.Tensor(self.model.get_bayesian_embedding().detach().cpu().numpy())

            # schedule finished: each test schedule contains 20 timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20,
                  ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'DDT_w_embedding')

    def save_trained_nets(self, name):
        """
        Saves the model.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/naive_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the mean and standard error of the top-1 and top-3 accuracies.
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/naive_saved_models/',
                    special_string=special_string)
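# ---------------------------------------------------------------------------
# Illustration (not part of the original source): the embedding-based trainers
# above all share one pattern -- train the tree parameters and a per-instance
# Bayesian embedding jointly, then freeze the tree at test time and fit only a
# fresh embedding. A minimal self-contained sketch of that pattern; the tiny
# MLP stands in for ProLoNet, whose internals are not shown in this file.
# ---------------------------------------------------------------------------
class _TinyEmbeddingNet(torch.nn.Module):
    """Stand-in for ProLoNet: a learned per-instance embedding is
    concatenated to the input features before the forward pass."""

    def __init__(self, input_dim=13, embedding_dim=8):
        super().__init__()
        self.embedding = torch.nn.Parameter(torch.ones(embedding_dim) / 3)
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(input_dim + embedding_dim, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1))

    def set_embedding(self, vec):
        # analogous to model.set_bayesian_embedding(...) above
        with torch.no_grad():
            self.embedding.copy_(vec)

    def forward(self, x):
        emb = self.embedding.expand(x.shape[0], -1)
        return self.layers(torch.cat([x, emb], dim=1))


def _embedding_adaptation_demo():
    net = _TinyEmbeddingNet()
    # training: update everything, with a separate lr for the embedding,
    # mirroring the two-group optimizers used by the trainers above
    joint_opt = torch.optim.RMSprop(
        [{'params': net.layers.parameters()},
         {'params': [net.embedding], 'lr': .01}], lr=.01)
    # test time: leave the main parameters out of the optimizer entirely,
    # so gradient steps adapt only the embedding
    embedding_opt = torch.optim.SGD([net.embedding], lr=.01)
    return net, joint_opt, embedding_opt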