Example no. 1
0
    def run_iters(self):
        """Run DAgger-style training: mix supervisor/learner rollouts, aggregate
        data, then retrain from dataset snapshots and evaluate each one.

        Reads from ``self.params``: 't' (horizon), 'beta' (initial mixing
        coefficient), 'iters' (ascending snapshot iterations; the last entry is
        the total iteration count), and 'update' (iterations at which to
        retrain mid-loop).

        Returns:
            dict mapping 'rewards', 'sup_rewards', 'surr_losses', 'sup_losses',
            'sim_errs' to np.ndarray, one entry per snapshot.
        """
        T = self.params['t']
        beta = self.params['beta']

        results = {
            'rewards': [],
            'sup_rewards': [],
            'surr_losses': [],
            'sup_losses': [],
            'sim_errs': []
        }

        snapshots = []
        for i in range(self.params['iters'][-1]):
            print("\tIteration: " + str(i))

            # Periodic mid-loop retraining on the aggregated dataset.
            if i in self.params['update']:
                self.lnr.train(verbose=True)

            if i == 0:
                # Bootstrap: roll out the supervisor alone and train the
                # learner on its (filtered) state/action data.
                states, i_actions, _, _ = statistics.collect_traj(
                    self.env, self.sup, T, False)
                states, i_actions = utils.filter_data(self.params, states,
                                                      i_actions)
                self.lnr.add_data(states, i_actions)
                self.lnr.train()
            else:
                # Roll out a beta-mixture of supervisor and learner, then
                # relabel every visited state with the supervisor's action.
                states, _, _, _ = statistics.collect_traj_beta(
                    self.env, self.sup, self.lnr, T, beta, False)
                i_actions = [self.sup.intended_action(s) for s in states]
                states, i_actions = utils.filter_data(self.params, states,
                                                      i_actions)
                self.lnr.add_data(states, i_actions)
                # NOTE(review): squaring each iteration yields beta^(2^i),
                # a much faster decay than the usual beta^i — confirm intended.
                beta = beta * beta

            if (i + 1) in self.params['iters']:
                # Shallow-copy the aggregated dataset at this milestone.
                snapshots.append((self.lnr.X[:], self.lnr.y[:]))

        # Retrain from each snapshot in turn and record evaluation metrics.
        for snap_iter, (X, y) in zip(self.params['iters'], snapshots):
            self.lnr.X, self.lnr.y = X, y
            self.lnr.train(verbose=True)
            print("\nData from snapshot: " + str(snap_iter))
            it_results = self.iteration_evaluation()

            results['sup_rewards'].append(it_results['sup_reward_mean'])
            results['rewards'].append(it_results['reward_mean'])
            results['surr_losses'].append(it_results['surr_loss_mean'])
            results['sup_losses'].append(it_results['sup_loss_mean'])
            results['sim_errs'].append(it_results['sim_err_mean'])

        return {key: np.array(vals) for key, vals in results.items()}
Example no. 2
0
    def run_iters(self):
        """Run supervisor-only (behavioral-cloning) data collection under a
        noisy Gaussian supervisor, then retrain from dataset snapshots and
        evaluate each one.

        Reads from ``self.params``: 't' (horizon), 'd' (noise dimension),
        'trace' (target trace of the noise covariance), and 'iters'
        (ascending snapshot iterations; the last entry is the total
        iteration count). Side effect: replaces ``self.sup`` with a
        ``GaussianSupervisor`` wrapping ``self.net_sup``.

        Returns:
            dict mapping 'rewards', 'sup_rewards', 'surr_losses', 'sup_losses',
            'sim_errs' to np.ndarray, one entry per snapshot.
        """
        T = self.params['t']

        results = {
            'rewards': [],
            'sup_rewards': [],
            'surr_losses': [],
            'sup_losses': [],
            'sim_errs': []
        }

        # Build a random symmetric PSD covariance (A^T A) rescaled so its
        # trace equals params['trace'], and wrap the supervisor with it.
        d = self.params['d']
        rand_mat = np.random.normal(0, 1, (d, d))
        new_cov = rand_mat.T.dot(rand_mat)
        new_cov = new_cov / np.trace(new_cov) * self.params['trace']
        self.sup = GaussianSupervisor(self.net_sup, new_cov)

        snapshots = []
        for i in range(self.params['iters'][-1]):
            print("\tIteration: " + str(i))

            # Roll out the (noisy) supervisor and aggregate filtered data.
            states, i_actions, _, _ = statistics.collect_traj(
                self.env, self.sup, T, False)
            states, i_actions = utils.filter_data(self.params, states,
                                                  i_actions)
            self.lnr.add_data(states, i_actions)

            if (i + 1) in self.params['iters']:
                # Shallow-copy the aggregated dataset at this milestone.
                snapshots.append((self.lnr.X[:], self.lnr.y[:]))

        # Retrain from each snapshot in turn and record evaluation metrics.
        for snap_iter, (X, y) in zip(self.params['iters'], snapshots):
            self.lnr.X, self.lnr.y = X, y
            self.lnr.train(verbose=True)
            print("\nData from snapshot: " + str(snap_iter))
            it_results = self.iteration_evaluation()

            results['sup_rewards'].append(it_results['sup_reward_mean'])
            results['rewards'].append(it_results['reward_mean'])
            results['surr_losses'].append(it_results['surr_loss_mean'])
            results['sup_losses'].append(it_results['sup_loss_mean'])
            results['sim_errs'].append(it_results['sim_err_mean'])

        return {key: np.array(vals) for key, vals in results.items()}