Example #1
0
 def __init__(self, env, num_hidden_layers, num_neurons, act_fn, ckpt = False, opt = 'gd', scope_name = 'pol'):
     """Wrap an ``NN`` built for ``env`` with a learning rate of 0.

     Args:
         env: Environment; ``env.observation_space.shape[0]`` and
             ``env.action_space.n`` are passed to ``NN`` as its first two
             arguments.
         num_hidden_layers: Forwarded to ``NN``.
         num_neurons: Forwarded to ``NN``.
         act_fn: Forwarded to ``NN``.
         ckpt: '/'-separated checkpoint directory. When given, it is
             rewritten to ``<ckpt>/<second path component>-0`` (for
             example ``'a/b/c'`` becomes ``'a/b/c/b-0'``) before being
             handed to ``NN``. A falsy value (the default) is forwarded
             unchanged.
         opt: Forwarded to ``NN``.
         scope_name: Forwarded to ``NN``.

     Raises:
         IndexError: If ``ckpt`` is non-empty but contains no '/'.
     """
     self.env = env
     if ckpt:
         # Only rewrite a real path: the default ``False`` has no
         # ``split`` and used to fail here with AttributeError.
         # NOTE(review): confirm NN treats a falsy ckpt as "no checkpoint".
         ckpt_split = ckpt.split('/')
         ckpt = ('/'.join(ckpt_split + [ckpt_split[1]])) + '-0'
         print (ckpt)
     # The network is created with a zero learning rate.
     lr = 0
     self.pi = NN(env.observation_space.shape[0], env.action_space.n, num_hidden_layers, num_neurons, act_fn, lr, ckpt = ckpt, opt = opt, scope_name = scope_name)
Example #2
0
class NNPolicy(Policy):
    """Policy that delegates action selection to an ``NN`` instance.

    The network is stored in ``self.pi`` and is created with a learning
    rate of 0.
    """

    def __init__(self,
                 env,
                 num_hidden_layers,
                 num_neurons,
                 act_fn,
                 ckpt=False,
                 opt='gd',
                 scope_name='pol'):
        """Build the underlying ``NN`` for ``env``.

        Args:
            env: Environment; ``env.observation_space.shape[0]`` and
                ``env.action_space.n`` are passed to ``NN`` as its first
                two arguments.
            num_hidden_layers: Forwarded to ``NN``.
            num_neurons: Forwarded to ``NN``.
            act_fn: Forwarded to ``NN``.
            ckpt: '/'-separated checkpoint directory. When given, it is
                rewritten to ``<ckpt>/<second path component>-0`` (for
                example ``'a/b/c'`` becomes ``'a/b/c/b-0'``) before being
                handed to ``NN``. A falsy value (the default) is
                forwarded unchanged.
            opt: Forwarded to ``NN``.
            scope_name: Forwarded to ``NN``.

        Raises:
            IndexError: If ``ckpt`` is non-empty but contains no '/'.
        """
        self.env = env
        if ckpt:
            # Only rewrite a real path: the default ``False`` has no
            # ``split`` and used to fail here with AttributeError.
            # NOTE(review): confirm NN treats a falsy ckpt as
            # "no checkpoint".
            ckpt_split = ckpt.split('/')
            ckpt = ('/'.join(ckpt_split + [ckpt_split[1]])) + '-0'
            print(ckpt)
        # The network is created with a zero learning rate.
        lr = 0
        self.pi = NN(env.observation_space.shape[0],
                     env.action_space.n,
                     num_hidden_layers,
                     num_neurons,
                     act_fn,
                     lr,
                     ckpt=ckpt,
                     opt=opt,
                     scope_name=scope_name)

    def action(self, s):
        """Return ``self.pi.action`` for ``s`` reshaped to a single row."""
        s = np.reshape(s, (1, len(s)))
        return self.pi.action(s)

    def action_prob(self, s, a):
        """Return ``self.pi.action_prob`` for ``a`` and ``s`` as one row."""
        s = np.reshape(s, (1, len(s)))
        return self.pi.action_prob(s, a)
Example #3
0
    def __init__(self,
                 env,
                 tr_batch: np.array,
                 val_batch,
                 num_hidden_layers,
                 num_neurons,
                 act_fn,
                 lr,
                 comp_ris=True,
                 pi_eval=None,
                 save_dir=None,
                 preload_dir=None,
                 linear_finetune=False,
                 nn_finetune=False,
                 opt='gd'):
        """Build the network, pre-process the data and set up checkpointing.

        Args:
            env: Environment; ``env.observation_space.shape[0]`` and
                ``env.action_space.n`` are passed to ``NN`` as its first
                two arguments.
            tr_batch: Training trajectories, passed to
                ``utils.pre_process_batch``.
            val_batch: Validation trajectories, processed the same way.
            num_hidden_layers: Forwarded to ``NN``.
            num_neurons: Forwarded to ``NN``.
            act_fn: Forwarded to ``NN``.
            lr: Forwarded to ``NN``.
            comp_ris: When true, ``compute_ris_estimates`` is called at
                the end of construction.
            pi_eval: Stored as ``self.pi_eval``; not used here.
            save_dir: Directory for temporary checkpoints; defaults to
                ``'psec_pi_dir/'``. WARNING: an existing directory at
                this path is deleted recursively.
            preload_dir: Optional '/'-separated checkpoint directory.
                When given, it is rewritten to
                ``<preload_dir>/<second path component>-0`` before being
                handed to ``NN``.
            linear_finetune: Forwarded to ``NN``.
            nn_finetune: Forwarded to ``NN``.
            opt: Forwarded to ``NN``.

        Raises:
            IndexError: If ``preload_dir`` is given but contains no '/'.
        """
        self.env = env
        self.tr_batch = tr_batch
        self.val_batch = val_batch

        if preload_dir is not None:
            # e.g. 'a/b/c' -> 'a/b/c/b-0'
            ckpt_split = preload_dir.split('/')
            preload_dir = ('/'.join(ckpt_split + [ckpt_split[1]])) + '-0'

        self.pi = NN(env.observation_space.shape[0],
                     env.action_space.n,
                     num_hidden_layers,
                     num_neurons,
                     act_fn,
                     lr,
                     ckpt=preload_dir,
                     linear_finetune=linear_finetune,
                     nn_finetune=nn_finetune,
                     scope_name='finetunepgpol',
                     opt=opt)
        self.pi_eval = pi_eval

        # processing batch of data, generating 1-hot vectors
        data_x, data_y = utils.pre_process_batch(env, None, tr_batch, V=None)

        # ratio=0.0 is expected to keep every sample in the first split;
        # the assert below checks exactly that.
        self.train_x, self.train_y, _, _ = utils.split_data(data_x,
                                                            data_y,
                                                            ratio=0.0)
        assert len(self.train_x) == len(data_x)
        # generating separate validation set
        data_x, data_y = utils.pre_process_batch(env, None, val_batch, V=None)
        self.val_x, self.val_y, _, _ = utils.split_data(data_x,
                                                        data_y,
                                                        ratio=0.0)

        print('Number of training trajs {}'.format(len(self.tr_batch)))
        print('Number of training steps {}'.format(len(self.train_x)))
        print('Number of testing trajs {}'.format(len(val_batch)))
        print('Number of testing steps {}'.format(len(self.val_x)))

        # saving setup for early stopping
        self.psec_pi_dir = 'psec_pi_dir/'
        if save_dir is not None:
            self.psec_pi_dir = save_dir
        print('saving psec net in {}'.format(self.psec_pi_dir))
        # Start from an empty directory: anything already there is removed.
        if os.path.isdir(self.psec_pi_dir):
            shutil.rmtree(self.psec_pi_dir)
        # makedirs (not mkdir) so a nested save_dir whose parents do not
        # exist yet is created instead of raising FileNotFoundError.
        os.makedirs(self.psec_pi_dir)
        clct_ckpt_dir = os.path.join(self.psec_pi_dir, 'pi')
        self.clct_ckpt_file = os.path.join(clct_ckpt_dir, 'pi')

        if comp_ris:
            self.compute_ris_estimates()
Example #4
0
class RISNNPolicy():
    """Policy backed by an ``NN`` that is fit to a batch of trajectories.

    The network lives in ``self.pi``. ``compute_ris_estimates`` trains it
    with early stopping on the validation loss, using temporary
    checkpoints under ``self.psec_pi_dir``.
    """

    def __init__(self,
                 env,
                 tr_batch: np.array,
                 val_batch,
                 num_hidden_layers,
                 num_neurons,
                 act_fn,
                 lr,
                 comp_ris=True,
                 pi_eval=None,
                 save_dir=None,
                 preload_dir=None,
                 linear_finetune=False,
                 nn_finetune=False,
                 opt='gd'):
        """Build the network, pre-process the data and set up checkpointing.

        Args:
            env: Environment; ``env.observation_space.shape[0]`` and
                ``env.action_space.n`` are passed to ``NN`` as its first
                two arguments.
            tr_batch: Training trajectories, passed to
                ``utils.pre_process_batch``.
            val_batch: Validation trajectories, processed the same way.
            num_hidden_layers: Forwarded to ``NN``.
            num_neurons: Forwarded to ``NN``.
            act_fn: Forwarded to ``NN``.
            lr: Forwarded to ``NN``.
            comp_ris: When true, ``compute_ris_estimates`` is called at
                the end of construction.
            pi_eval: Stored as ``self.pi_eval``; not used by this class.
            save_dir: Directory for temporary checkpoints; defaults to
                ``'psec_pi_dir/'``. WARNING: an existing directory at
                this path is deleted recursively.
            preload_dir: Optional '/'-separated checkpoint directory.
                When given, it is rewritten to
                ``<preload_dir>/<second path component>-0`` before being
                handed to ``NN``.
            linear_finetune: Forwarded to ``NN``.
            nn_finetune: Forwarded to ``NN``.
            opt: Forwarded to ``NN``.

        Raises:
            IndexError: If ``preload_dir`` is given but contains no '/'.
        """
        self.env = env
        self.tr_batch = tr_batch
        self.val_batch = val_batch

        if preload_dir is not None:
            # e.g. 'a/b/c' -> 'a/b/c/b-0'
            ckpt_split = preload_dir.split('/')
            preload_dir = ('/'.join(ckpt_split + [ckpt_split[1]])) + '-0'

        self.pi = NN(env.observation_space.shape[0],
                     env.action_space.n,
                     num_hidden_layers,
                     num_neurons,
                     act_fn,
                     lr,
                     ckpt=preload_dir,
                     linear_finetune=linear_finetune,
                     nn_finetune=nn_finetune,
                     scope_name='finetunepgpol',
                     opt=opt)
        self.pi_eval = pi_eval

        # processing batch of data, generating 1-hot vectors
        data_x, data_y = utils.pre_process_batch(env, None, tr_batch, V=None)

        # ratio=0.0 is expected to keep every sample in the first split;
        # the assert below checks exactly that.
        self.train_x, self.train_y, _, _ = utils.split_data(data_x,
                                                            data_y,
                                                            ratio=0.0)
        assert len(self.train_x) == len(data_x)
        # generating separate validation set
        data_x, data_y = utils.pre_process_batch(env, None, val_batch, V=None)
        self.val_x, self.val_y, _, _ = utils.split_data(data_x,
                                                        data_y,
                                                        ratio=0.0)

        print('Number of training trajs {}'.format(len(self.tr_batch)))
        print('Number of training steps {}'.format(len(self.train_x)))
        print('Number of testing trajs {}'.format(len(val_batch)))
        print('Number of testing steps {}'.format(len(self.val_x)))

        # saving setup for early stopping
        self.psec_pi_dir = 'psec_pi_dir/'
        if save_dir is not None:
            self.psec_pi_dir = save_dir
        print('saving psec net in {}'.format(self.psec_pi_dir))
        # Start from an empty directory: anything already there is removed.
        if os.path.isdir(self.psec_pi_dir):
            shutil.rmtree(self.psec_pi_dir)
        # makedirs (not mkdir) so a nested save_dir whose parents do not
        # exist yet is created instead of raising FileNotFoundError.
        os.makedirs(self.psec_pi_dir)
        clct_ckpt_dir = os.path.join(self.psec_pi_dir, 'pi')
        self.clct_ckpt_file = os.path.join(clct_ckpt_dir, 'pi')

        if comp_ris:
            self.compute_ris_estimates()

    def action(self, s):
        """Return ``self.pi.action`` for ``s`` reshaped to a single row."""
        s = np.reshape(s, (1, len(s)))
        return self.pi.action(s)

    def action_prob(self, s, a):
        """Return ``self.pi.action_prob`` for ``a`` and ``s`` as one row."""
        s = np.reshape(s, (1, len(s)))
        return self.pi.action_prob(s, a)

    def step_train(self):
        """Run one training pass and one validation pass.

        Returns:
            Tuple ``(tr_loss, val_loss)`` read from ``self.pi`` after the
            passes.
        """
        self.pi.train(self.train_x, self.train_y)
        self.pi.validate(self.val_x, self.val_y)
        tr_loss = self.pi.tr_loss
        val_loss = self.pi.val_loss

        return tr_loss, val_loss

    def compute_ris_estimates(self, epochs=100, patience=50, print_freq=100):
        """Train ``self.pi`` with early stopping on the validation loss.

        Every epoch trains on the full training set and validates on the
        full validation set. Each time the validation loss reaches a new
        minimum the network is checkpointed. Training stops after
        ``epochs`` epochs, or once ``patience`` epochs have passed since
        the last minimum. The best checkpoint is then loaded back and the
        checkpoint directory is deleted.

        Args:
            epochs: Maximum number of epochs.
            patience: Stop once this many epochs have passed without a new
                minimum validation loss.
            print_freq: Print the losses every ``print_freq`` epochs; must
                be positive.
        """
        # The directory is deleted at the end of every run, so recreate it
        # here to keep repeated calls working.
        os.makedirs(self.psec_pi_dir, exist_ok=True)

        min_val_loss = float('inf')
        min_val_itr = 0
        saved_ckpt = False

        for e in range(epochs):
            self.pi.train(self.train_x, self.train_y)
            self.pi.validate(self.val_x, self.val_y)

            tr_loss = self.pi.tr_loss
            val_loss = self.pi.val_loss

            if e % print_freq == 0:
                print('epoch {}, training loss {}, validation loss {}'.format(
                    e, tr_loss, val_loss))

            if val_loss < min_val_loss:
                # error improved!
                min_val_loss = val_loss
                min_val_itr = e
                self.pi.save(self.clct_ckpt_file, min_val_itr)
                saved_ckpt = True

            # if it has been at least `patience` epochs since we last
            # recorded a min, then break
            if (e - min_val_itr) >= patience:
                break

        if saved_ckpt:
            # presumably ``save(path, itr)`` writes to '<path>-<itr>';
            # verify against NN.save
            min_ckpt_file = self.clct_ckpt_file + '-' + str(min_val_itr)
            self.pi.load(min_ckpt_file)
        else:
            # The validation loss never dropped below inf (e.g. it was NaN,
            # or epochs == 0), so there is no checkpoint to restore.
            print('no checkpoint was saved; keeping current network weights')
        shutil.rmtree(self.psec_pi_dir)