Exemple #1
0
    def __init__(self, hparams):
        """Build the policy network, rollout storage and Adam optimizer.

        Args:
            hparams: String-keyed mapping of hyperparameters (presumably a
                plain dict -- TODO confirm). Keys read here: 'obs_shape',
                'n_actions', 'use_gae', 'gamma', 'tau', 'num_steps',
                'num_processes', 'value_loss_coef', 'entropy_coef', 'cuda',
                'opt', 'grad_clip', 'lr' and 'eps'. The mapping itself is
                kept as ``self.hparams``.
        """
        # Copy the plain settings onto the instance, in lookup order.
        for key in ('obs_shape', 'n_actions', 'use_gae', 'gamma', 'tau',
                    'num_steps', 'num_processes', 'value_loss_coef',
                    'entropy_coef', 'cuda', 'opt', 'grad_clip'):
            setattr(self, key, hparams[key])

        # Policy/value network: receives obs_shape[0] and the action count.
        first_obs_dim = self.obs_shape[0]
        self.actor_critic = CNNPolicy(first_obs_dim, self.n_actions)

        # Storing rollouts
        self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                       self.obs_shape, self.n_actions)

        # NOTE(review): both objects are moved to the GPU unconditionally;
        # the stored `self.cuda` flag is not consulted in this method.
        for gpu_resident in (self.actor_critic, self.rollouts):
            gpu_resident.cuda()

        # NOTE(review): Adam is always used here; `self.opt` is stored but
        # does not influence the optimizer choice in this method.
        adam_kwargs = {'lr': hparams['lr'], 'eps': hparams['eps']}
        self.optimizer = optim.Adam(params=self.actor_critic.parameters(),
                                    **adam_kwargs)

        self.hparams = hparams
Exemple #2
0
    def __init__(self, hparams):
        """Set up the policy network, rollout storage and optimizer.

        Args:
            hparams: String-keyed mapping of hyperparameters (presumably a
                plain dict -- TODO confirm). Always read: 'use_gae', 'gamma',
                'tau', 'obs_shape', 'num_steps', 'num_processes',
                'value_loss_coef', 'entropy_coef', 'cuda', 'opt',
                'grad_clip', 'next_state_pred_', 'action_space' and 'gif_'
                ('ls_', 'vae_' and 'grad_var_' are read only while the
                preceding flags are falsy). 'traj_action_mask' is optional.
                'lr', 'eps', 'alpha' and 'mom' are read only by the
                optimizer branch that needs them.

        If 'opt' is not one of 'rms', 'adam' or 'sgd', a message is printed
        and ``self.optimizer`` is left unset.
        """
        # Copy the plain settings onto the instance, in lookup order.
        for key in ('use_gae', 'gamma', 'tau', 'obs_shape', 'num_steps',
                    'num_processes', 'value_loss_coef', 'entropy_coef',
                    'cuda', 'opt', 'grad_clip', 'next_state_pred_'):
            setattr(self, key, hparams[key])

        # Policy and Value network: the masked variant is opt-in.
        masked = ('traj_action_mask' in hparams
                  and hparams['traj_action_mask'])
        policy_cls = CNNPolicy_trajectory_action_mask if masked else CNNPolicy
        self.actor_critic = policy_cls(self.obs_shape[0],
                                       hparams['action_space'])

        # Storing rollouts
        self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                       self.obs_shape, hparams['action_space'])

        if self.cuda:
            for gpu_resident in (self.actor_critic, self.rollouts):
                gpu_resident.cuda()

        # Optimizer: pick the class and its keyword arguments first, then
        # construct it in one place.
        opt_cls = None
        if self.opt == 'rms':
            opt_cls = optim.RMSprop
            opt_kwargs = {'lr': hparams['lr'],
                          'eps': hparams['eps'],
                          'alpha': hparams['alpha']}
        elif self.opt == 'adam':
            opt_cls = optim.Adam
            opt_kwargs = {'lr': hparams['lr'], 'eps': hparams['eps']}
        elif self.opt == 'sgd':
            opt_cls = optim.SGD
            opt_kwargs = {'lr': hparams['lr'], 'momentum': hparams['mom']}

        if opt_cls is None:
            # NOTE(review): self.optimizer stays undefined on this path.
            print('no opt specified')
        else:
            self.optimizer = opt_cls(params=self.actor_critic.parameters(),
                                     **opt_kwargs)

        self.action_shape = 1

        # The list-based rollout store is only created when one of these
        # flags is truthy.
        if any(hparams[flag]
               for flag in ('gif_', 'ls_', 'vae_', 'grad_var_')):
            self.rollouts_list = RolloutStorage_list()

        self.hparams = hparams
Exemple #3
0
    def __init__(self, envs, hparams):
        """Set up the policy network, rollout storage and optimizer.

        Args:
            envs: Environment object; only ``envs.action_space`` is read
                here, and it is forwarded to the policy and rollout storage.
            hparams: String-keyed mapping of hyperparameters (presumably a
                plain dict -- TODO confirm). Always read: 'use_gae', 'gamma',
                'tau', 'obs_shape', 'num_steps', 'num_processes',
                'value_loss_coef', 'entropy_coef', 'cuda', 'opt',
                'grad_clip' and 'gif_' ('ls_' is read only when 'gif_' is
                falsy). 'traj_action_mask' is optional. 'lr', 'eps', 'alpha'
                and 'mom' are read only by the optimizer branch that needs
                them.

        If 'opt' is not one of 'rms', 'adam' or 'sgd', a message is printed
        and ``self.optimizer`` is left unset.
        """
        # Copy the plain settings onto the instance, in lookup order.
        for key in ('use_gae', 'gamma', 'tau', 'obs_shape', 'num_steps',
                    'num_processes', 'value_loss_coef', 'entropy_coef',
                    'cuda', 'opt', 'grad_clip'):
            setattr(self, key, hparams[key])

        # Policy and Value network: the masked variant is opt-in.
        masked = ('traj_action_mask' in hparams
                  and hparams['traj_action_mask'])
        policy_cls = CNNPolicy_trajectory_action_mask if masked else CNNPolicy
        self.actor_critic = policy_cls(self.obs_shape[0], envs.action_space)

        # Storing rollouts
        self.rollouts = RolloutStorage(self.num_steps, self.num_processes,
                                       self.obs_shape, envs.action_space)

        if self.cuda:
            for gpu_resident in (self.actor_critic, self.rollouts):
                gpu_resident.cuda()

        # Optimizer: pick the class and its keyword arguments first, then
        # construct it in one place.
        opt_cls = None
        if self.opt == 'rms':
            opt_cls = optim.RMSprop
            opt_kwargs = {'lr': hparams['lr'],
                          'eps': hparams['eps'],
                          'alpha': hparams['alpha']}
        elif self.opt == 'adam':
            opt_cls = optim.Adam
            opt_kwargs = {'lr': hparams['lr'], 'eps': hparams['eps']}
        elif self.opt == 'sgd':
            opt_cls = optim.SGD
            opt_kwargs = {'lr': hparams['lr'], 'momentum': hparams['mom']}

        if opt_cls is None:
            # NOTE(review): self.optimizer stays undefined on this path.
            print('no opt specified')
        else:
            self.optimizer = opt_cls(params=self.actor_critic.parameters(),
                                     **opt_kwargs)

        # The action shape is fixed to 1 regardless of the action space type.
        self.action_shape = 1

        # The list-based rollout store is only created when one of these
        # flags is truthy.
        if any(hparams[flag] for flag in ('gif_', 'ls_')):
            self.rollouts_list = RolloutStorage_list()

        self.hparams = hparams
Exemple #4
0
        state_dataset.append(dataset[i][t][1])  #  /255.)

# Report how many states were gathered.
print(len(state_dataset))

print('\nInit Policies')

# Load trained Breakout A2C policies, one sub-directory of `policies_dir`
# per run.
policies = []
policies_dir = home + '/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/'
# Just one for now: only the first directory entry is loaded. os.listdir
# returns entries in arbitrary order, so which run that is may vary.
for f in os.listdir(policies_dir)[:1]:
    print(f)
    policy = CNNPolicy(2, 4)
    param_file = policies_dir + f + '/model_params3/model_params9999360.pt'

    # The checkpoint holds a state dict, restored into the fresh network.
    param_dict = torch.load(param_file)
    policy.load_state_dict(param_dict)
    print('loaded params', param_file)

    policy.cuda()
    policies.append(policy)

# if load_policy:
# param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
# Report how many states were gathered.
print(len(state_dataset))

print('\nInit Policies')

# Load trained RoadRunner A2C policies, one sub-directory of `policies_dir`
# per run.
policies = []
policies_dir = home + '/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/'
# Just one for now: only the first directory entry is loaded. os.listdir
# returns entries in arbitrary order, so which run that is may vary.
for f in os.listdir(policies_dir)[:1]:
    print(f)
    policy = CNNPolicy(2, 18)  # num-frames, num-actions
    param_file = policies_dir + f + '/model_params3/model_params9999360.pt'

    # The checkpoint holds a state dict, restored into the fresh network.
    param_dict = torch.load(param_file)
    policy.load_state_dict(param_dict)
    print('loaded params', param_file)

    policy.cuda()
    policies.append(policy)
Exemple #6
0
    #load experiment dict
    print ('load experiment dict')
    dict_location = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_dict.json'
    with open(dict_location, 'r') as outfile:
        exp_dict = json.load(outfile)







    #Init policy , not agent
    print ('init policy')
    policy = CNNPolicy(2*3, 18)   #frames*channels, action size

    #load params
    # param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params2000000.pt'    
    param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'    
    param_dict = torch.load(param_file)

    policy.load_state_dict(param_dict)
    # policy = torch.load(param_file).cuda()
    print ('loaded params', param_file)
    policy.cuda()




Exemple #7
0
    def __init__(self, hparams):
        """Create the policy network and place it on the GPU.

        Args:
            hparams: String-keyed mapping; only 'obs_shape' and 'n_actions'
                are read here.
        """
        self.obs_shape = hparams['obs_shape']
        self.n_actions = hparams['n_actions']

        # The network receives obs_shape[0] and the action count, and is
        # moved to the GPU unconditionally.
        network = CNNPolicy(self.obs_shape[0], self.n_actions)
        self.actor_critic = network.cuda()
# dataset: trajectories: timesteps: (action,state) state: [2,84,84]

# Sanity-print the dataset layout, using the first timestep of trajectory
# `ii` as a sample.
print(len(dataset))
first_step = dataset[ii][0]
print(len(first_step))  # single timepoint
print(first_step[0].shape)  # action [1]          a_t+1
print(first_step[1].shape)  # state  [2,84,84]    s_t

# Flatten every trajectory into one list of states (element 1 of each
# timestep). The states are stored as-is; the /255 scaling is disabled.
state_dataset = []
for i in range(len(dataset)):
    trajectory = dataset[i]
    for t in range(len(trajectory)):
        state_dataset.append(trajectory[t][1])

print(len(state_dataset))

print('Init Expert Policy')
expert_policy = CNNPolicy(2, 4)  # created without moving it to the GPU
# Flag that enables the checkpoint-loading branch below.
load_policy = 1

if load_policy:
    # param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'
    param_file = home + '/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt'
    param_dict = torch.load(param_file)

    # print (param_dict.keys())
    # for key in param_dict.keys():
    #     print (param_dict[key].size())

    # print (policy.state_dict().keys())
    # for key in policy.state_dict().keys():
Exemple #9
0
#     param_file = home+'/Documents/tmp/RoadRunner/RoadRunnerNoFrameskip-v4/A2C/'+f+'/model_params3/model_params9999360.pt'
#     param_dict = torch.load(param_file)

#     policy.load_state_dict(param_dict)
#     # policy = torch.load(param_file).cuda()
#     print ('loaded params', param_file)
#     policy.cuda()

#     policies.append(policy)

#     #just one for now
#     break

# policy = policies[0]

# Policy network sized as (num-frames * channels, num-actions).
policy = CNNPolicy(2 * 3, 18)

# Checkpoint of seed0 for the selected experiment and environment.
param_file = '/'.join((
    home,
    'Documents',
    'tmp',
    exp_name,
    env_name + 'NoFrameskip-v4',
    'A2C',
    'seed0',
    'model_params3',
    'model_params3999840.pt',
))

# Restore the saved state dict, then move the network to the GPU.
param_dict = torch.load(param_file)
policy.load_state_dict(param_dict)
print('loaded params', param_file)
policy.cuda()


class MASK_PREDICTOR(nn.Module):
    def __init__(self):
        super(MASK_PREDICTOR, self).__init__()