Exemple #1
0
print('\nInit Policies')

# Build a list of pretrained policies, one per entry (seed directory) found
# under policies_dir. Only the first one is loaded for now (see `break`).
policies = []
policies_dir = home + '/Documents/tmp/multiple_seeds_of_policies/BreakoutNoFrameskip-v4/A2C/'

# os.listdir() returns entries in arbitrary order; sort them so that the seed
# selected by the early `break` below is the same on every run.
for f in sorted(os.listdir(policies_dir)):
    seed_dir = os.path.join(policies_dir, f)
    if not os.path.isdir(seed_dir):
        # A plain file cannot contain a checkpoint sub-directory, and
        # torch.load would fail on the resulting path; skip it.
        continue

    print(f)
    # NOTE(review): (2, 4) presumably means input channels and action count;
    # confirm against the CNNPolicy constructor.
    policy = CNNPolicy(2, 4)

    # Reuse policies_dir instead of repeating the directory literal.
    param_file = os.path.join(seed_dir, 'model_params3', 'model_params9999360.pt')
    param_dict = torch.load(param_file)

    policy.load_state_dict(param_dict)
    print('loaded params', param_file)
    # Move to the GPU only after the weights have been restored.
    policy.cuda()

    policies.append(policy)

    # just one for now
    break

# if load_policy:
# param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'

# print (param_dict.keys())
# for key in param_dict.keys():
#     print (param_dict[key].size())
# Switch: when truthy, the expert's saved weights are restored from disk.
load_policy = 1

if load_policy:
    # Checkpoint under the breakout_2frames_leakyrelu2 experiment directory.
    param_file = home+'/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt'
    param_dict = torch.load(param_file)
    expert_policy.load_state_dict(param_dict)
    print('loaded params', param_file)

# The expert is moved to the GPU whether or not weights were loaded.
expert_policy.cuda()

# The imitator is built with the same constructor arguments, (2, 4), that
# the other policies in this script use; no weights are loaded into it here.
print('Init Imitator Policy')
imitator_policy = CNNPolicy(2, 4)
imitator_policy = imitator_policy.cuda()




# def save_params(save_location, model):





    # Build the policy network directly; no agent object is created here.
    print ('init policy')
    policy = CNNPolicy(2*3, 18)   #frames*channels, action size

    # Restore the saved parameters for this environment from exp_dir.
    # Alternative (earlier) checkpoint, kept for reference:
    # param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params2000000.pt'    
    param_file = exp_dir + '/' +env_name+ 'NoFrameskip-v4/A2C/seed0/model_params3/model_params3999840.pt'    
    param_dict = torch.load(param_file)

    policy.load_state_dict(param_dict)
    # policy = torch.load(param_file).cuda()
    print ('loaded params', param_file)
    # Move the policy to the GPU once its weights are loaded.
    policy.cuda()









    # Create the environment.
    # NOTE(review): this uses env_name2 while the checkpoint path above uses
    # env_name; confirm the two names are meant to differ.
    env = make_env_basic(env_name2)
# Set to a falsy value to skip restoring the expert's weights.
load_policy = 1

if load_policy:
    # Location of the saved state dict for the expert policy.
    param_file = home + '/Documents/tmp/breakout_2frames_leakyrelu2/BreakoutNoFrameskip-v4/A2C/seed0/model_params3/model_params9999360.pt'
    param_dict = torch.load(param_file)
    expert_policy.load_state_dict(param_dict)
    print('loaded params', param_file)

# Runs unconditionally: the expert goes to the GPU even without loading.
expert_policy.cuda()

# Fresh imitator network; nothing is loaded into it in this section.
print('Init Imitator Policy')
imitator_policy = CNNPolicy(2, 4)
imitator_policy = imitator_policy.cuda()

# def save_params(save_location, model):

#     #saves all params in recommended way

#     save_path = os.path.join(save_dir, 'model_params3')
#     try:
#         os.makedirs(save_path)
#     except OSError: