# Start the target network from exactly the same weights as the policy network.
target_net.load_state_dict(policy_net.state_dict())

# Create the optimizer. Adam is the optimizer actually used; the RMSprop
# configuration below is kept (disabled) as the alternative setup.
# opt = optim.RMSprop(policy_net.parameters(),
#                     lr=info["RMS_LEARNING_RATE"],
#                     momentum=info["RMS_MOMENTUM"],
#                     eps=info["RMS_EPSILON"],
#                     centered=info["RMS_CENTERED"],
#                     alpha=info["RMS_DECAY"])
opt = optim.Adam(policy_net.parameters(), lr=info['ADAM_LEARNING_RATE'])

# Compare by value, not identity: `is not ''` depends on string interning and
# raises a SyntaxWarning on Python 3.8+.
if args.model_loadpath != '':
    # NOTE(review): `model_dict` is not created in this section; it is assumed
    # to hold the checkpoint read from args.model_loadpath -- confirm upstream.
    # what about random states - they will be wrong now???
    # TODO - what about target net update cnt
    target_net.load_state_dict(model_dict['target_net_state_dict'])
    policy_net.load_state_dict(model_dict['policy_net_state_dict'])
    opt.load_state_dict(model_dict['optimizer'])
    print("loaded model state_dicts")

    # With no explicit buffer path, derive one from the model path by swapping
    # the '.pkl' suffix for '_train_buffer.npz'.
    if args.buffer_loadpath == '':
        args.buffer_loadpath = args.model_loadpath.replace(
            '.pkl', '_train_buffer.npz')
        print("auto loading buffer from:%s" % args.buffer_loadpath)

    # Best effort: a buffer that cannot be loaded is reported but does not
    # abort the run, so training proceeds without the saved buffer contents.
    try:
        replay_memory.load_buffer(args.buffer_loadpath)
    except Exception as e:
        print(e)
        print(
            'not able to load from buffer: %s. exit() to continue with empty buffer'
            % args.buffer_loadpath)

train_sim(start_step_number, start_last_save)
print("using randomized prior") policy_net = NetWithPrior(policy_net, prior_net, info['PRIOR_SCALE']) target_net = NetWithPrior(target_net, prior_net, info['PRIOR_SCALE']) target_net.load_state_dict(policy_net.state_dict()) opt = optim.Adam(policy_net.parameters(), lr=info['ADAM_LEARNING_RATE']) if load_model: # what about random states - they will be wrong now??? # TODO - what about target net update cnt (TODO from johana) target_net.load_state_dict(model_dict['target_net_state_dict']) policy_net.load_state_dict(model_dict['policy_net_state_dict']) opt.load_state_dict(model_dict['optimizer']) print("loaded model state_dicts") buffer_loadpath = info['model_loadpath'].replace('.pkl', '_train_buffer.npz') print("auto loading buffer from:%s" %buffer_loadpath) try: replay_memory.load_buffer(buffer_loadpath) except Exception as e: print(e) print('not able to load from buffer: %s. exit() to continue with empty buffer' %buffer_loadpath) advice_net=None if info['advice_flg']: print('loading advice model from: %s' %info['advicemodel_loadpath']) model_dict = torch.load(info['advicemodel_loadpath']) advice_net = EnsembleNet(n_ensemble=info['N_ENSEMBLE'], n_actions=env.num_actions, network_output_size=info['NETWORK_INPUT_SIZE'][0], num_channels=info['HISTORY_SIZE'], dueling=info['DUELING']).to(info['DEVICE']) if info['PRIOR']: advice_net = NetWithPrior(advice_net, prior_net, info['PRIOR_SCALE']) action_getter = ActionGetter(n_actions=env.num_actions, policy_net=policy_net,