        # TD target: r + gamma * (1 - done) * max_a' Q_target(s', a')
        q_target = b_r + GAMMA * (1. - b_d) * q_next
        q_target = q_target.detach()  # no gradient through the target

        # loss
        loss = self.loss_function(q_eval, q_target)
        logger.store(loss=loss)

        # backprop loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss


dqn = DQN()

logdir = './DQN/%s' % args.games + '/%i' % int(time.time())
logger_kwargs = setup_logger_kwargs(args.games, args.seed, data_dir=logdir)
logger = EpochLogger(**logger_kwargs)
kwargs = {
    'seed': args.seed,
    'learning_rate': args.lr,
}
logger.save_config(kwargs)

# model load with check
if LOAD and os.path.isfile(PRED_PATH) and os.path.isfile(TARGET_PATH):
    dqn.load_model()
    pkl_file = open(RESULT_PATH, 'rb')
    result = pickle.load(pkl_file)
    pkl_file.close()
    print('Load complete!')
else:
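# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): one way the
# tensors consumed by the TD-target line above are typically produced.
# `dqn_td_target`, `target_net`, and `b_s_` are assumed names for
# illustration only; the real sampling/forward code lives earlier in
# learn() and is not shown in this excerpt.
import torch

def dqn_td_target(b_r, b_d, b_s_, target_net, gamma=0.99):
    """TD target: r + gamma * (1 - done) * max_a' Q_target(s', a')."""
    with torch.no_grad():  # no gradient flows into the target network
        q_next = target_net(b_s_).max(dim=1)[0]
    return b_r + gamma * (1. - b_d) * q_next
# --------------------------------------------------------------------------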
        # logger.log_tabular('VVals', with_min_and_max=True)
        # logger.log_tabular('LogPi', with_min_and_max=True)
        # logger.log_tabular('LossPi', average_only=True)
        # logger.log_tabular('LossQ1', average_only=True)
        # logger.log_tabular('LossQ2', average_only=True)
        # logger.log_tabular('LossV', average_only=True)
        # logger.log_tabular('Time', time.time() - start_time)
        # logger.dump_tabular()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
    parser.add_argument('--hid', type=int, default=300)
    parser.add_argument('--l', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--exp_name', type=str, default='sac')
    args = parser.parse_args()

    from logx import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    sac(lambda: gym.make(args.env),
        ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
        gamma=args.gamma, seed=args.seed, epochs=args.epochs,
        logger_kwargs=logger_kwargs)
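# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the commented-out
# lines above follow the spinup EpochLogger pattern, where values
# accumulated via logger.store(...) during an epoch are summarized once
# per epoch. The function name and diagnostic keys below are assumptions
# mirroring those commented lines.
import time

def dump_epoch_diagnostics(logger, epoch, start_time):
    logger.log_tabular('Epoch', epoch)
    # average_only=True reports only the mean of the stored values;
    # with_min_and_max=True would additionally report min/max/std.
    logger.log_tabular('LossPi', average_only=True)
    logger.log_tabular('LossQ1', average_only=True)
    logger.log_tabular('LossQ2', average_only=True)
    logger.log_tabular('Time', time.time() - start_time)
    logger.dump_tabular()  # writes one row and clears the epoch buffers
# --------------------------------------------------------------------------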
env = make_env(util.ENV_CONFIG_DIR + env_config)
obs = []
actions = []
action_sign = np.array([-1, -1])

for i in range(iterations):
    current_bound = initial_bound
    o = env.reset()
    real_action = env.action_space.default() * 0.5
    for t in range(max_ep_len):
        o, r, d, _ = env.step(real_action)
        obs.append(o)
        actions.append(real_action)

        # PID-style terms: proportional (latest obs), integral (short
        # moving average), derivative (mean of recent obs differences)
        vp = o
        vi = np.mean(obs[-5:])
        vd = np.mean(np.diff(obs, axis=0)[-5:])
        vd = 0 if np.isnan(vd) else vd  # no diffs yet on the first step

        # multiplicative action update, clipped to the current bound
        delta = np.exp((wp * vp + wi * vi + wd * vd) * action_sign)
        delta = np.clip(delta, 1. / current_bound, current_bound)
        # print(real_action, o, delta)
        real_action = env.action_space.clip(real_action * delta)
        current_bound = np.maximum(final_bound, current_bound * bound_decay)

    print(np.mean(np.abs(obs[-20:])) * 100)

logger_kwargs = setup_logger_kwargs(exp_name, seed,
                                    data_dir=util.LOG_DIR + os.path.splitext(env_config)[0])
logger = EpochLogger(**logger_kwargs)
# util.plot_seq_obs_and_actions(np.abs(obs), actions, env.action_space.high,
#                               logger.output_dir + '/actions.png')
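# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): a self-contained
# restatement of one step of the PID-style multiplicative controller in
# the loop above. All argument names are assumptions for illustration.
import numpy as np

def pid_multiplicative_step(action, obs_history, wp, wi, wd, action_sign, bound):
    """Scale `action` by exp(PID term), clipping the per-step change."""
    vp = obs_history[-1]                              # proportional: latest obs
    vi = np.mean(obs_history[-5:])                    # integral: short moving average
    vd = np.mean(np.diff(obs_history, axis=0)[-5:])   # derivative: recent obs deltas
    vd = 0. if np.isnan(vd) else vd                   # no diffs yet on the first step
    delta = np.exp((wp * vp + wi * vi + wd * vd) * action_sign)
    delta = np.clip(delta, 1. / bound, bound)         # limit multiplicative change
    return action * delta
# --------------------------------------------------------------------------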
        loss.backward()
        self.optimizer.step()

        # second network: regress q_eval2 toward the target built from net 1
        loss = self.loss_function(q_eval2, q_target1)
        self.optimizer1.zero_grad()
        loss.backward()
        self.optimizer1.step()
        return loss


dqn = Smoothing_DQN()

logdir = './DOUBLE_average_choose_DQN/%s' % args.games + '/%i' % int(time.time())
logger_kwargs = setup_logger_kwargs(args.games + "DOUBLE_average_choose", args.seed,
                                    data_dir=logdir)
logger = EpochLogger(**logger_kwargs)
kwargs = {
    'seed': args.seed,
    'delay_interval': dealy_interval,
}
logger.save_config(kwargs)

# model load with check
if LOAD and os.path.isfile(PRED_PATH) and os.path.isfile(TARGET_PATH):
    dqn.load_model()
    pkl_file = open(RESULT_PATH, 'rb')
    result = pickle.load(pkl_file)
    pkl_file.close()
    print('Load complete!')
else:
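# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the tail of
# learn() above updates two Q-networks with separate optimizers, each
# regressed toward a target built from the other network (the excerpt
# shows q_eval2 paired with q_target1; the symmetric first update is
# assumed). All names below are assumptions, not the author's API.
import torch.nn.functional as F

def cross_update(q_eval1, q_target2, q_eval2, q_target1, opt1, opt2):
    """Update net 1 toward target 2, then net 2 toward target 1."""
    loss1 = F.mse_loss(q_eval1, q_target2.detach())
    opt1.zero_grad()
    loss1.backward()
    opt1.step()

    loss2 = F.mse_loss(q_eval2, q_target1.detach())
    opt2.zero_grad()
    loss2.backward()
    opt2.step()
    return loss1, loss2
# --------------------------------------------------------------------------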