import random

import numpy as np

# Env, Memory and DeepQNetwork are project-specific classes assumed to be
# importable from the surrounding package.


def train(arglist):
    # create environment and one memory / DQN per user
    env = Env()
    memories = [Memory(arglist.memory_size) for _ in range(env.n_user)]
    dqn = [
        DeepQNetwork(len(env.action_space[i]), len(env.obs_space[i]), i,
                     learning_rate=arglist.lr,
                     reward_decay=arglist.gamma,
                     e_greedy=arglist.e_greedy,
                     e_greedy_min=arglist.e_greedy_min,
                     replace_target_iter=arglist.replace_target_iter,
                     memory_size=arglist.memory_size,
                     e_greedy_decrement=arglist.e_greedy_decrement)
        for i in range(env.n_user)
    ]
    print('dqn build complete, start training...')

    for episode in range(arglist.max_episodes):
        step = 0
        rwd = [0.0 for _ in range(env.n_user)]
        a_rwd = [0.0 for _ in range(env.n_user)]
        obs = env.reset()

        while not all(env.done):
            step += 1

            # epsilon-greedy action selection for each user
            actions = []
            for i in range(env.n_user):
                if np.random.uniform() < dqn[i].epsilon:
                    actions.append(np.random.randint(0, len(env.action_space[i])))
                else:
                    actions.append(dqn[i].choose_action(obs[i]))

            obs_, reward, done = env.step(actions)

            for i in range(env.n_user):
                if not env.done[i]:
                    rwd[i] += reward[i]
                    memories[i].remember(obs[i], actions[i], reward[i],
                                         obs_[i], done[i])
                    # learn every 5 steps from a random mini-batch
                    if step % 5 == 0:
                        size = memories[i].pointer
                        batch = (random.sample(range(size), size)
                                 if size < arglist.batch_size
                                 else random.sample(range(size), arglist.batch_size))
                        dqn[i].learn(*memories[i].sample(batch))
                else:
                    a_rwd[i] = rwd[i] / step

            obs = obs_

        if episode % 10 == 0:
            print('episode:' + str(episode) + ' steps:' + str(step) +
                  '\nreward:' + str(rwd) +
                  '\naverage_reward:' + str(a_rwd))
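train() relies on a Memory object exposing remember(), .pointer, and sample(indices), none of which are shown above. A minimal numpy sketch of that assumed interface (the tuple layout and full-buffer behaviour are guesses, not the project's actual Memory class):

import random

import numpy as np


class MemorySketch:
    """Replay-buffer sketch with the interface train() relies on:
    remember(obs, action, reward, obs_, done), .pointer, sample(indices)."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []      # list of (obs, action, reward, obs_, done) tuples
        self.pointer = 0      # number of transitions currently stored

    def remember(self, obs, action, reward, obs_, done):
        entry = (np.asarray(obs), action, reward, np.asarray(obs_), done)
        if self.pointer < self.capacity:
            self.buffer.append(entry)
            self.pointer += 1
        else:
            # buffer full: overwrite a random slot so pointer stays at capacity
            self.buffer[random.randrange(self.capacity)] = entry

    def sample(self, indices):
        # return column-wise arrays: (obs, actions, rewards, next_obs, dones)
        batch = [self.buffer[i] for i in indices]
        obs, actions, rewards, obs_, dones = map(np.array, zip(*batch))
        return obs, actions, rewards, obs_, dones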
def _init():
    set_global_seeds(seed)
    env = Env(client,
              frame_skip=frame_skip,
              vae=vae,
              min_throttle=MIN_THROTTLE,
              max_throttle=MAX_THROTTLE,
              n_command_history=N_COMMAND_HISTORY,
              n_stack=n_stack)
    env.seed(seed)
    env = Monitor(env, log_dir, allow_early_resets=True)
    return env
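_init is written as an environment factory; a hedged usage sketch, assuming this factory is returned by an enclosing make_env(...)-style closure (which supplies client, vae, seed, log_dir, etc.) and that the project uses stable-baselines, whose vectorized wrappers take a list of such callables:

from stable_baselines.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([_init])   # single-process wrapper; builds the env lazily
obs = vec_env.reset()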
import numpy as np

# NI, MI, DI, AGENT, MODE_OPERATION and Env are project-specific config
# objects and classes assumed to be imported elsewhere.


class App:
    def __init__(self):
        self.env = Env()

    def choose_agent(self):
        if NI.agent == AGENT.PG:
            from agent.agent_pg import Agent_pg as Agent
            return Agent

    def set_up(self):
        self.env.prep_data()
        if MI.mode_operation in (MODE_OPERATION.PREP_FEATURE,
                                 MODE_OPERATION.PREP_ARR):
            exit()
        Agent = self.choose_agent()
        # feature dimension is the width of the normalised history matrix
        self.agent = Agent(self.env.prep_data_obj.dataset['hist_norm'].shape[1])

    def learn_pg(self):
        score_history = []
        for e in range(NI.rl_episode):
            done = False
            score = 0
            observation = self.env.reset()
            while not done:
                action = self.agent.choose_action(observation)
                observation_, reward, done = self.env.step(action)
                self.agent.store_transition(observation, action, reward)
                observation = observation_
                score += reward
            score_history.append(score)
            print(f'train: {e}')
            self.agent.learn()
            print(f'episode {e} score {score:.1f} '
                  f'average_score {np.mean(score_history[-100:])}')

    def backtest(self):
        self.env.backtest.backtest()
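learn_pg drives an agent through choose_action / store_transition / learn, but agent.agent_pg.Agent_pg itself is not shown. A minimal numpy REINFORCE sketch with the same interface (the linear softmax policy, action count, and hyperparameters are assumptions, not the project's actual agent):

import numpy as np


class SketchPGAgent:
    """REINFORCE sketch exposing choose_action / store_transition / learn."""

    def __init__(self, n_features, n_actions=3, lr=0.01, gamma=0.99):
        self.n_actions = n_actions
        self.lr = lr
        self.gamma = gamma
        self.w = np.zeros((n_actions, n_features))   # linear softmax policy
        self.obs_buf, self.act_buf, self.rew_buf = [], [], []

    def _probs(self, obs):
        logits = self.w @ obs
        logits -= logits.max()                       # numerical stability
        e = np.exp(logits)
        return e / e.sum()

    def choose_action(self, obs):
        return int(np.random.choice(self.n_actions, p=self._probs(np.asarray(obs))))

    def store_transition(self, obs, action, reward):
        self.obs_buf.append(np.asarray(obs, dtype=float))
        self.act_buf.append(action)
        self.rew_buf.append(reward)

    def learn(self):
        # discounted, normalised returns
        returns = np.zeros(len(self.rew_buf))
        running = 0.0
        for t in reversed(range(len(self.rew_buf))):
            running = self.rew_buf[t] + self.gamma * running
            returns[t] = running
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)
        # gradient ascent on G_t * log pi(a|s) for a softmax linear policy
        for obs, action, g in zip(self.obs_buf, self.act_buf, returns):
            probs = self._probs(obs)
            grad = -np.outer(probs, obs)
            grad[action] += obs
            self.w += self.lr * g * grad
        self.obs_buf, self.act_buf, self.rew_buf = [], [], []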
def test_creation(self):
    env = Env(scenario=self.scenario, world=self.world)
    self.assertEqual(self.world, env.world)
def __init__(self):
    self.env = Env()
def maximise_food_eaten(env: Env) -> float:
    return env.total_points()
multiview = False
trained_model = "multiview" if multiview else "baseline"

if multiview:
    model = Model(max_scale=4, steps_per_scale=int(25e3), lr=1e-3, multiview=True)
    model.load(f"../trained_models/{trained_model}")
    data = MultiViewData(episode_duration=21,
                         data_buffer_size=int(16e3),
                         batch_size=16)
    data.load(data_folder="../data/")
else:
    model = Model(max_scale=4, steps_per_scale=int(25e3), lr=1e-3)
    model.load(f"../trained_models/{trained_model}")
    data = ObservationData(obs_buffer_size=21,
                           data_buffer_size=int(16e3),
                           batch_size=16)
    data.load(data_folder="../data/")

env = Env(obs_buffer_size=21)

psnr_mean, psnr_std, ssim_mean, ssim_std = get_results(train=True)
print(f"{trained_model} training, PSNR:{psnr_mean} ({psnr_std}), "
      f"SSIM:{ssim_mean} ({ssim_std})")

psnr_mean, psnr_std, ssim_mean, ssim_std = get_results(train=False)
print(f"{trained_model} testing, PSNR:{psnr_mean} ({psnr_std}), "
      f"SSIM:{ssim_mean} ({ssim_std})")
def main(_):
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        env = Env(MONITOR_DIR, RANDOM_SEED, FPS, sess)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)

        state_dim = env.observation_space.shape
        try:
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure the action bound is symmetric
            assert np.all(env.action_space.high == -env.action_space.low)
            action_type = 'Continuous'
        except (AttributeError, IndexError):
            # Discrete action spaces have no usable .shape / .high
            action_dim = env.action_space.n
            action_bound = None
            action_type = 'Discrete'
        print(action_type)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU, action_type)
        critic = CriticNetwork(sess, state_dim, action_dim, action_bound,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars(), action_type)

        # Initialize replay memory
        replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)

        if action_type == 'Continuous':
            noise = OrnsteinUhlenbeckProcess(OU_THETA, mu=OU_MU, sigma=OU_SIGMA,
                                             n_steps_annealing=EXPLORATION_EPISODES)
        else:
            noise = GreedyPolicy(action_dim, EXPLORATION_EPISODES,
                                 MIN_EPSILON, MAX_EPSILON)

        agent = DDPGAgent(sess, action_type, actor, critic, GAMMA, env,
                          replay_buffer, noise=noise,
                          exploration_episodes=EXPLORATION_EPISODES,
                          max_episodes=MAX_EPISODES,
                          max_steps_episode=MAX_STEPS_EPISODE,
                          warmup_steps=WARMUP_STEPS,
                          mini_batch=FLAGS.mini_batch,
                          eval_episodes=EVAL_EPISODES,
                          eval_periods=EVAL_PERIODS,
                          env_render=FLAGS.env_render,
                          summary_dir=SUMMARY_DIR,
                          model_dir=MODEL_DIR,
                          model_store_periods=MODEL_STORE_PERIODS,
                          detail=DETAIL,
                          render_interval=RENDER_INTERVAL)

        # Keep retrying until a CARLA client connection is established
        while True:
            try:
                with make_carla_client('localhost', FLAGS.port) as client:
                    env.connected(client)
                    agent.train()
            except TCPConnectionError as error:
                print(error)
                time.sleep(5.0)
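The continuous-action branch relies on OrnsteinUhlenbeckProcess for exploration noise, whose implementation is not included above. A minimal sketch of such a process under the same constructor arguments (the sigma-annealing scheme, reset behaviour, and output shape are assumptions):

import numpy as np


class OUNoiseSketch:
    """Discretised Ornstein-Uhlenbeck process: x += theta*(mu - x) + sigma*N(0,1)."""

    def __init__(self, theta, mu=0.0, sigma=0.2, sigma_min=0.0,
                 n_steps_annealing=1000, size=1):
        self.theta, self.mu, self.size = theta, mu, size
        self.sigma0, self.sigma_min = sigma, sigma_min
        self.n_steps_annealing = n_steps_annealing
        self.reset()

    def reset(self):
        self.x = np.ones(self.size) * self.mu
        self.step_count = 0

    def sample(self):
        # linearly anneal sigma towards sigma_min over n_steps_annealing steps
        frac = min(1.0, self.step_count / float(self.n_steps_annealing))
        sigma = self.sigma0 + frac * (self.sigma_min - self.sigma0)
        dx = self.theta * (self.mu - self.x) + sigma * np.random.randn(self.size)
        self.x += dx
        self.step_count += 1
        return self.x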