Code example #1
File: train.py  Project: CQ6hang/MARLinEC
import random

import numpy as np

# Env, Memory and DeepQNetwork are project-local classes; their imports are
# omitted from this excerpt.


def train(arglist):
    # create environment
    env = Env()

    memories = [Memory(arglist.memory_size) for _ in range(env.n_user)]

    dqn = [
        DeepQNetwork(len(env.action_space[i]),
                     len(env.obs_space[i]),
                     i,
                     learning_rate=arglist.lr,
                     reward_decay=arglist.gamma,
                     e_greedy=arglist.e_greedy,
                     e_greedy_min=arglist.e_greedy_min,
                     replace_target_iter=arglist.replace_target_iter,
                     memory_size=arglist.memory_size,
                     e_greedy_decrement=arglist.e_greedy_decrement)
        for i in range(env.n_user)
    ]
    print('dqn build complete, start training...')
    for episode in range(arglist.max_episodes):
        step = 0
        rwd = [0.0 for _ in range(env.n_user)]
        a_rwd = [0.0 for _ in range(env.n_user)]
        obs = env.reset()

        while not all(env.done):
            # print(env.done)
            # print(step)
            step += 1
            actions = []
            for i in range(env.n_user):
                # epsilon-greedy: random action with probability epsilon, greedy otherwise
                if np.random.uniform() < dqn[i].epsilon:
                    actions.append(
                        np.random.randint(0, len(env.action_space[i])))
                else:
                    action = dqn[i].choose_action(obs[i])
                    actions.append(action)
            obs_, reward, done = env.step(actions)

            for i in range(env.n_user):
                if not env.done[i]:
                    rwd[i] += reward[i]
                    memories[i].remember(obs[i], actions[i], reward[i],
                                         obs_[i], done[i])
                    if step % 5 == 0:
                        size = memories[i].pointer
                        # sample at most batch_size stored transitions
                        batch = random.sample(range(size),
                                              min(size, arglist.batch_size))
                        dqn[i].learn(*memories[i].sample(batch))
                else:
                    a_rwd[i] = rwd[i] / step

            obs = obs_

        if episode % 10 == 0:
            print('episode:' + str(episode) + ' steps:' + str(step) +
                  '\nreward:' + str(rwd) + '\naverage_reward:' + str(a_rwd))
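Note: the Memory replay buffer used above exposes remember, sample and a pointer counter, but its implementation is not part of this excerpt. The sketch below is only an assumed interface consistent with the calls in the loop, not the actual class from CQ6hang/MARLinEC.

import numpy as np

class Memory:
    """Minimal replay-buffer sketch matching the usage above (assumed interface)."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []   # (obs, action, reward, obs_, done) tuples
        self.pointer = 0   # number of stored transitions, capped at capacity
        self._write = 0    # next slot to overwrite once the buffer is full

    def remember(self, obs, action, reward, obs_, done):
        transition = (obs, action, reward, obs_, done)
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self._write] = transition
        self._write = (self._write + 1) % self.capacity
        self.pointer = len(self.buffer)

    def sample(self, indices):
        obs, actions, rewards, obs_, dones = zip(*(self.buffer[i] for i in indices))
        return (np.array(obs), np.array(actions), np.array(rewards),
                np.array(obs_), np.array(dones))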
Code example #2
File: utils.py  Project: sheelabhadra/Learning2Drive
 def _init():
     set_global_seeds(seed)
     env = Env(client, frame_skip=frame_skip, vae=vae,
               min_throttle=MIN_THROTTLE, max_throttle=MAX_THROTTLE,
               n_command_history=N_COMMAND_HISTORY, n_stack=n_stack)
     env.seed(seed)
     env = Monitor(env, log_dir, allow_early_resets=True)
     return env
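The nested _init above is the factory style consumed by stable-baselines' vectorized environment wrappers (the project uses stable-baselines, as set_global_seeds suggests). A typical call site might look like the sketch below; DummyVecEnv is the real stable-baselines class, while the assumption is that _init is returned by an outer make_env-style helper that supplies client, seed and the other captured variables.

from stable_baselines.common.vec_env import DummyVecEnv

# Wrap the factory in a (single-process) vectorized env; pass several
# factories to run multiple environments in parallel.
vec_env = DummyVecEnv([_init])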
Code example #3
class App:
    def __init__(self):
        self.env = Env()

    def choose_agent(self):
        if NI.agent == AGENT.PG:
            from agent.agent_pg import Agent_pg as Agent
            return Agent
        raise ValueError(f'Unsupported agent type: {NI.agent}')

    def set_up(self):
        self.env.prep_data()
        if MI.mode_operation in (MODE_OPERATION.PREP_FEATURE, MODE_OPERATION.PREP_ARR):
            exit()

        Agent = self.choose_agent()
        self.agent = Agent(
            self.env.prep_data_obj.dataset['hist_norm'].shape[1])
        # print(self.env.prep_data_obj.dataset['hist_norm'].shape)
        # print(self.env.prep_data_obj.dataset[f'future_{DI.main_col}'].shape)
        #
        # self.backtest.backtest()
        # print(self.env.data)

    def learn_pg(self):
        score_history = []
        for e in range(NI.rl_episode):
            done = False
            score = 0
            observation = self.env.reset()
            while not done:
                action = self.agent.choose_action(observation)
                observation_, reward, done = self.env.step(action)
                self.agent.store_transition(observation, action, reward)
                observation = observation_
                score += reward
            score_history.append(score)

            print(f'train: {e}')
            self.agent.learn()

            print(
                f'episode {e} score {score:.1f} average_score {np.mean(score_history[-100:])}'
            )
        filename = 'fname.png'

    def backtest(self):
        self.env.backtest.backtest()
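learn_pg above only requires an agent that exposes choose_action, store_transition and learn. The sketch below is a generic REINFORCE-style agent with a linear softmax policy that satisfies that interface; it is an illustrative assumption, not the project's Agent_pg.

import numpy as np

class ReinforceAgent:
    """Hedged REINFORCE sketch implementing the interface used by learn_pg()."""

    def __init__(self, n_features, n_actions=2, lr=0.01, gamma=0.99):
        self.W = np.zeros((n_features, n_actions))   # linear softmax policy weights
        self.lr, self.gamma = lr, gamma
        self.ep_obs, self.ep_actions, self.ep_rewards = [], [], []

    def _policy(self, obs):
        logits = np.asarray(obs) @ self.W
        exp = np.exp(logits - logits.max())
        return exp / exp.sum()

    def choose_action(self, observation):
        probs = self._policy(observation)
        return np.random.choice(len(probs), p=probs)

    def store_transition(self, observation, action, reward):
        self.ep_obs.append(observation)
        self.ep_actions.append(action)
        self.ep_rewards.append(reward)

    def learn(self):
        # discounted returns, normalized for variance reduction
        returns = np.zeros(len(self.ep_rewards))
        running = 0.0
        for t in reversed(range(len(self.ep_rewards))):
            running = self.ep_rewards[t] + self.gamma * running
            returns[t] = running
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)
        # gradient ascent on log pi(a|s) weighted by the return
        for obs, action, ret in zip(self.ep_obs, self.ep_actions, returns):
            probs = self._policy(obs)
            grad_log = -probs
            grad_log[action] += 1.0
            self.W += self.lr * ret * np.outer(np.asarray(obs), grad_log)
        self.ep_obs, self.ep_actions, self.ep_rewards = [], [], []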
Code example #4
 def test_creation(self):
     env = Env(scenario=self.scenario, world=self.world)
     self.assertEqual(self.world, env.world)
Code example #5
 def __init__(self):
     self.env = Env()
Code example #6
def maximise_food_eaten(env: Env) -> float:
    return env.total_points()
Code example #7

multiview = False
trained_model = "multiview" if multiview else "baseline"
if multiview:
    model = Model(max_scale=4, steps_per_scale=int(25e3), lr=1e-3, multiview=True)
    data = MultiViewData(episode_duration=21,
                         data_buffer_size=int(16e3),
                         batch_size=16)
else:
    model = Model(max_scale=4, steps_per_scale=int(25e3), lr=1e-3)
    data = ObservationData(obs_buffer_size=21,
                           data_buffer_size=int(16e3),
                           batch_size=16)
# load the selected checkpoint and the cached data (same paths for both variants)
model.load(f"../trained_models/{trained_model}")
data.load(data_folder="../data/")
env = Env(obs_buffer_size=21)

psnr_mean, psnr_std, ssim_mean, ssim_std = get_results(train=True)
print(f"{trained_model} training, PSNR:{psnr_mean} ({psnr_std}), SSIM:{ssim_mean} ({ssim_std})")

psnr_mean, psnr_std, ssim_mean, ssim_std = get_results(train=False)
print(f"{trained_model} testing, PSNR:{psnr_mean} ({psnr_std}), SSIM:{ssim_mean} ({ssim_std})")
Code example #8
def main(_):
    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        env = Env(MONITOR_DIR, RANDOM_SEED, FPS, sess)
        np.random.seed(RANDOM_SEED)
        tf.set_random_seed(RANDOM_SEED)

        state_dim = env.observation_space.shape
        try:
            # Continuous (Box) action space
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure the action bound is symmetric
            assert np.all(env.action_space.high == -env.action_space.low)
            action_type = 'Continuous'
        except (AttributeError, IndexError):
            # Discrete action space: no high/low bounds
            action_dim = env.action_space.n
            action_bound = None
            action_type = 'Discrete'

        print(action_type)
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             ACTOR_LEARNING_RATE, TAU, action_type)

        critic = CriticNetwork(sess, state_dim, action_dim, action_bound,
                               CRITIC_LEARNING_RATE, TAU,
                               actor.get_num_trainable_vars(), action_type)

        # Initialize replay memory
        replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)
        if action_type == 'Continuous':
            noise = OrnsteinUhlenbeckProcess(
                OU_THETA,
                mu=OU_MU,
                sigma=OU_SIGMA,
                n_steps_annealing=EXPLORATION_EPISODES)
        else:
            noise = GreedyPolicy(action_dim, EXPLORATION_EPISODES, MIN_EPSILON,
                                 MAX_EPSILON)

        agent = DDPGAgent(sess,
                          action_type,
                          actor,
                          critic,
                          GAMMA,
                          env,
                          replay_buffer,
                          noise=noise,
                          exploration_episodes=EXPLORATION_EPISODES,
                          max_episodes=MAX_EPISODES,
                          max_steps_episode=MAX_STEPS_EPISODE,
                          warmup_steps=WARMUP_STEPS,
                          mini_batch=FLAGS.mini_batch,
                          eval_episodes=EVAL_EPISODES,
                          eval_periods=EVAL_PERIODS,
                          env_render=FLAGS.env_render,
                          summary_dir=SUMMARY_DIR,
                          model_dir=MODEL_DIR,
                          model_store_periods=MODEL_STORE_PERIODS,
                          detail=DETAIL,
                          render_interval=RENDER_INTERVAL)

        # Keep (re)connecting to the CARLA server; retry after a dropped connection
        while True:
            try:
                with make_carla_client('localhost', FLAGS.port) as client:
                    env.connected(client)
                    agent.train()
            except TCPConnectionError as error:
                print(error)
                time.sleep(5.0)