Example #1
def main(args):
    logging.debug('Configuration: {}'.format(args))

    network, env_creator = get_network_and_environment_creator(args)
    learner = PAACLearner(network, env_creator, args)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
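The examples on this page assume that argument parsing and logging setup happen before main() is called. A minimal, hypothetical driver for Example #1 is sketched below; the argparse flags and the logging configuration are assumptions, only main() and its logging calls come from the example itself.

import argparse
import logging


def parse_args():
    # Hypothetical parser; the real projects expose many more flags.
    parser = argparse.ArgumentParser(description='PAAC training')
    parser.add_argument('--game', default='pong', help='environment name')
    parser.add_argument('--debugging_folder', default='logs/', help='output folder')
    parser.add_argument('-v', '--verbose', action='store_true')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # DEBUG level makes the 'Configuration: ...' line inside main() visible.
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    main(args)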
Example #2
def main(args):
    logging.debug('Configuration: {}'.format(args))
    logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING)

    network_creator = get_network_creator(args)

    learner = PAACLearner(network_creator, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
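Examples #2 through #6 install a kill-signal handler before training starts, but the helper itself does not appear in these excerpts. A plausible sketch is shown below; the learner.cleanup() method name and the PID check are assumptions, not the projects' confirmed API.

import logging
import os
import signal
import sys


def setup_kill_signal_handler(learner):
    # Remember the main PID: worker processes forked later inherit this
    # handler, but only the main process should perform cleanup.
    main_pid = os.getpid()

    def handler(signum, frame):
        if os.getpid() == main_pid:
            logging.info('Signal %d received, cleaning up before exit', signum)
            learner.cleanup()  # hypothetical checkpoint/teardown hook
            sys.exit(0)

    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGINT, handler)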
Example #3
File: train.py Project: VickeeX/D3RL
def main(args):
    logging.debug('Configuration: {}'.format(args))
    logging.getLogger("wergzrug").setLevel(logging.WARNING)
    logging.getLogger("tensorflow").setLevel(logging.WARNING)

    network_creator, env_creator = get_network_and_environment_creator(args)

    learner = PAACLearner(network_creator, env_creator, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
Example #4
def main(args):
    logging.debug('Configuration: {}'.format(args))

    explo_policy = ExplorationPolicy(args)
    print('Repetition table : ' + str(explo_policy.tab_rep))

    network_creator, env_creator = get_network_and_environment_creator(
        args, explo_policy)

    learner = PAACLearner(network_creator, env_creator, explo_policy, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
Example #5
def main(args):
    logging.debug('Configuration: {}'.format(args))

    if args.random_seed is None:
        rng = np.random.RandomState(int(time()))
        random_seed = rng.randint(1000)
        args.random_seed = random_seed
    network_creator, env_creator = get_network_and_environment_creator(args)

    learner = PAACLearner(network_creator, env_creator, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
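Example #5 falls back to a wall-clock-derived seed when none is supplied. The same fallback as a standalone helper, using numpy explicitly (the function name is illustrative):

from time import time

import numpy as np


def ensure_random_seed(args):
    # Derive a seed from the current time when none was given, so the run
    # is still reproducible once the chosen seed is logged or saved.
    if args.random_seed is None:
        rng = np.random.RandomState(int(time()))
        args.random_seed = int(rng.randint(1000))
    return args.random_seed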
Example #6
def main(args):
    network_creator, env_creator = get_network_and_environment_creator(args)

    utils.save_args(args, args.debugging_folder, file_name=ARGS_FILE)
    logging.info('Saved args in the {0} folder'.format(args.debugging_folder))
    logging.info(args_to_str(args))

    batch_env = ConcurrentBatchEmulator(WorkerProcess, env_creator,
                                        args.num_workers, args.num_envs)
    set_exit_handler(concurrent_emulator_handler(batch_env))
    try:
        batch_env.start_workers()
        learner = PAACLearner(network_creator, batch_env, args)
        learner.set_eval_function(eval_network, learner.network, env_creator,
                                  50, learner.use_rnn)  # args to eval_network
        learner.train()
    finally:
        batch_env.close()
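Example #6 protects the worker processes twice: the try/finally block closes the batch emulator on normal exits, and set_exit_handler(concurrent_emulator_handler(batch_env)) covers termination signals. Those two helpers are not shown; a hypothetical sketch of that pattern:

import logging
import signal


def concurrent_emulator_handler(batch_env):
    # Build a callback that shuts down the emulator worker processes.
    def handler(signum, frame):
        logging.warning('Signal %d received, closing emulator workers', signum)
        batch_env.close()

    return handler


def set_exit_handler(handler):
    # Route termination signals to the cleanup callback.
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, handler)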
Example #7
            episode_over = False
            reward = 0.0
            while not episode_over:
                if args.embedding_plot and random.random() < 0.2:
                    latent_var = sess.run(network.encoder_output,
                                          feed_dict={
                                              network.autoencoder_input_ph:
                                              np.array([state[:, :,
                                                              0]]).reshape(
                                                                  1, 84, 84, 1)
                                          })
                    img_database.append(state[:, :, 0])
                    latent_database.append(latent_var[0])

                action = PAACLearner.choose_next_actions(
                    network, env_creator.num_actions, [state], sess)
                state, r, episode_over = environment.next(action[0])
                reward += r
            rewards.append(reward)
            print(reward)
        print("Mean:", np.mean(rewards), "Min:", np.min(rewards), "Max:",
              np.max(rewards), "Std:", np.std(rewards))

    # Generate a plot for visualizing the latent space learnt by the autoencoder
    if args.embedding_plot:
        from sklearn.manifold import TSNE
        manifold = TSNE(n_components=2)
        x_fitted = manifold.fit(np.array(latent_database))
        print("Plotting TSNE of the latent space...")
        fig, ax = plt.subplots()
        num_imgs = 30
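Example #7 is cut off just as the t-SNE figure is assembled. A minimal sketch of how the collected latent_database could be embedded and plotted (fit_transform is used here to obtain the 2-D coordinates directly; the figure details are assumptions):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# latent_database holds the encoder outputs gathered during the rollout above.
latents = np.array(latent_database)
embedding = TSNE(n_components=2).fit_transform(latents)

fig, ax = plt.subplots()
ax.scatter(embedding[:, 0], embedding[:, 1], s=4)
ax.set_title('t-SNE of the autoencoder latent space')
fig.savefig('latent_tsne.png')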
Example #8
    if 'gpu' in args.device:
        config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        checkpoints_ = os.path.join(df, 'checkpoints')
        network.init(checkpoints_, saver, sess)
        states = np.asarray(
            [environment.get_initial_state() for environment in environments])
        if args.noops != 0:
            for i, environment in enumerate(environments):
                for _ in range(random.randint(0, args.noops)):
                    state, _, _ = environment.next(environment.get_noop())
                    states[i] = state

        episodes_over = np.zeros(args.test_count, dtype=np.bool)
        rewards = np.zeros(args.test_count, dtype=np.float32)
        while not all(episodes_over):
            actions, _, _ = PAACLearner.choose_next_actions(
                network, env_creator.num_actions, states, sess)
            for j, environment in enumerate(environments):
                state, r, episode_over = environment.next(actions[j])
                states[j] = state
                rewards[j] += r
                episodes_over[j] = episode_over

        print('Performed {} tests for {}.'.format(args.test_count, args.game))
        print('Mean: {0:.2f}'.format(np.mean(rewards)))
        print('Min: {0:.2f}'.format(np.min(rewards)))
        print('Max: {0:.2f}'.format(np.max(rewards)))
        print('Std: {0:.2f}'.format(np.std(rewards)))
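Example #8 begins with a pre-built config object and only toggles allow_growth for GPU devices. One plausible way that object could be constructed (TF1-style API; allow_soft_placement is an assumption):

import tensorflow as tf


def make_session_config(device):
    # Build the Session config used above; only the allow_growth line
    # mirrors the excerpt, the rest is illustrative.
    config = tf.ConfigProto(allow_soft_placement=True)
    if 'gpu' in device:
        # Grow GPU memory on demand instead of reserving the whole card.
        config.gpu_options.allow_growth = True
    return config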
Example #9
        edr_outs = get_save_frame(edr_name)
    with tf.Session(config=config) as sess:
        checkpoints_ = os.path.join(df, 'checkpoints')
        network.init(checkpoints_, saver, sess)
        states = np.asarray(
            [environment.get_initial_state() for environment in environments])
        if args.noops != 0:
            for i, environment in enumerate(environments):
                for _ in range(random.randint(0, args.noops)):
                    state, _, _ = environment.next(environment.get_noop())
                    states[i] = state

        episodes_over = np.zeros(args.test_count, dtype=np.bool)
        rewards = np.zeros(args.test_count, dtype=np.float32)
        while not all(episodes_over):
            actions, edr_output, _, _, r_s, r_m, r_l = PAACLearner.choose_next_actions_with_viz(
                network, env_creator.num_actions, states, sess, True)
            #red = visualize(states, edr_output)
            #print("states shape", states.shape)
            #print("r_s", r_s.shape)
            #print("edr output shape",edr_output.shape)
            if args.edr_viz:
                edr_outs(edr_output[3, :, :, 0].reshape([84, 84]))
            # edr_outs(visualize(states, edr_output))
            #edr_outs(r_s.reshape([84,84]))
            for j, environment in enumerate(environments):
                state, r, episode_over = environment.next(actions[j])
                states[j] = state
                rewards[j] += r
                episodes_over[j] = episode_over

        print('Performed {} tests for {}.'.format(args.test_count, args.game))
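Example #9 writes visualization frames through the closure returned by get_save_frame(edr_name), which is not shown in the excerpt. A hypothetical implementation that dumps each 84x84 frame as a numbered PNG (using Pillow; the folder layout and normalization are assumptions):

import os

import numpy as np
from PIL import Image


def get_save_frame(name):
    # Return a closure that writes every frame it receives to
    # name/frame_00000.png, name/frame_00001.png, ...
    os.makedirs(name, exist_ok=True)
    counter = {'i': 0}

    def save(frame):
        frame = np.asarray(frame, dtype=np.float32)
        span = frame.max() - frame.min()
        if span > 0:  # rescale to 0-255 so low-contrast maps stay visible
            frame = (frame - frame.min()) / span
        Image.fromarray((frame * 255).astype(np.uint8)).save(
            os.path.join(name, 'frame_{:05d}.png'.format(counter['i'])))
        counter['i'] += 1

    return save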