예제 #1
0
 def testMLPSmall(self):
     """Smoke test: building an ``MLPSmall`` in a fresh session must not raise.

     Nothing is asserted about the constructed network; the test passes as
     long as the constructor call completes.
     """
     session_config = tf.ConfigProto()
     with tf.Session(config=session_config) as sess:
         network_kwargs = dict(
             sess=sess,
             observation_dims=[80, 80],
             history_length=4,
             output_size=18,
             hidden_activation_fn=tf.sigmoid,
             network_output_type='normal',
             name='pred_network',
             trainable=True,
         )
         MLPSmall(**network_kwargs)
예제 #2
0
def main(_):
    """Build the stage environment, networks and agent from ``conf``; train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the listed size/schedule
         flags by ``conf.scale``.
      2. Set ``conf.data_format`` to 'NCHW' when ``conf.use_gpu`` is truthy,
         otherwise 'NHWC'.
      3. Obtain the model directory from ``get_model_dir``.
      4. Inside a session, create the environment, the prediction and target
         networks, the statistics object and the agent.
      5. Call ``agent.train`` when ``conf.is_train`` is truthy, else
         ``agent.play``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of 'nature',
            'nips' or 'mlp'.
    """
    # preprocess
    # NOTE(review): eval() executes whatever expression the flag contains;
    # ast.literal_eval would be enough if only literals are expected - confirm.
    conf.observation_dims = eval(conf.observation_dims)

    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # Flag names handed to get_model_dir (presumably the ones left out of the
    # directory name - verify against get_model_dir).
    model_dir_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale'
    ]
    model_dir = get_model_dir(conf, model_dir_flags)

    # start
    memory_fraction = calc_gpu_fraction(conf.gpu_fraction)
    gpu_options = tensor.GPUOptions(
        per_process_gpu_memory_fraction=memory_fraction)
    session_config = tensor.ConfigProto(gpu_options=gpu_options)

    with tensor.Session(config=session_config) as sess:
        env = StageEnvironment(conf.max_random_start, conf.observation_dims,
                               conf.data_format, conf.display,
                               conf.use_cumulated_reward)

        # Pick the network class and the keyword arguments specific to it.
        if conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            extra_kwargs = {
                'data_format': conf.data_format,
                'network_header_type': conf.network_header_type,
            }
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            extra_kwargs = {
                'hidden_activation_fn': tensor.sigmoid,
                'network_output_type': conf.network_output_type,
            }
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (conf.network_header_type))

        # Arguments common to both networks of the pair.
        shared_kwargs = dict(sess=sess,
                             history_length=conf.history_length,
                             observation_dims=conf.observation_dims,
                             output_size=env.action_size,
                             **extra_kwargs)

        # The two networks differ only in name and trainability.
        pred_network = network_cls(name='pred_network',
                                   trainable=True,
                                   **shared_kwargs)
        target_network = network_cls(name='target_network',
                                     trainable=False,
                                     **shared_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)
예제 #3
0
def main(_):
    """Build the environment, networks and agent from ``conf``; train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the listed size/schedule
         flags by ``conf.scale``.
      2. Set ``conf.data_format`` to 'NCHW' when ``conf.use_gpu`` is truthy,
         otherwise 'NHWC'.
      3. Obtain the model directory from ``get_model_dir``.
      4. Inside a session, create a ``ToyEnvironment`` when ``conf.env_name``
         contains 'Corridor' or 'FrozenLake' (otherwise an
         ``AtariEnvironment``), the prediction and target networks, the
         statistics object and the agent.
      5. Call ``agent.train`` when ``conf.is_train`` is truthy, else
         ``agent.play``.
      6. Write the session graph to the "tensorboardLogs" directory and close
         the writer so the event is flushed.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of 'nature',
            'nips' or 'mlp'.
    """
    # preprocess
    # NOTE(review): eval() executes whatever expression the flag contains;
    # ast.literal_eval would be enough if only literals are expected - confirm.
    conf.observation_dims = eval(conf.observation_dims)

    for flag in [
            'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
            't_train_max', 't_learn_start', 'learning_rate_decay_step'
    ]:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale'
    ])

    # start
    # Disabled: fixed per-process GPU memory fraction.
    #gpu_options = tf.GPUOptions(
    #    per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=conf.allow_soft_placement)
    # NOTE(review): allow_growth is driven by conf.allow_soft_placement; confirm
    # this is intended rather than a dedicated allow_growth flag.
    sess_config.gpu_options.allow_growth = conf.allow_soft_placement

    with tf.Session(config=sess_config) as sess:

        # Both environment classes are called with the same arguments here.
        is_toy_env = any(
            name in conf.env_name for name in ['Corridor', 'FrozenLake'])
        env_cls = ToyEnvironment if is_toy_env else AtariEnvironment
        env = env_cls(conf.env_name, conf.n_action_repeat,
                      conf.max_random_start, conf.observation_dims,
                      conf.data_format, conf.display,
                      conf.use_cumulated_reward)

        # Pick the network class and the keyword arguments specific to it.
        if conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            extra_kwargs = {'network_header_type': conf.network_header_type}
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            extra_kwargs = {
                'hidden_activation_fn': tf.sigmoid,
                'network_output_type': conf.network_output_type,
            }
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (conf.network_header_type))

        # Arguments common to both networks of the pair.
        shared_kwargs = dict(sess=sess,
                             data_format=conf.data_format,
                             history_length=conf.history_length,
                             observation_dims=conf.observation_dims,
                             output_size=env.env.action_space.n,
                             **extra_kwargs)

        # The two networks differ only in name and trainability.
        pred_network = network_cls(name='pred_network',
                                   trainable=True,
                                   **shared_kwargs)
        target_network = network_cls(name='target_network',
                                     trainable=False,
                                     **shared_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

        # The call is kept so the graph written below is built exactly as
        # before; its result was never used, so it is no longer bound.
        tf.summary.merge_all()
        file_writer = tf.summary.FileWriter("tensorboardLogs", sess.graph)
        # Close explicitly: the graph event is only queued by the constructor
        # and may never reach disk if the process exits without a flush/close.
        file_writer.close()
예제 #4
0
def main(_):
    """Build the environment, networks and agent from ``conf``; train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the listed size/schedule
         flags by ``conf.scale``.
      2. Set ``conf.data_format`` to 'NCHW' when ``conf.use_gpu`` is truthy,
         otherwise 'NHWC'.
      3. Obtain the model directory from ``get_model_dir``.
      4. Inside a default-configured session, create a ``ToyEnvironment``
         when ``conf.env_name`` contains 'Corridor' (otherwise an
         ``AtariEnvironment``), the prediction and target networks, the
         statistics object and the agent.
      5. Call ``agent.train`` when ``conf.is_train`` is truthy, else
         ``agent.play``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of 'nature',
            'nips' or 'mlp'.
    """
    # preprocess
    # NOTE(review): eval() executes whatever expression the flag contains;
    # ast.literal_eval would be enough if only literals are expected - confirm.
    conf.observation_dims = eval(conf.observation_dims)

    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        scaled_value = getattr(conf, flag_name) * conf.scale
        setattr(conf, flag_name, scaled_value)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    model_dir_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
        'scale'
    ]
    model_dir = get_model_dir(conf, model_dir_flags)

    # start
    with tf.Session() as sess:
        # Both environment classes are called with the same arguments here.
        env_cls = (ToyEnvironment
                   if 'Corridor' in conf.env_name else AtariEnvironment)
        env = env_cls(conf.env_name, conf.n_action_repeat,
                      conf.max_random_start, conf.observation_dims,
                      conf.data_format, conf.display)

        def build_pair(network_cls, **extra_kwargs):
            """Create the trainable 'pred_network', then the non-trainable
            'target_network', from the same arguments."""
            shared_kwargs = dict(sess=sess,
                                 observation_dims=conf.observation_dims,
                                 history_length=conf.history_length,
                                 output_size=env.env.action_space.n,
                                 **extra_kwargs)
            online = network_cls(name='pred_network',
                                 trainable=True,
                                 **shared_kwargs)
            fixed = network_cls(name='target_network',
                                trainable=False,
                                **shared_kwargs)
            return online, fixed

        if conf.network_header_type in ['nature', 'nips']:
            pred_network, target_network = build_pair(
                CNN,
                data_format=conf.data_format,
                network_header_type=conf.network_header_type)
        elif conf.network_header_type == 'mlp':
            pred_network, target_network = build_pair(
                MLPSmall,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type)
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)
예제 #5
0
def main(_):
    """Build the environment, networks and agent from ``conf``; train or play.

    Steps, in order:
      1. Parse ``conf.observation_dims`` and multiply the listed size/schedule
         flags by ``conf.scale``.
      2. Set ``conf.data_format`` to 'NCHW' when ``conf.use_gpu`` is truthy,
         otherwise 'NHWC'.
      3. Obtain the model directory from ``get_model_dir``.
      4. Inside a session whose GPU memory fraction is fixed at 0.9, create a
         ``ToyEnvironment`` when ``conf.env_name`` contains 'Corridor' or
         'FrozenLake' (otherwise an ``AtariEnvironment``), the prediction and
         target networks (``RNNCNN``, ``CNN`` or ``MLPSmall``), the statistics
         object and the agent.
      5. Call ``agent.train`` when ``conf.is_train`` is truthy, else
         ``agent.play``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``conf.network_header_type`` is not one of 'rnn_cnn',
            'nature', 'nips' or 'mlp'.
    """
    # preprocess
    # NOTE(review): eval() executes whatever expression the flag contains;
    # ast.literal_eval would be enough if only literals are expected - confirm.
    conf.observation_dims = eval(conf.observation_dims)

    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    model_dir_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale'
    ]
    model_dir = get_model_dir(conf, model_dir_flags)

    # start
    # TODO: just manually set for now; the flag-derived value
    # calc_gpu_fraction(conf.gpu_fraction) is currently not used.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    session_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=session_config) as sess:
        # Both environment classes are called with the same arguments here.
        is_toy_env = any(
            name in conf.env_name for name in ['Corridor', 'FrozenLake'])
        env_cls = ToyEnvironment if is_toy_env else AtariEnvironment
        env = env_cls(conf.env_name, conf.n_action_repeat,
                      conf.max_random_start, conf.observation_dims,
                      conf.data_format, conf.display)

        # Pick the network class and the keyword arguments specific to it.
        if conf.network_header_type == 'rnn_cnn':
            network_cls = RNNCNN
            extra_kwargs = {
                'data_format': conf.data_format,
                'num_steps': conf.num_steps,
                'num_layers': conf.num_layers,
                'attention': conf.attention,
                'network_header_type': conf.network_header_type,
            }
        elif conf.network_header_type in ['nature', 'nips']:
            network_cls = CNN
            extra_kwargs = {
                'data_format': conf.data_format,
                'network_header_type': conf.network_header_type,
            }
        elif conf.network_header_type == 'mlp':
            network_cls = MLPSmall
            extra_kwargs = {
                'hidden_activation_fn': tf.sigmoid,
                'network_output_type': conf.network_output_type,
            }
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (conf.network_header_type))

        # Arguments common to both networks of the pair.
        shared_kwargs = dict(sess=sess,
                             history_length=conf.history_length,
                             observation_dims=conf.observation_dims,
                             output_size=env.env.action_space.n,
                             **extra_kwargs)

        # Build the prediction network first, then the target network; they
        # differ only in name and trainability.
        networks = {}
        for net_name, is_trainable in (('pred_network', True),
                                       ('target_network', False)):
            networks[net_name] = network_cls(name=net_name,
                                             trainable=is_trainable,
                                             **shared_kwargs)
        pred_network = networks['pred_network']
        target_network = networks['target_network']

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)