Beispiel #1
0
def main(_):
    """Build the environment, networks and agent from ``conf`` and run them.

    The module-level ``conf`` object is mutated in place: ``observation_dims``
    is parsed from its string form, the step-related flags are multiplied by
    ``conf.scale`` and ``data_format`` is set from ``conf.use_gpu``.  After
    that a session is opened, a prediction/target network pair is created and
    the agent either trains or plays depending on ``conf.is_train``.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If ``conf.network_header_type`` is not ``'nature'``,
            ``'nips'`` or ``'mlp'``.
    """
    # NOTE(review): eval() executes whatever expression the flag contains;
    # only safe while the flag value is trusted.
    conf.observation_dims = eval(conf.observation_dims)

    # Every flag named here is multiplied by the global scale factor.
    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # Second argument is a list of flag names handed to get_model_dir
    # (presumably the flags left out of the directory name -- confirm there).
    ignored_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale',
    ]
    model_dir = get_model_dir(conf, ignored_flags)

    memory_fraction = calc_gpu_fraction(conf.gpu_fraction)
    session_config = tensor.ConfigProto(
        gpu_options=tensor.GPUOptions(
            per_process_gpu_memory_fraction=memory_fraction))

    with tensor.Session(config=session_config) as sess:
        env = StageEnvironment(conf.max_random_start, conf.observation_dims,
                               conf.data_format, conf.display,
                               conf.use_cumulated_reward)

        # Pick the network class and the keyword arguments shared by the
        # prediction and target networks; only name/trainable differ.
        header_type = conf.network_header_type
        if header_type in ['nature', 'nips']:
            network_cls = CNN
            network_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.action_size,
                network_header_type=header_type)
        elif header_type == 'mlp':
            network_cls = MLPSmall
            network_kwargs = dict(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.action_size,
                hidden_activation_fn=tensor.sigmoid,
                network_output_type=conf.network_output_type)
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (header_type))

        pred_network = network_cls(name='pred_network', trainable=True,
                                   **network_kwargs)
        target_network = network_cls(name='target_network', trainable=False,
                                     **network_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)
Beispiel #2
0
def main(_):
    """Set up a toy or Atari environment from ``conf`` and run a DQN agent.

    The module-level ``conf`` object is mutated in place: ``observation_dims``
    is parsed from its string form, the step-related flags are multiplied by
    ``conf.scale`` and ``data_format`` is set from ``conf.use_gpu``.  A
    prediction/target network pair is built, the agent trains or plays
    depending on ``conf.is_train``, and finally the session graph is written
    to the ``tensorboardLogs`` directory.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If ``conf.network_header_type`` is not ``'nature'``,
            ``'nips'`` or ``'mlp'``.
    """
    # preprocess
    # NOTE(review): eval() executes whatever expression the flag contains;
    # only safe while the flag value is trusted.
    conf.observation_dims = eval(conf.observation_dims)

    # Every flag named here is multiplied by the global scale factor.
    for flag in [
            'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
            't_train_max', 't_learn_start', 'learning_rate_decay_step'
    ]:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    # Second argument is a list of flag names handed to get_model_dir
    # (presumably the flags left out of the directory name -- confirm there).
    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale'
    ])

    # start
    sess_config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=conf.allow_soft_placement)
    # NOTE(review): allow_growth is driven by the allow_soft_placement flag;
    # confirm this coupling is intentional and not a copy/paste slip.
    sess_config.gpu_options.allow_growth = conf.allow_soft_placement

    with tf.Session(config=sess_config) as sess:

        if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
            env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                                 conf.max_random_start, conf.observation_dims,
                                 conf.data_format, conf.display,
                                 conf.use_cumulated_reward)
        else:
            env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                                   conf.max_random_start,
                                   conf.observation_dims, conf.data_format,
                                   conf.display, conf.use_cumulated_reward)

        if conf.network_header_type in ['nature', 'nips']:
            pred_network = CNN(sess=sess,
                               data_format=conf.data_format,
                               history_length=conf.history_length,
                               observation_dims=conf.observation_dims,
                               output_size=env.env.action_space.n,
                               network_header_type=conf.network_header_type,
                               name='pred_network',
                               trainable=True)
            target_network = CNN(sess=sess,
                                 data_format=conf.data_format,
                                 history_length=conf.history_length,
                                 observation_dims=conf.observation_dims,
                                 output_size=env.env.action_space.n,
                                 network_header_type=conf.network_header_type,
                                 name='target_network',
                                 trainable=False)
        elif conf.network_header_type == 'mlp':
            pred_network = MLPSmall(
                sess=sess,
                data_format=conf.data_format,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
                name='pred_network',
                trainable=True)
            target_network = MLPSmall(
                sess=sess,
                data_format=conf.data_format,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
                name='target_network',
                trainable=False)
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

        # The return value was never used; the call itself is kept because it
        # may add a merged-summary op to the graph that is written below.
        tf.summary.merge_all()
        file_writer = tf.summary.FileWriter("tensorboardLogs", sess.graph)
        # Close explicitly so the queued graph event is flushed to disk
        # before the process exits instead of relying on the writer's
        # background flush.
        file_writer.close()
Beispiel #3
0
def main(_):
  """Build environments, networks and a Replay or Async agent, then run it.

  The module-level ``conf`` object is mutated in place: ``observation_dims``
  is parsed from its string form, a negative ``learning_rate`` is replaced by
  a randomly drawn one, the step-related flags are multiplied by
  ``conf.scale`` and ``data_format`` is set from ``conf.use_gpu``.  Depending
  on ``conf.agent_type`` either a single environment with a ``DeepQ`` agent
  or ``conf.async_threads`` environments with an ``Async`` agent are created;
  the agent then trains or plays depending on ``conf.is_train``.

  Args:
    _: Ignored positional argument (presumably the argv list handed over by
      the app runner -- confirm against the caller).

  Raises:
    ValueError: If ``conf.agent_type`` is neither ``'Replay'`` nor
      ``'Async'``, or ``conf.network_header_type`` is not ``'nature'``,
      ``'nips'`` or ``'mlp'``.
  """
  # preprocess
  # NOTE(review): eval() executes whatever expression the flag contains;
  # only safe while the flag value is trusted.
  conf.observation_dims = eval(conf.observation_dims)
  if conf.learning_rate < 0:
    # A negative learning rate selects a random one: 10**x with x drawn
    # uniformly from [-4, -2); the minimum is pinned to the same value.
    conf.learning_rate = conf.learning_rate_minimum = 10**(np.random.random()*2-4)

  # Every flag named here is multiplied by the global scale factor.
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step', 'entropy_regularization_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  if conf.model_dir == "":
    # Second argument is a list of flag names handed to get_model_dir
    # (presumably the flags left out of the directory name -- confirm there).
    model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
       'scale', 'model_dir', 't_train_max'])
  else:
    model_dir = 'checkpoints/' + conf.model_dir + '/'

  device = '/gpu:0' if conf.use_gpu else '/cpu:0'
  # start
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess, \
       tf.device(device):
    env_args = [conf.env_name, conf.n_action_repeat, conf.max_random_start,
                conf.observation_dims, conf.data_format, conf.display]
    if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
      Env = ToyEnvironment
    else:
      Env = AtariEnvironment
    if conf.agent_type == 'Replay':
      env = Env(*env_args)
      n_actions = env.env.action_space.n
    elif conf.agent_type == 'Async':
      # One environment per asynchronous thread.
      env = [Env(*env_args) for _ in range(conf.async_threads)]
      n_actions = env[0].env.action_space.n
    else:
      raise ValueError("Unknown agent_type: %s" % conf.agent_type)

    # Select the network class and the keyword arguments shared by every
    # network instance; name/trainable are supplied per instance below.
    if conf.network_header_type in ['nature', 'nips']:
      NetworkHead = CNN
      args = {'sess': sess,
              'data_format': conf.data_format,
              'history_length': conf.history_length,
              'observation_dims': conf.observation_dims,
              'output_size': n_actions,
              'network_output_type': conf.network_output_type}
    elif conf.network_header_type == 'mlp':
      NetworkHead = MLPSmall
      args = {'sess': sess,
              'history_length': conf.history_length,
              'observation_dims': conf.observation_dims,
              'hidden_sizes': [],
              'output_size': n_actions,
              'hidden_activation_fn': tf.nn.relu,
              'network_output_type': conf.network_output_type}
    else:
      raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start, conf.trace_steps,
                     model_dir)

    if conf.agent_type == 'Replay':
      from agents.deep_q import DeepQ
      pred_network = NetworkHead(name='pred_network', trainable=True, **args)
      stat.create_writer(pred_network.var.values())
      target_network = NetworkHead(name='target_network', trainable=False, **args)
      agent = DeepQ(sess, pred_network, env, stat, conf,
                    target_network=target_network)
    elif conf.agent_type == 'Async':
      # `async` is a reserved keyword since Python 3.7, so the statement
      # `from agents.async import Async` no longer parses.  Resolving the
      # module by its string name works on every Python version.
      import importlib
      Async = importlib.import_module('agents.async').Async
      global_network = NetworkHead(name='global_network', trainable=False, **args)
      stat.create_writer(global_network.var.values())
      target_network = NetworkHead(name='target_network', trainable=False, **args)
      pred_networks = [
        NetworkHead(name=('pred_network_%d'%i), trainable=False, **args)
        for i in range(conf.async_threads)]
      if conf.disjoint_a3c:
        value_networks = [
          NetworkHead(name=('value_network_%d'%i), trainable=False, **args)
          for i in range(conf.async_threads)]
      else:
        value_networks = None
      agent = Async(sess, global_network, target_network, env, stat, conf,
                    pred_networks=pred_networks, value_networks=value_networks)
    else:
      # Defensive only: agent_type was already validated when the
      # environments were created above.
      raise ValueError('Unkown agent_type: %s' % (conf.agent_type))

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      agent.play(conf.ep_end)
Beispiel #4
0
def main(_):
    """Set up a toy or Atari environment from ``conf`` and run a DQN agent.

    The module-level ``conf`` object is mutated in place: ``observation_dims``
    is parsed from its string form, the step-related flags are multiplied by
    ``conf.scale`` and ``data_format`` is set from ``conf.use_gpu``.  A
    default session is opened, a prediction/target network pair is created
    and the agent either trains or plays depending on ``conf.is_train``.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If ``conf.network_header_type`` is not ``'nature'``,
            ``'nips'`` or ``'mlp'``.
    """
    # NOTE(review): eval() executes whatever expression the flag contains;
    # only safe while the flag value is trusted.
    conf.observation_dims = eval(conf.observation_dims)

    # Every flag named here is multiplied by the global scale factor.
    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # Second argument is a list of flag names handed to get_model_dir
    # (presumably the flags left out of the directory name -- confirm there).
    ignored_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
        'scale',
    ]
    model_dir = get_model_dir(conf, ignored_flags)

    with tf.Session() as sess:
        # Environment names containing 'Corridor' use the toy environment;
        # everything else goes through the Atari wrapper.  Both take the
        # same positional arguments.
        env_cls = (ToyEnvironment if 'Corridor' in conf.env_name
                   else AtariEnvironment)
        env = env_cls(conf.env_name, conf.n_action_repeat,
                      conf.max_random_start, conf.observation_dims,
                      conf.data_format, conf.display)

        # Pick the network class and the keyword arguments shared by the
        # prediction and target networks; only name/trainable differ.
        header_type = conf.network_header_type
        if header_type in ['nature', 'nips']:
            network_cls = CNN
            network_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=header_type)
        elif header_type == 'mlp':
            network_cls = MLPSmall
            network_kwargs = dict(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type)
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (header_type))

        pred_network = network_cls(name='pred_network', trainable=True,
                                   **network_kwargs)
        target_network = network_cls(name='target_network', trainable=False,
                                     **network_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)
Beispiel #5
0
def main(_):
    """Set up an environment and an RNN-CNN, CNN or MLP DQN agent and run it.

    The module-level ``conf`` object is mutated in place: ``observation_dims``
    is parsed from its string form, the step-related flags are multiplied by
    ``conf.scale`` and ``data_format`` is set from ``conf.use_gpu``.  A
    session limited to a fixed GPU memory fraction is opened, a
    prediction/target network pair is created and the agent either trains or
    plays depending on ``conf.is_train``.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by the app runner -- confirm against the caller).

    Raises:
        ValueError: If ``conf.network_header_type`` is not ``'rnn_cnn'``,
            ``'nature'``, ``'nips'`` or ``'mlp'``.
    """
    # NOTE(review): eval() executes whatever expression the flag contains;
    # only safe while the flag value is trusted.
    conf.observation_dims = eval(conf.observation_dims)

    # Every flag named here is multiplied by the global scale factor.
    scaled_flags = (
        'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
        't_train_max', 't_learn_start', 'learning_rate_decay_step',
    )
    for flag_name in scaled_flags:
        setattr(conf, flag_name, getattr(conf, flag_name) * conf.scale)

    conf.data_format = 'NCHW' if conf.use_gpu else 'NHWC'

    # Second argument is a list of flag names handed to get_model_dir
    # (presumably the flags left out of the directory name -- confirm there).
    ignored_flags = [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale',
    ]
    model_dir = get_model_dir(conf, ignored_flags)

    # TODO: just manually set for now -- the fraction is hard-coded to 0.9
    # rather than derived from conf.gpu_fraction via calc_gpu_fraction.
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9))

    with tf.Session(config=session_config) as sess:
        # Corridor / FrozenLake names use the toy environment; everything
        # else goes through the Atari wrapper.  Both take the same
        # positional arguments.
        is_toy = any(token in conf.env_name
                     for token in ['Corridor', 'FrozenLake'])
        env_cls = ToyEnvironment if is_toy else AtariEnvironment
        env = env_cls(conf.env_name, conf.n_action_repeat,
                      conf.max_random_start, conf.observation_dims,
                      conf.data_format, conf.display)

        # Pick the network class and the keyword arguments shared by the
        # prediction and target networks; only name/trainable differ.
        header_type = conf.network_header_type
        if header_type == 'rnn_cnn':
            network_cls = RNNCNN
            network_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                num_steps=conf.num_steps,
                num_layers=conf.num_layers,
                attention=conf.attention,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=header_type)
        elif header_type in ['nature', 'nips']:
            network_cls = CNN
            network_kwargs = dict(
                sess=sess,
                data_format=conf.data_format,
                history_length=conf.history_length,
                observation_dims=conf.observation_dims,
                output_size=env.env.action_space.n,
                network_header_type=header_type)
        elif header_type == 'mlp':
            network_cls = MLPSmall
            network_kwargs = dict(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type)
        else:
            raise ValueError('Unkown network_header_type: %s' %
                             (header_type))

        pred_network = network_cls(name='pred_network', trainable=True,
                                   **network_kwargs)
        target_network = network_cls(name='target_network', trainable=False,
                                     **network_kwargs)

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf,
                           target_network=target_network)

        if not conf.is_train:
            agent.play(conf.ep_end)
        else:
            agent.train(conf.t_train_max)