def main(_):
  """Build env/networks/agent from the module-level `conf` flags, then train or play.

  Side effects: mutates `conf` in place (parses observation_dims, scales
  time-step flags, sets data_format) and opens a TensorFlow session.
  """
  # preprocess
  # ast.literal_eval safely parses a literal flag value such as "[80, 80]";
  # the original eval() would execute arbitrary code from the flag string.
  import ast
  conf.observation_dims = ast.literal_eval(conf.observation_dims)

  # Scale every step-count flag by conf.scale so one knob stretches or
  # shrinks the whole training schedule.
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  # cuDNN favors channels-first on GPU; CPU kernels expect channels-last.
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  # Flags listed here are excluded from the model-directory name.
  model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
       'random_seed', 'tag', 'scale'])

  # start
  gpu_options = tensor.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

  with tensor.Session(config=tensor.ConfigProto(
      gpu_options=gpu_options)) as sess:
    env = StageEnvironment(conf.max_random_start,
                           conf.observation_dims, conf.data_format,
                           conf.display, conf.use_cumulated_reward)

    # pred_network is the trainable online net; target_network is the frozen
    # copy used for the Q-learning target.
    if conf.network_header_type in ['nature', 'nips']:
      pred_network = CNN(sess=sess,
                         data_format=conf.data_format,
                         history_length=conf.history_length,
                         observation_dims=conf.observation_dims,
                         output_size=env.action_size,
                         network_header_type=conf.network_header_type,
                         name='pred_network', trainable=True)
      target_network = CNN(sess=sess,
                           data_format=conf.data_format,
                           history_length=conf.history_length,
                           observation_dims=conf.observation_dims,
                           output_size=env.action_size,
                           network_header_type=conf.network_header_type,
                           name='target_network', trainable=False)
    elif conf.network_header_type == 'mlp':
      pred_network = MLPSmall(sess=sess,
                              observation_dims=conf.observation_dims,
                              history_length=conf.history_length,
                              output_size=env.action_size,
                              hidden_activation_fn=tensor.sigmoid,
                              network_output_type=conf.network_output_type,
                              name='pred_network', trainable=True)
      target_network = MLPSmall(sess=sess,
                                observation_dims=conf.observation_dims,
                                history_length=conf.history_length,
                                output_size=env.action_size,
                                hidden_activation_fn=tensor.sigmoid,
                                network_output_type=conf.network_output_type,
                                name='target_network', trainable=False)
    else:
      # fixed typo in the original message: "Unkown" -> "Unknown"
      raise ValueError('Unknown network_header_type: %s'
                       % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start,
                     model_dir, pred_network.var.values())
    agent = TrainAgent(sess, pred_network, env, stat, conf,
                       target_network=target_network)

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      # NOTE(review): play() reads conf.ep_end while the scaled flag above is
      # 't_ep_end' — confirm these are two distinct flags.
      agent.play(conf.ep_end)
def main(_):
  """Build env/networks/agent from the module-level `conf` flags, then train or play.

  Side effects: mutates `conf` in place (parses observation_dims, scales
  time-step flags, sets data_format) and opens a TensorFlow session.
  """
  # preprocess
  # ast.literal_eval safely parses a literal flag value such as "[80, 80]";
  # the original eval() would execute arbitrary code from the flag string.
  import ast
  conf.observation_dims = ast.literal_eval(conf.observation_dims)

  # Scale every step-count flag by conf.scale so one knob stretches or
  # shrinks the whole training schedule.
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  # cuDNN favors channels-first on GPU; CPU kernels expect channels-last.
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  # Flags listed here are excluded from the model-directory name.
  model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
       'random_seed', 'tag', 'scale'])

  # start
  #gpu_options = tf.GPUOptions(
  #    per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))
  sess_config = tf.ConfigProto(
      log_device_placement=False,
      allow_soft_placement=conf.allow_soft_placement)
  # NOTE(review): allow_growth is driven by conf.allow_soft_placement here —
  # this looks like a copy-paste of the wrong flag; confirm whether a separate
  # allow_growth flag was intended.
  sess_config.gpu_options.allow_growth = conf.allow_soft_placement

  with tf.Session(config=sess_config) as sess:
    # Toy grid-world envs get the lightweight wrapper; everything else is
    # treated as an Atari game.
    if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
      env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                           conf.max_random_start, conf.observation_dims,
                           conf.data_format, conf.display,
                           conf.use_cumulated_reward)
    else:
      env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                             conf.max_random_start, conf.observation_dims,
                             conf.data_format, conf.display,
                             conf.use_cumulated_reward)

    # pred_network is the trainable online net; target_network is the frozen
    # copy used for the Q-learning target.
    if conf.network_header_type in ['nature', 'nips']:
      pred_network = CNN(sess=sess,
                         data_format=conf.data_format,
                         history_length=conf.history_length,
                         observation_dims=conf.observation_dims,
                         output_size=env.env.action_space.n,
                         network_header_type=conf.network_header_type,
                         name='pred_network', trainable=True)
      target_network = CNN(sess=sess,
                           data_format=conf.data_format,
                           history_length=conf.history_length,
                           observation_dims=conf.observation_dims,
                           output_size=env.env.action_space.n,
                           network_header_type=conf.network_header_type,
                           name='target_network', trainable=False)
    elif conf.network_header_type == 'mlp':
      pred_network = MLPSmall(sess=sess,
                              data_format=conf.data_format,
                              observation_dims=conf.observation_dims,
                              history_length=conf.history_length,
                              output_size=env.env.action_space.n,
                              hidden_activation_fn=tf.sigmoid,
                              network_output_type=conf.network_output_type,
                              name='pred_network', trainable=True)
      target_network = MLPSmall(sess=sess,
                                data_format=conf.data_format,
                                observation_dims=conf.observation_dims,
                                history_length=conf.history_length,
                                output_size=env.env.action_space.n,
                                hidden_activation_fn=tf.sigmoid,
                                network_output_type=conf.network_output_type,
                                name='target_network', trainable=False)
    else:
      # fixed typo in the original message: "Unkown" -> "Unknown"
      raise ValueError('Unknown network_header_type: %s'
                       % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start,
                     model_dir, pred_network.var.values())
    agent = TrainAgent(sess, pred_network, env, stat, conf,
                       target_network=target_network)

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      # NOTE(review): play() reads conf.ep_end while the scaled flag above is
      # 't_ep_end' — confirm these are two distinct flags.
      agent.play(conf.ep_end)

    # NOTE(review): these are created only after train()/play() returns, so
    # nothing is ever written through them — confirm whether they should be
    # set up before training instead.
    merged_summary = tf.summary.merge_all()
    file_writer = tf.summary.FileWriter("tensorboardLogs", sess.graph)
def main(_): # preprocess conf.observation_dims = eval(conf.observation_dims) if conf.learning_rate < 0: conf.learning_rate = conf.learning_rate_minimum = 10**(np.random.random()*2-4) for flag in ['memory_size', 't_target_q_update_freq', 't_test', 't_ep_end', 't_train_max', 't_learn_start', 'learning_rate_decay_step', 'entropy_regularization_decay_step']: setattr(conf, flag, getattr(conf, flag) * conf.scale) # for flag in ['learning_rate', 'learning_rate_minimum']: # setattr(conf, flag, getattr(conf, flag) / conf.async_threads) if conf.use_gpu: conf.data_format = 'NCHW' else: conf.data_format = 'NHWC' if conf.model_dir == "": model_dir = get_model_dir(conf, ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size', 't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag', 'scale', 'model_dir', 't_train_max']) else: model_dir = 'checkpoints/' + conf.model_dir + '/' device = '/gpu:0' if conf.use_gpu else '/cpu:0' # start gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction)) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess, \ tf.device(device): env_args = [conf.env_name, conf.n_action_repeat, conf.max_random_start, conf.observation_dims, conf.data_format, conf.display] if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']) : Env = ToyEnvironment else: Env = AtariEnvironment if conf.agent_type == 'Replay': env = Env(*env_args) n_actions = env.env.action_space.n elif conf.agent_type == 'Async': env = [Env(*env_args) for _ in range(conf.async_threads)] n_actions = env[0].env.action_space.n else: raise ValueError("Unknown agent_type: %s" % conf.agent_type) if conf.network_header_type in ['nature', 'nips']: NetworkHead = CNN args = {'sess': sess, 'data_format': conf.data_format, 'history_length': conf.history_length, 'observation_dims': conf.observation_dims, 'output_size': n_actions, 'network_output_type': conf.network_output_type} elif conf.network_header_type == 'mlp': 
NetworkHead = MLPSmall args = {'sess': sess, 'history_length': conf.history_length, 'observation_dims': conf.observation_dims, 'hidden_sizes': [], 'output_size': n_actions, 'hidden_activation_fn': tf.nn.relu, 'network_output_type': conf.network_output_type} else: raise ValueError('Unkown network_header_type: %s' % (conf.network_header_type)) stat = Statistic(sess, conf.t_test, conf.t_learn_start, conf.trace_steps, model_dir) if conf.agent_type == 'Replay': from agents.deep_q import DeepQ pred_network = NetworkHead(name='pred_network', trainable=True, **args) stat.create_writer(pred_network.var.values()) target_network = NetworkHead(name='target_network', trainable=False, **args) agent = DeepQ(sess, pred_network, env, stat, conf, target_network=target_network) elif conf.agent_type == 'Async': from agents.async import Async global_network = NetworkHead(name='global_network', trainable=False, **args) stat.create_writer(global_network.var.values()) target_network = NetworkHead(name='target_network', trainable=False, **args) pred_networks = list( NetworkHead(name=('pred_network_%d'%i), trainable=False, **args) for i in range(conf.async_threads)) if conf.disjoint_a3c: value_networks = list( NetworkHead(name=('value_network_%d'%i), trainable=False, **args) for i in range(conf.async_threads)) else: value_networks = None agent = Async(sess, global_network, target_network, env, stat, conf, pred_networks=pred_networks, value_networks=value_networks) else: raise ValueError('Unkown agent_type: %s' % (conf.agent_type)) if conf.is_train: agent.train(conf.t_train_max) else: agent.play(conf.ep_end)
def main(_):
  """Build env/networks/agent from the module-level `conf` flags, then train or play.

  Side effects: mutates `conf` in place (parses observation_dims, scales
  time-step flags, sets data_format) and opens a TensorFlow session.
  """
  # preprocess
  # ast.literal_eval safely parses a literal flag value such as "[80, 80]";
  # the original eval() would execute arbitrary code from the flag string.
  import ast
  conf.observation_dims = ast.literal_eval(conf.observation_dims)

  # Scale every step-count flag by conf.scale so one knob stretches or
  # shrinks the whole training schedule.
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  # cuDNN favors channels-first on GPU; CPU kernels expect channels-last.
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  # Flags listed here are excluded from the model-directory name.
  model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
       'scale'])

  # start
  with tf.Session() as sess:
    # Corridor toy env gets the lightweight wrapper; everything else is
    # treated as an Atari game.
    if 'Corridor' in conf.env_name:
      env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                           conf.max_random_start, conf.observation_dims,
                           conf.data_format, conf.display)
    else:
      env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                             conf.max_random_start, conf.observation_dims,
                             conf.data_format, conf.display)

    # pred_network is the trainable online net; target_network is the frozen
    # copy used for the Q-learning target.
    if conf.network_header_type in ['nature', 'nips']:
      pred_network = CNN(sess=sess,
                         data_format=conf.data_format,
                         history_length=conf.history_length,
                         observation_dims=conf.observation_dims,
                         output_size=env.env.action_space.n,
                         network_header_type=conf.network_header_type,
                         name='pred_network', trainable=True)
      target_network = CNN(sess=sess,
                           data_format=conf.data_format,
                           history_length=conf.history_length,
                           observation_dims=conf.observation_dims,
                           output_size=env.env.action_space.n,
                           network_header_type=conf.network_header_type,
                           name='target_network', trainable=False)
    elif conf.network_header_type == 'mlp':
      pred_network = MLPSmall(sess=sess,
                              observation_dims=conf.observation_dims,
                              history_length=conf.history_length,
                              output_size=env.env.action_space.n,
                              hidden_activation_fn=tf.sigmoid,
                              network_output_type=conf.network_output_type,
                              name='pred_network', trainable=True)
      target_network = MLPSmall(sess=sess,
                                observation_dims=conf.observation_dims,
                                history_length=conf.history_length,
                                output_size=env.env.action_space.n,
                                hidden_activation_fn=tf.sigmoid,
                                network_output_type=conf.network_output_type,
                                name='target_network', trainable=False)
    else:
      # fixed typo in the original message: "Unkown" -> "Unknown"
      raise ValueError('Unknown network_header_type: %s'
                       % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start,
                     model_dir, pred_network.var.values())
    agent = TrainAgent(sess, pred_network, env, stat, conf,
                       target_network=target_network)

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      # NOTE(review): play() reads conf.ep_end while the scaled flag above is
      # 't_ep_end' — confirm these are two distinct flags.
      agent.play(conf.ep_end)
def main(_):
  """Build env/networks/agent from the module-level `conf` flags, then train or play.

  This variant adds a recurrent 'rnn_cnn' network head alongside the
  standard CNN and MLP heads. Mutates `conf` in place.
  """
  # preprocess
  # ast.literal_eval safely parses a literal flag value such as "[80, 80]";
  # the original eval() would execute arbitrary code from the flag string.
  import ast
  conf.observation_dims = ast.literal_eval(conf.observation_dims)

  # Scale every step-count flag by conf.scale so one knob stretches or
  # shrinks the whole training schedule.
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  # cuDNN favors channels-first on GPU; CPU kernels expect channels-last.
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  # Flags listed here are excluded from the model-directory name.
  model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
       'random_seed', 'tag', 'scale'])

  # start
  #gpu_options = tf.GPUOptions(
  #    per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))
  # TODO: just manually set for now
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    # Toy grid-world envs get the lightweight wrapper; everything else is
    # treated as an Atari game.
    if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
      env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                           conf.max_random_start, conf.observation_dims,
                           conf.data_format, conf.display)
    else:
      env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                             conf.max_random_start, conf.observation_dims,
                             conf.data_format, conf.display)

    # pred_network is the trainable online net; target_network is the frozen
    # copy used for the Q-learning target.
    if conf.network_header_type == 'rnn_cnn':
      pred_network = RNNCNN(sess=sess,
                            data_format=conf.data_format,
                            history_length=conf.history_length,
                            num_steps=conf.num_steps,
                            num_layers=conf.num_layers,
                            attention=conf.attention,
                            observation_dims=conf.observation_dims,
                            output_size=env.env.action_space.n,
                            network_header_type=conf.network_header_type,
                            name='pred_network', trainable=True)
      target_network = RNNCNN(sess=sess,
                              data_format=conf.data_format,
                              history_length=conf.history_length,
                              num_steps=conf.num_steps,
                              num_layers=conf.num_layers,
                              attention=conf.attention,
                              observation_dims=conf.observation_dims,
                              output_size=env.env.action_space.n,
                              network_header_type=conf.network_header_type,
                              name='target_network', trainable=False)
    elif conf.network_header_type in ['nature', 'nips']:
      pred_network = CNN(sess=sess,
                         data_format=conf.data_format,
                         history_length=conf.history_length,
                         observation_dims=conf.observation_dims,
                         output_size=env.env.action_space.n,
                         network_header_type=conf.network_header_type,
                         name='pred_network', trainable=True)
      target_network = CNN(sess=sess,
                           data_format=conf.data_format,
                           history_length=conf.history_length,
                           observation_dims=conf.observation_dims,
                           output_size=env.env.action_space.n,
                           network_header_type=conf.network_header_type,
                           name='target_network', trainable=False)
    elif conf.network_header_type == 'mlp':
      pred_network = MLPSmall(sess=sess,
                              observation_dims=conf.observation_dims,
                              history_length=conf.history_length,
                              output_size=env.env.action_space.n,
                              hidden_activation_fn=tf.sigmoid,
                              network_output_type=conf.network_output_type,
                              name='pred_network', trainable=True)
      target_network = MLPSmall(sess=sess,
                                observation_dims=conf.observation_dims,
                                history_length=conf.history_length,
                                output_size=env.env.action_space.n,
                                hidden_activation_fn=tf.sigmoid,
                                network_output_type=conf.network_output_type,
                                name='target_network', trainable=False)
    else:
      # fixed typo in the original message: "Unkown" -> "Unknown"
      raise ValueError('Unknown network_header_type: %s'
                       % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start,
                     model_dir, pred_network.var.values())
    agent = TrainAgent(sess, pred_network, env, stat, conf,
                       target_network=target_network)

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      # NOTE(review): play() reads conf.ep_end while the scaled flag above is
      # 't_ep_end' — confirm these are two distinct flags.
      agent.play(conf.ep_end)