momentum=0.0, epsilon=RMSP_EPSILON, clip_norm=GRAD_NORM_CLIP, device=device) # instantiate each training thread # each thread is training for one target in one scene training_threads = [] for i in range(PARALLEL_SIZE): scene, task = branches[i % NUM_TASKS] training_thread = A3CTrainingThread(i, global_network, initial_learning_rate, learning_rate_input, grad_applier, MAX_TIME_STEP, device=device, network_scope="thread-%d" % (i + 1), scene_scope=scene, task_scope=task) training_threads.append(training_thread) # prepare session sess = tf.Session(config=tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)) init = tf.global_variables_initializer() sess.run(init) # create tensorboard summaries
# Instantiate each training thread.
# Each thread trains toward one target in one scene; this variant also
# shares a global discriminator (and its own gradient applier) across
# workers, with networks split over two devices.
training_threads = []
for i in range(PARALLEL_SIZE):
    # each local network has a dedicated scene and target, assigned
    # round-robin from `branches`
    scene, task = branches[i % NUM_TASKS]
    training_thread = A3CTrainingThread(
        i, global_network, global_discriminator, initial_learning_rate,
        learning_rate_input,
        grad_applier, grad_applier_discriminator, MAX_TIME_STEP,
        device=device, device2=device2,
        # one TF variable scope per worker so local networks don't collide
        network_scope="thread-%d" % (i + 1),
        scene_scope=scene,
        task_scope=task)
    training_threads.append(training_thread)

# Prepare session.
# allow_soft_placement lets TF fall back to CPU for ops without a GPU kernel.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                        allow_soft_placement=True))
init = tf.global_variables_initializer()
sess.run(init)
# Build the shared gradient applier and one training thread per agent.
# This variant reads all hyperparameters from a `settings` object and
# supports per-thread initial learning rates.
training_threads = []
learning_rate_input = tf.placeholder("float")
grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                              decay=settings.rmsp_alpha,
                              momentum=0.0,
                              epsilon=settings.rmsp_epsilon,
                              clip_norm=settings.grad_norm_clip,
                              device=device)
for i in range(settings.parallel_agent_size):
    training_thread = A3CTrainingThread(
        i, global_network, initial_learning_rates[i],
        learning_rate_input, grad_applier, settings.max_time_step, device,
        settings.action_size, settings.gamma, settings.local_t_max,
        settings.entropy_beta, settings.agent_type,
        settings.performance_log_interval, settings.log_level,
        settings.random_seed)
    training_threads.append(training_thread)

# Prepare session.
# allow_soft_placement lets TF fall back to CPU for ops without a GPU kernel.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                        allow_soft_placement=True))
init = tf.global_variables_initializer()
sess.run(init)

# Statistics summary writer (TensorBoard); Statistics presumably averages
# per-episode metrics over `settings.average_summary` episodes — confirm
# against its definition.
summary_writer = tf.summary.FileWriter(LOG_FILE, sess.graph)
statistics = Statistics(sess, summary_writer, settings.average_summary)
# Shared RMSProp optimizer for all workers.
# NOTE(review): unlike the RMSPropApplier variants, this stock optimizer
# applies no gradient-norm clipping — confirm that is intended.
grad_applier = tf.train.RMSPropOptimizer(
    learning_rate=learning_rate_input,
    decay=RMSP_ALPHA,
    momentum=0.0,
    epsilon=RMSP_EPSILON)

# Instantiate each training thread.
training_threads = []
for i in range(PARALLEL_SIZE):
    training_thread = A3CTrainingThread(
        config, env, i,
        global_network_scope,
        initial_learning_rate,
        learning_rate_input,
        grad_applier,
        MAX_TIME_STEP,
        device=device)
    training_threads.append(training_thread)

# Prepare session.
# allow_soft_placement lets TF fall back to CPU for ops without a GPU kernel.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                        allow_soft_placement=True))
init = tf.global_variables_initializer()
sess.run(init)

# create tensorboard summaries