momentum=0.0,
                                  epsilon=RMSP_EPSILON,
                                  clip_norm=GRAD_NORM_CLIP,
                                  device=device)

    # Build one A3CTrainingThread per parallel slot.  Worker `i` takes its
    # (scene, task) pair from `branches`, cycling through the entries with
    # `i % NUM_TASKS`, so each worker trains one target in one scene.
    training_threads = []
    for i in range(PARALLEL_SIZE):
        branch = branches[i % NUM_TASKS]
        scene, task = branch
        scope_name = "thread-%d" % (i + 1)  # network scopes are numbered from 1

        # Every worker is handed the same global network, learning-rate
        # input and gradient applier objects.
        training_thread = A3CTrainingThread(
            i,
            global_network,
            initial_learning_rate,
            learning_rate_input,
            grad_applier,
            MAX_TIME_STEP,
            device=device,
            network_scope=scope_name,
            scene_scope=scene,
            task_scope=task)
        training_threads.append(training_thread)

    # Open the session: device-placement logging is off and soft placement
    # is enabled.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)

    # Run the global-variable initializer once, after all workers have been
    # constructed above.
    init = tf.global_variables_initializer()
    sess.run(init)

    # create tensorboard summaries
Beispiel #2
0
    # Create the worker threads.  Each worker gets a dedicated (scene, task)
    # pair taken from `branches`, cycling through them with `i % NUM_TASKS`.
    training_threads = []

    for i in range(PARALLEL_SIZE):
        branch = branches[i % NUM_TASKS]
        scene, task = branch
        scope_name = "thread-%d" % (i + 1)  # network scopes are numbered from 1

        # All workers receive the same global network / discriminator objects
        # and the same pair of gradient appliers; only the index, scope name
        # and (scene, task) assignment differ between them.
        training_thread = A3CTrainingThread(
            i, global_network, global_discriminator,
            initial_learning_rate, learning_rate_input,
            grad_applier, grad_applier_discriminator,
            MAX_TIME_STEP,
            device=device, device2=device2,
            network_scope=scope_name,
            scene_scope=scene, task_scope=task)
        training_threads.append(training_thread)

    # Open the session: device-placement logging is off and soft placement
    # is enabled.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)

    # Run the global-variable initializer once, after all workers have been
    # constructed above.
    init = tf.global_variables_initializer()
    sess.run(init)
Beispiel #3
0
    # Collected A3CTrainingThread instances, one per parallel agent.
    training_threads = []

    # Float placeholder used as the learning rate of the shared RMSProp
    # applier (presumably fed with the current rate at train time -- the
    # feeding code is not visible here).
    learning_rate_input = tf.placeholder("float")

    # A single RMSPropApplier instance is passed to every worker below.
    grad_applier = RMSPropApplier(
        learning_rate=learning_rate_input,
        decay=settings.rmsp_alpha,
        momentum=0.0,
        epsilon=settings.rmsp_epsilon,
        clip_norm=settings.grad_norm_clip,
        device=device)

    for i in range(settings.parallel_agent_size):
        # Each worker has its own entry in `initial_learning_rates`; all
        # remaining arguments are passed positionally, in this exact order.
        training_thread = A3CTrainingThread(
            i,
            global_network,
            initial_learning_rates[i],
            learning_rate_input,
            grad_applier,
            settings.max_time_step,
            device,
            settings.action_size,
            settings.gamma,
            settings.local_t_max,
            settings.entropy_beta,
            settings.agent_type,
            settings.performance_log_interval,
            settings.log_level,
            settings.random_seed)

        training_threads.append(training_thread)

    # Open the session: device-placement logging is off and soft placement
    # is enabled.
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)

    # Run the global-variable initializer once, after all workers have been
    # constructed above.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Summary writer targeting LOG_FILE (it is also given the session graph),
    # wrapped by the Statistics helper.
    summary_writer = tf.summary.FileWriter(LOG_FILE, sess.graph)
    statistics = Statistics(sess, summary_writer, settings.average_summary)
    # One RMSProp optimizer instance is passed to every worker below; its
    # learning rate is supplied through `learning_rate_input`.
    grad_applier = tf.train.RMSPropOptimizer(learning_rate=learning_rate_input,
                                             decay=RMSP_ALPHA,
                                             momentum=0.0,
                                             epsilon=RMSP_EPSILON)

    # Build one A3CTrainingThread per parallel slot.  Workers differ only in
    # their index `i`; every other argument is the same object for all.
    training_threads = []

    for i in range(PARALLEL_SIZE):
        training_thread = A3CTrainingThread(
            config, env, i, global_network_scope,
            initial_learning_rate, learning_rate_input,
            grad_applier, MAX_TIME_STEP,
            device=device)
        training_threads.append(training_thread)

    # Open the session: device-placement logging is off and soft placement
    # is enabled.  (The local is deliberately not called `config`, which is
    # already in use above.)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)

    # Run the global-variable initializer once, after all workers have been
    # constructed above.
    init = tf.global_variables_initializer()
    sess.run(init)

    # create tensorboard summaries