Code Example #1
import numpy as np
import tensorflow as tf
from threading import Thread

# Project-specific helpers assumed to be provided by the surrounding code base:
# Watcher, Environment, reset_graph, discount_and_normalize_rewards, LOGDIR


def main():
    w = Watcher('mops/mp1', 'mops/mp2')

    thread1 = Thread(target=w.run)

    thread1.start()

    # To make the output stable across runs
    reset_graph()

    # [nr_file1, nr_file2, p_file1]
    n_inputs = 3
    n_hidden = 2
    # whether it chooses to switch file positions
    n_outputs = 1

    learning_rate = 0.1

    # Initializer capable of adapting its scale to the shape of the weight tensors.
    initializer = tf.variance_scaling_initializer()

    # Placeholder for a tensor that will always be fed.
    # Basically we are creating a variable that accepts any number (None) of
    # inputs, each of size n_inputs.
    X = tf.placeholder(tf.float32, shape=[None, n_inputs])

    hidden = tf.layers.dense(X,
                             n_hidden,
                             activation=tf.nn.elu,
                             kernel_initializer=initializer)

    logits = tf.layers.dense(hidden, n_outputs)

    outputs = tf.nn.sigmoid(logits)  # probability of action 0 (not to migrate)

    p_migrations = tf.concat(axis=1, values=[outputs, 1 - outputs])

    action = tf.multinomial(tf.log(p_migrations), num_samples=1)

    # tf.summary.scalar('action', action)

    # Pretend the sampled action is the best possible one: the target
    # probability of action 0 is 1.0 when action 0 was sampled, 0.0 otherwise.
    y = 1. - tf.to_float(action)

    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                            logits=logits)

    # tf.summary.scalar([name1, name2], cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    # correct_prediction = tf.equal(tf.argmax(logits,1), tf.argmax(y,1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    grads_and_vars = optimizer.compute_gradients(cross_entropy)

    gradients = [grad for grad, variable in grads_and_vars]
    gradient_placeholders = []
    grads_and_vars_feed = []

    # One placeholder per gradient, so the gradients averaged over several
    # runs can be fed back in. A histogram summary is attached to each
    # placeholder individually (a single summary over the whole list would
    # fail, because the gradient tensors have different shapes).
    for grad, variable in grads_and_vars:
        gradient_placeholder = tf.placeholder(tf.float32,
                                              shape=grad.get_shape())
        tf.summary.histogram(variable.name.split(':')[0] + "/gradients",
                             gradient_placeholder)
        gradient_placeholders.append(gradient_placeholder)
        grads_and_vars_feed.append((gradient_placeholder, variable))

    training_op = optimizer.apply_gradients(grads_and_vars_feed)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    n_runs_per_update = 10
    n_max_steps = 10
    n_iterations = 20
    save_iterations = 5
    discount_rate = 0.95

    env = Environment(
        [w.get_nr_of_reads('fich1.txt'),
         w.get_nr_of_reads('fich2.txt'), 1], w)

    # tf.reset_default_graph()
    with tf.Session() as sess:
        sess.run(init)
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(LOGDIR + "lr_{}".format(learning_rate))
        writer.add_graph(sess.graph)
        # init.run()
        for iteration in range(n_iterations):
            print("=" * 79)
            print("\rIteration: {}".format(iteration), end="")
            all_rewards = []
            all_gradients = []
            for game in range(n_runs_per_update):
                current_rewards = []
                current_gradients = []
                obs = env.reset()
                print(obs)
                for step in range(n_max_steps):
                    action_val, gradients_val = sess.run(
                        [action, gradients],
                        feed_dict={X: np.asarray(obs).reshape(1, n_inputs)})
                    print("WOW")
                    print(action_val)
                    print("WOW")
                    print(action_val[0][0])
                    obs, reward = env.step(action_val[0][0])
                    print(obs)
                    current_rewards.append(reward)
                    current_gradients.append(gradients_val)
                print(current_rewards)
                all_rewards.append(current_rewards)
                all_gradients.append(current_gradients)
                w.reset()

            all_rewards = discount_and_normalize_rewards(
                all_rewards, discount_rate=discount_rate)
            feed_dict = {}
            for var_index, gradient_placeholder in enumerate(
                    gradient_placeholders):
                mean_gradients = np.mean(
                    [reward * all_gradients[game_index][step][var_index]
                     for game_index, rewards in enumerate(all_rewards)
                     for step, reward in enumerate(rewards)],
                    axis=0)
                feed_dict[gradient_placeholder] = mean_gradients
            # Run the training step and the gradient histograms with the same
            # feed, so both use the averaged, reward-weighted gradients.
            _, summary = sess.run([training_op, merged_summary],
                                  feed_dict=feed_dict)
            writer.add_summary(summary, iteration)

            # No embedding variables are registered, so the projector config
            # is left disabled here:
            # config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            # tf.contrib.tensorboard.plugins.projector.visualize_embeddings(
            #     writer, config)

            if iteration % save_iterations == 0:
                saver.save(sess, "./my_policy_net_pg.ckpt")
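
The helper discount_and_normalize_rewards is called in the listing but not
shown there. Below is a minimal sketch of how such a helper is commonly
implemented; it is an assumption about its behaviour (not the project's actual
code), based only on how the listing calls it with the per-run reward lists and
a discount_rate keyword.

def discount_rewards(rewards, discount_rate):
    # Propagate each reward backwards through the run, discounting it by
    # discount_rate at every step.
    discounted = np.zeros(len(rewards))
    cumulative = 0.0
    for step in reversed(range(len(rewards))):
        cumulative = rewards[step] + cumulative * discount_rate
        discounted[step] = cumulative
    return discounted


def discount_and_normalize_rewards(all_rewards, discount_rate):
    # Discount every run, then normalize across all runs so that
    # better-than-average actions get positive scores and worse ones negative.
    all_discounted = [discount_rewards(rewards, discount_rate)
                      for rewards in all_rewards]
    flat = np.concatenate(all_discounted)
    mean, std = flat.mean(), flat.std()
    return [(discounted - mean) / std for discounted in all_discounted]

With this shape, the returned value is one normalized score array per run,
which matches how the training loop above indexes all_rewards by game and step.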