Example #1
def main(_):

    config = AttrDict(default_config())
    # Define an agent that trains with the REINFORCE algorithm.
    agent = REINFORCE(config)

    train_results = []

    start_time = time.time()
    # Train for config.num_episodes episodes.
    for i, result in enumerate(agent.train(config.num_episodes)):
        evals = [agent.policy.evaluate() for _ in range(5)]
        result = result._replace(eval=np.mean(evals))

        print('\rEpisode {}/{} policy loss ({}), value loss ({}), eval ({})'.
              format(i, config.num_episodes, result.policy_loss,
                     result.val_loss, result.eval),
              end='',
              flush=True)

        train_results.append(result)

    end_time = time.time()
    duration = end_time - start_time
    # 100 episodes, Process duration: 59.979570150375366[s]
    print('\nProcess duration: {0}[s]'.format(duration))

    plot_agent_stats(train_results, 'REINFORCE algorithm.')
    plt.show()
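
Every example on this page builds its settings via AttrDict(default_config()), neither of which is shown here. A minimal sketch of what that pair could look like follows; the field names and default values are assumptions chosen to match how the examples read the config, not the repository's actual defaults:

class AttrDict(dict):
    """Dict whose keys are also readable as attributes, e.g. config.num_episodes."""
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

def default_config():
    # Hypothetical fields, for illustration only.
    return dict(env_name='CartPole-v0', num_episodes=100,
                learning_rate=1e-3, batch_size=64)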
Example #2
def main(_):
    config = AttrDict(default_config())
    # Define an agent that trains with the Actor-Critic algorithm.
    agent = ActorCrtic(config)

    remote_policies = [RemotePolicy.remote(config) for _ in range(5)]

    train_results = []

    start_time = time.time()
    # Train for config.num_episodes episodes.
    for i, result in enumerate(agent.train(config.num_episodes)):
        evals = agent.evaluate(remote_policies)
        result = result._replace(eval=np.mean(evals))

        print('\rEpisode {}/{} policy loss ({}), value loss ({}), eval ({})'.
              format(i, config.num_episodes, result.policy_loss,
                     result.val_loss, result.eval),
              end='',
              flush=True)

        train_results.append(result)

    end_time = time.time()
    duration = end_time - start_time
    # 100 episodes, Process duration: 40.74085235595703[s] using GPU
    print('\nProcess duration: {0}[s]'.format(duration))

    plot_agent_stats(train_results, 'Actor Critic algorithm.')
    plt.show()
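
Example #2 creates evaluators with RemotePolicy.remote(config), which is the calling convention of Ray actors. A hedged sketch of how such an actor and the parallel evaluation could be wired up; the class body, the evaluate_parallel helper, and the reward logic are assumptions, not the source:

import ray

@ray.remote
class RemotePolicy:
    """Hypothetical Ray actor wrapping one copy of the policy for evaluation."""
    def __init__(self, config):
        self.config = config

    def evaluate(self):
        # Placeholder: run one episode with the current weights and
        # return its total reward.
        return 0.0

def evaluate_parallel(remote_policies):
    # ray.init() must have been called before the actors were created.
    # Launch all evaluations at once, then block on the futures.
    return ray.get([p.evaluate.remote() for p in remote_policies])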
Example #3
def main(_):
    config = AttrDict(default_config())
    policy = Policy(config)
    # Restore the trained policy weights from the saved checkpoint.
    saver = tf.train.Saver()
    saver.restore(policy._sess, './checkpoints/reinforce_debug')

    frames = video_evaluate_policy(policy, config.env_name)
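
The video_evaluate_policy helper that Example #3 calls is not shown on this page. One plausible sketch using the classic gym rollout API; the policy.predict interface is an assumption:

import gym

def video_evaluate_policy(policy, env_name):
    """Roll out one episode and collect the rendered RGB frames."""
    env = gym.make(env_name)
    obs, done, frames = env.reset(), False, []
    while not done:
        frames.append(env.render(mode='rgb_array'))
        action = policy.predict(obs)  # assumed policy interface
        obs, _, done, _ = env.step(action)
    env.close()
    return frames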
Example #4
def main(_):
    # Set configuration
    config = AttrDict(default_config())
    # Build the one-hot MNIST model.
    model = benchmark_model.build_tf_one_hot_model(batch_size=config.batch_size,
                                                   use_bias=config.use_bias,
                                                   activation=config.activation)
    # Load the one-hot MNIST data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_one_hot_data(dataset=config.dataset)
    
    # Check that the dataset has the expected shape.
    assert x_train.shape == (60000, 784)
    assert y_train.shape == (60000, 10)
    
    # Minimize the model's loss with the NMF optimizer.
    optimizer = NMFOptimizer(config=config)
    train_op = optimizer.minimize(model.frob_norm)
    
    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.cross_entropy)
    
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        
        # Pre-train the autoencoder for 1,000 minibatches.
        for _ in range(1000):
            x, _ = benchmark_model.batch(x_train, y_train, batch_size=config.batch_size)
            loss, _ = sess.run([optimizer.autoencoder_loss, optimizer.autoencoder_train_op],
                               feed_dict={model.inputs: x})
            print('\rloss {}'.format(loss), end='', flush=True)
        print()
        
        _train_and_test = functools.partial(train_and_test,
                                            sess=sess, model=model,
                                            x_train=x_train, y_train=y_train,
                                            x_test=x_test, y_test=y_test,
                                            batch_size=config.batch_size)

        print('NMF-optimizer')
        # Train with NMF optimizer.
        _train_and_test(train_op, num_iters=config.num_mf_iters)

        print('Adam-optimizer')
        # Train with Adam optimizer.
        _train_and_test(bp_train_op, num_iters=config.num_bp_iters)
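
The pretraining loop in Example #4 draws minibatches through benchmark_model.batch. A minimal sketch of such a helper, assuming uniform random sampling (the real one may iterate over the data sequentially):

import numpy as np

def batch(x, y, batch_size):
    # Sample one random minibatch from the training set.
    idx = np.random.randint(0, len(x), size=batch_size)
    return x[idx], y[idx]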
Example #5
def main(_):
    # Set configuration
    config = AttrDict(default_config())
    # Build the cross-entropy MNIST model.
    model = benchmark_model.build_tf_cross_entropy_model(
        batch_size=config.batch_size,
        use_bias=config.use_bias,
        activation=config.activation)
    # Load the one-hot MNIST data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_one_hot_data(
        dataset=config.dataset)

    # Check that the dataset has the expected shape.
    assert x_train.shape == (60000, 784)
    assert y_train.shape == (60000, 10)

    # Minimize the model's loss with the NMF optimizer
    # (alternatively, pass the config: optimizer = NMFOptimizer(config)).
    optimizer = NMFOptimizer()
    train_op = optimizer.minimize(model.cross_entropy)

    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.cross_entropy)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        _train_and_test = functools.partial(train_and_test,
                                            sess=sess,
                                            model=model,
                                            x_train=x_train,
                                            y_train=y_train,
                                            x_test=x_test,
                                            y_test=y_test,
                                            batch_size=config.batch_size)

        print('NMF-optimizer')
        # Train with NMF optimizer.
        _train_and_test(train_op, num_iters=config.num_mf_iters)

        print('Adam-optimizer')
        # Train with Adam optimizer.
        _train_and_test(bp_train_op, num_iters=config.num_bp_iters)
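
Examples #4, #5, and #7 all route training through a shared train_and_test helper bound with functools.partial. A hedged sketch of its shape, inferred only from the call sites; model.inputs, model.labels, and model.loss are assumed attribute names, and the real helper evidently returns per-output loss arrays (see Example #7):

import numpy as np

def train_and_test(train_op, num_iters, sess, model,
                   x_train, y_train, x_test, y_test, batch_size):
    train_losses = []
    for i in range(num_iters):
        idx = np.random.randint(0, len(x_train), size=batch_size)
        _, loss = sess.run([train_op, model.loss],
                           feed_dict={model.inputs: x_train[idx],
                                      model.labels: y_train[idx]})
        train_losses.append(loss)
        print('\riter {}/{} loss ({})'.format(i + 1, num_iters, loss),
              end='', flush=True)
    test_loss = sess.run(model.loss, feed_dict={model.inputs: x_test,
                                                model.labels: y_test})
    print('\ntest loss ({})'.format(test_loss))
    return np.array(train_losses), np.array([test_loss])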
Example #6
    def minimize(self, loss=None, pretrain=False):
        """Construct the ops that optimize the network layer-wise with NMF.

        Returns:
            AttrDict with `ae` (the autoencoder pretraining op, or a no-op
            when `pretrain` is False) and `nmf` (the grouped assign ops that
            update each layer's kernel and bias).
        """
        self._init(loss)
        # Optionally pre-train with the autoencoder.
        pretrain_op = self._autoencoder() if pretrain else tf.no_op()

        # Start from the labels and factorize backward through the network.
        a = self.labels
        updates = []
        # Iterate over the layers in reverse order (output to input).
        layers = self._layers[::-1]
        for i, layer in enumerate(layers):
            u = layer.output
            v = layer.kernel
            if layer.use_bias:
                v = tf.concat((v, layer.bias[None, ...]), axis=0)

            # Linear layer (no activation): plain semi-NMF update.
            if not layer.activation:
                u, v = mf.semi_nmf(
                    a=a,
                    u=u,
                    v=v,
                    use_tf=True,
                    use_bias=layer.use_bias,
                    num_iters=1,
                    first_nneg=True,
                )
            # ReLU activation: nonlinear semi-NMF update.
            elif utility.get_op_name(layer.activation) == 'Relu':
                u, v = mf.nonlin_semi_nmf(
                    a=a,
                    u=u,
                    v=v,
                    use_tf=True,
                    use_bias=layer.use_bias,
                    num_calc_v=1,
                    num_calc_u=1,
                    first_nneg=True,
                )
            # Softmax output layer: softmax NMF update.
            elif utility.get_op_name(layer.activation) == 'Softmax':
                print('used softmax!!')
                u, v = mf.softmax_nmf(
                    a=a,
                    u=u,
                    v=v,
                    use_tf=True,
                    use_bias=layer.use_bias,
                )
            if layer.use_bias:
                v, bias = utility.split_v_bias(v)
                updates.append(layer.bias.assign(bias))
            updates.append(layer.kernel.assign(v))
            a = tf.identity(u)

        return AttrDict(ae=pretrain_op, nmf=tf.group(*updates))
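
Each branch above delegates the actual factorization to mf.semi_nmf and its variants. For intuition, here is an illustrative NumPy version of a single semi-NMF step A ≈ UV with V kept nonnegative, in the style of the multiplicative update of Ding et al. (2010); it mirrors the role mf.semi_nmf plays above but is not the repository's implementation:

import numpy as np

def semi_nmf_step(a, u, v, eps=1e-8):
    # Unconstrained factor U: exact least-squares solution of a ~= u @ v.
    u = a @ np.linalg.pinv(v)
    # Nonnegative factor V: multiplicative update that preserves nonnegativity.
    uta = u.T @ a
    utu = u.T @ u
    num = np.maximum(uta, 0) + np.maximum(-utu, 0) @ v
    den = np.maximum(-uta, 0) + np.maximum(utu, 0) @ v
    v = v * np.sqrt((num + eps) / (den + eps))
    return u, v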
Example #7
def main(_):
    print('use_bias', FLAGS.use_bias)
    LABEL_SIZE = 4
    # Set configuration
    config = AttrDict(default_config())
    # Build the simple Hitachi model.
    model = benchmark_model.build_tf_hitachi_simple_model(config.batch_size,
                                                          use_bias=config.use_bias,
                                                          activation=config.activation,
                                                          label_size=LABEL_SIZE)

    # Load the Hitachi data.
    (x_train, y_train), (x_test, y_test) = benchmark_model.load_hitachi_data(
        config.path, test_size=0.1)

    # Check that the dataset has the expected shape.
    # assert x_train.shape[1] == 3
    # assert y_train.shape[1] == 4

    # Minimize the model's loss with the NMF optimizer
    # (alternatively, pass the config: optimizer = NMFOptimizer(config)).
    optimizer = NMFOptimizer()
    train_op = optimizer.minimize(model.loss)

    # Minimize the model's loss with the Adam optimizer.
    bp_optimizer = tf.train.AdamOptimizer(config.learning_rate)
    bp_train_op = bp_optimizer.minimize(model.loss)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        _train_and_test = functools.partial(train_and_test,
                                            sess=sess, model=model,
                                            x_train=x_train, y_train=y_train,
                                            x_test=x_test, y_test=y_test,
                                            batch_size=config.batch_size)

        print('Adam-optimizer')
        # Train with Adam optimizer.
        bp_train_losses, bp_test_losses = _train_and_test(bp_train_op, num_iters=config.num_bp_iters,
                                                          add_loss_before_train=True)

        print('NMF-optimizer')
        # Train with NMF optimizer.
        mf_train_losses, mf_test_losses = _train_and_test(train_op, num_iters=config.num_mf_iters)

    length_bp = bp_train_losses.shape[1]

    # Prepend the last BP loss so the NMF curves continue from it.
    mf_train_losses = np.hstack([bp_train_losses[:, -1, np.newaxis], mf_train_losses])

    length_mf = mf_train_losses.shape[1]

    bp_x = np.arange(0, length_bp)
    mf_x = np.arange(length_bp - 1, length_bp + length_mf - 1)

    plt.rc('grid', linestyle='--', color='black')

    fig, ax = plt.subplots(2, 2, figsize=(6 * 2.5, 4 * 2.5))

    if config.num_bp_iters > 0:
        ax[0, 0].plot(bp_x, bp_train_losses[0], linestyle='-', color='dodgerblue', label='BP-B{}'.format(1))
        ax[0, 1].plot(bp_x, bp_train_losses[1], linestyle='-', color='dodgerblue', label='BP-B{}'.format(2))
        ax[1, 0].plot(bp_x, bp_train_losses[2], linestyle='-', color='dodgerblue', label='BP-B{}'.format(3))
        ax[1, 1].plot(bp_x, bp_train_losses[3], linestyle='-', color='dodgerblue', label='BP-B{}'.format(4))
    if config.num_mf_iters > 0:
        ax[0, 0].plot(mf_x, mf_train_losses[0], linestyle='-', color='black', label='NMF-B{}'.format(1))
        ax[0, 1].plot(mf_x, mf_train_losses[1], linestyle='-', color='black', label='NMF-B{}'.format(2))
        ax[1, 0].plot(mf_x, mf_train_losses[2], linestyle='-', color='black', label='NMF-B{}'.format(3))
        ax[1, 1].plot(mf_x, mf_train_losses[3], linestyle='-', color='black', label='NMF-B{}'.format(4))
    ax[0, 0].set_title("B1")
    ax[0, 1].set_title("B2")
    ax[1, 0].set_title("B3")
    ax[1, 1].set_title("B4")

    # plt.plot(bp_x, bp_test_losses, linestyle='--', color='dodgerblue', label='test-bp')
    # plt.plot(mf_x, mf_test_losses, linestyle='--', color='black', label='test-nmf')
    plt.xlabel('#Epoch')
    plt.ylabel('')
    # plt.legend()
    plt.grid(True)
    plt.show()
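
One caveat in the plotting code above: figure-level calls such as plt.xlabel, plt.ylabel, and plt.grid only affect the last active axes (ax[1, 1]). A small follow-up sketch that applies the labels, the grid, and the commented-out legend to every panel instead; finish_panels is a hypothetical helper name:

def finish_panels(fig, ax):
    # Label, grid, and add a legend on each of the four panels.
    for a in ax.ravel():
        a.set_xlabel('#Epoch')
        a.set_ylabel('loss')
        a.grid(True)
        a.legend()
    fig.tight_layout()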