Code Example #1
File: cifar10_conv.py (Project: mengyuan404/tfsnippet)
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
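    # spt.AnnealingVariable keeps the learning rate as an in-graph variable;
    # contrast with the placeholder + AnnealingDynamicValue pattern in
    # Code Example #2, where the annealed value is fed at every step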
    multi_gpu = MultiGPU()

    # build the model
    grads = []
    losses = []
    y_list = []
    acc_list = []
    batch_size = spt.utils.get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x, dev_input_y
                         ] in multi_gpu.data_parallel(batch_size,
                                                      [input_x, input_y]):
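        # the first pass is a "pre-build": it instantiates the model variables
        # once; subsequent passes build per-device replicas that reuse them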
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                _ = model(dev_input_x, is_training, channels_last=True)

            else:
                # derive the loss, output and accuracy
                dev_logits = model(dev_input_x,
                                   is_training=is_training,
                                   channels_last=multi_gpu.channels_last(dev))
                dev_cls_loss = tf.losses.sparse_softmax_cross_entropy(
                    dev_input_y, dev_logits)
                dev_loss = dev_cls_loss + tf.losses.get_regularization_loss()
                dev_y = spt.ops.softmax_classification_output(dev_logits)
                dev_acc = spt.ops.classification_accuracy(dev_y, dev_input_y)
                losses.append(dev_loss)
                y_list.append(dev_y)
                acc_list.append(dev_acc)

                # derive the optimizer
                params = tf.trainable_variables()
                grads.append(
                    optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, acc] = multi_gpu.average([losses, acc_list], batch_size)
    [y] = multi_gpu.concat([y_list])
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape, normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x, input_y],
                                  train_flow,
                                  feed_dict={is_training: True},
                                  metrics={
                                      'loss': loss,
                                      'acc': acc
                                  })
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      feed_dict={is_training: False},
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
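
The model function above is defined elsewhere in cifar10_conv.py; a minimal sketch with the signature this example expects (hypothetical: the real network is a deeper convnet, and is_training would gate dropout/batch normalization there):

import tensorflow as tf

def model(x, is_training, channels_last=True):
    # translate the flag into tf.layers' data_format convention
    data_format = 'channels_last' if channels_last else 'channels_first'
    h = tf.layers.conv2d(x, 32, 3, padding='same',
                         activation=tf.nn.relu, data_format=data_format)
    h = tf.layers.max_pooling2d(h, 2, 2, data_format=data_format)
    h = tf.layers.flatten(h)
    return tf.layers.dense(h, 10)  # CIFAR-10 class logits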
Code Example #2
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)
    multi_gpu = MultiGPU(disable_prebuild=False)

    # build the model
    grads = []
    losses = []
    test_nlls = []
    test_lbs = []
    batch_size = get_batch_size(input_x)
    params = None
    optimizer = tf.train.AdamOptimizer(learning_rate)

    for dev, pre_build, [dev_input_x
                         ] in multi_gpu.data_parallel(batch_size, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            if pre_build:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=True):
                    _ = q_net(dev_input_x).chain(p_net,
                                                 latent_names=['z'],
                                                 observed={'x': dev_input_x})

            else:
                with arg_scope([p_net, q_net],
                               is_training=is_training,
                               channels_last=multi_gpu.channels_last(dev)):
                    # derive the loss and lower-bound for training
                    train_q_net = q_net(dev_input_x)
                    train_chain = train_q_net.chain(
                        p_net,
                        latent_names=['z'],
                        latent_axis=0,
                        observed={'x': dev_input_x})

                    dev_vae_loss = tf.reduce_mean(
                        train_chain.vi.training.sgvb())
                    dev_loss = dev_vae_loss + regularization_loss()
                    losses.append(dev_loss)

                    # derive the nll and logits output for testing
                    test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                    test_chain = test_q_net.chain(p_net,
                                                  latent_names=['z'],
                                                  latent_axis=0,
                                                  observed={'x': dev_input_x})
                    dev_test_nll = -tf.reduce_mean(
                        test_chain.vi.evaluation.is_loglikelihood())
                    dev_test_lb = tf.reduce_mean(
                        test_chain.vi.lower_bound.elbo())
                    test_nlls.append(dev_test_nll)
                    test_lbs.append(dev_test_lb)

                    # derive the optimizer
                    params = tf.trainable_variables()
                    grads.append(
                        optimizer.compute_gradients(dev_loss, var_list=params))

    # merge multi-gpu outputs and operations
    [loss, test_lb, test_nll] = \
        multi_gpu.average([losses, test_lbs, test_nlls], batch_size)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(grads),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # derive the plotting function
    work_dev = multi_gpu.work_devices[0]
    with tf.device(work_dev), tf.name_scope('plot_x'):
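        # draw 100 samples from the prior; bernoulli_as_pixel turns the
        # Bernoulli outputs into 0-255 grayscale pixels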
        plot_p_net = p_net(n_z=100,
                           is_training=is_training,
                           channels_last=multi_gpu.channels_last(work_dev))
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10),
                                   results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['q_net', 'p_net'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op, [input_x],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': test_lb
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
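
The q_net and p_net used above are model functions defined elsewhere in the project; a minimal fully-connected skeleton of the shape this example expects (hypothetical and for orientation only: the real networks are convolutional and accept the is_training/channels_last arguments seen in the arg_scope):

import tensorflow as tf
import tfsnippet as spt

@spt.global_reuse
def q_net(x, is_training=False, channels_last=True, n_z=None):
    net = spt.BayesianNet()
    h_x = tf.layers.dense(tf.to_float(x), 500, activation=tf.nn.relu)
    # posterior q(z|x)
    net.add('z',
            spt.Normal(mean=tf.layers.dense(h_x, 40),
                       logstd=tf.layers.dense(h_x, 40)),
            n_samples=n_z, group_ndims=1)
    return net

@spt.global_reuse
def p_net(observed=None, n_z=None, is_training=False, channels_last=True):
    net = spt.BayesianNet(observed=observed)
    # prior p(z)
    z = net.add('z',
                spt.Normal(mean=tf.zeros([1, 40]), logstd=tf.zeros([1, 40])),
                n_samples=n_z, group_ndims=1)
    h_z = tf.layers.dense(tf.to_float(z), 500, activation=tf.nn.relu)
    # likelihood p(x|z)
    net.add('x', spt.Bernoulli(logits=tf.layers.dense(h_z, 784)),
            group_ndims=1)
    return net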
Code Example #3
File: gm_vae.py (Project: 897615138/tfsnippet-jill)
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(),
                                   dtype=tf.float32,
                                   name='learning_rate')
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    with arg_scope([q_net, p_net], is_training=is_training):
        # derive the loss and lower-bound for training
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(p_net,
                                        latent_names=['y', 'z'],
                                        latent_axis=0,
                                        observed={'x': input_x})

        if config.train_n_samples is None:
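            # a single latent sample cannot use VIMCO, so train with
            # REINFORCE plus a learned baseline instead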
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + regularization_loss()

        # derive the nll and logits output for testing
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(p_net,
                                      latent_names=['y', 'z'],
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits, axis=-1)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
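    # ensure the batch-norm moving-average updates (UPDATE_OPS) run with
    # every training step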
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plot_x'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10,
            is_training=is_training)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(config.n_clusters, 10),
                                   results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(x_train,
                            config.batch_size,
                            shuffle=False,
                            skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(outputs=[q_y_given_x],
                                       inputs=[input_x],
                                       data_flow=df,
                                       feed_dict={is_training: False})
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(outputs=[q_y_given_x],
                                       inputs=[input_x],
                                       data_flow=test_flow,
                                       feed_dict={is_training: False})
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with TrainLoop(params,
                       var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op, [input_x],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_nll': test_nll},
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                train_classifier, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                evaluate_classifier, loop),
                                          freq=10)

            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #4
File: main.py (Project: sajavadi/OmniAnomaly)
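# (this snippet is the tail of main(): it saves the model variables with
# VariableSaver and prints the best validation metrics)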
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)


if __name__ == '__main__':

    # get config obj
    config = ExpConfig()

    # parse the arguments
    arg_parser = ArgumentParser()
    register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])
    config.x_dim = get_data_dim(config.dataset)

    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories if specified
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs(config.save_dir, exist_ok=True)
    with warnings.catch_warnings():
        # suppress the NumPy DeprecationWarning triggered by code inside TensorFlow Probability
        warnings.filterwarnings("ignore",
                                category=DeprecationWarning,
                                module='numpy')
        main()
Code Example #5
File: cifar10.py (Project: 897615138/tfsnippet-jill)
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x, is_training=is_training)
    softmax_loss = softmax_classification_loss(logits, input_y)
    loss = softmax_loss + regularization_loss()
    y = softmax_classification_output(logits)
    acc = classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = DataFlow.arrays([x_train, y_train],
                                 config.batch_size,
                                 shuffle=True,
                                 skip_incomplete=True)
    test_flow = DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op, [input_x, input_y],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={
                                  'loss': loss,
                                  'acc': acc
                              })
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={'test_acc': acc},
                                  inputs=[input_x, input_y],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
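
The bare helpers in this example (softmax_classification_loss, regularization_loss, softmax_classification_output, classification_accuracy) are imported from tfsnippet elsewhere in the file; functionally equivalent sketches of the two loss helpers, judging from how Code Example #1 spells the same steps out with tf.losses (hypothetical):

import tensorflow as tf

def softmax_classification_loss(logits, y):
    # mean sparse softmax cross-entropy over the batch
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)

def regularization_loss():
    # sum of every regularization term registered in the graph
    return tf.losses.get_regularization_loss()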
Code Example #6
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, n_samples=config.train_n_samples)
        train_chain = train_q_net.chain(p_net,
                                        latent_axis=0,
                                        observed={'x': input_x})

        if config.vi_algorithm == 'reinforce':
            baseline = reinforce_baseline_net(input_x)
            vae_loss = tf.reduce_mean(
                train_chain.vi.training.reinforce(baseline=baseline))
        else:
            assert (config.vi_algorithm == 'vimco')
            vae_loss = tf.reduce_mean(train_chain.vi.training.vimco())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_samples=config.test_n_samples)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())

        # derive the classifier via q(y|x)
        q_y_given_x = tf.argmax(test_q_net['y'].distribution.logits,
                                axis=-1,
                                name='q_y_given_x')

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(
            observed={'y': tf.range(config.n_clusters, dtype=tf.int32)},
            n_z=10)
        x_plots = tf.reshape(
            tf.transpose(bernoulli_as_pixel(plot_p_net['x']), (1, 0, 2)),
            (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(config.n_clusters, 10),
                                   results=results)

    # derive the final un-supervised classifier
    c_classifier = ClusteringClassifier(config.n_clusters, 10)

    def train_classifier(loop):
        df = bernoulli_flow(x_train,
                            config.batch_size,
                            shuffle=False,
                            skip_incomplete=False)
        with loop.timeit('cls_train_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=df,
            )
            c_classifier.fit(c_pred, y_train)
            print(c_classifier.describe())

    def evaluate_classifier(loop):
        with loop.timeit('cls_test_time'):
            [c_pred] = collect_outputs(
                outputs=[q_y_given_x],
                inputs=[input_x],
                data_flow=test_flow,
            )
            y_pred = c_classifier.predict(c_pred)
            cls_metrics = {'test_acc': accuracy_score(y_test, y_pred)}
            loop.collect_metrics(cls_metrics)
            results.update_metrics(cls_metrics)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['p_net', 'q_net', 'gaussian_mixture_prior'],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss},
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_nll': test_nll},
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                train_classifier, loop),
                                          freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                evaluate_classifier, loop),
                                          freq=10)

            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    with codecs.open('cluster_classifier.txt', 'wb', 'utf-8') as f:
        f.write(c_classifier.describe())
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #7
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.1,
                       dtype=tf.float32,
                       name='beta',
                       trainable=True)
    beta = tf.clip_by_value(beta, config.beta, 1.0)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
         arg_scope([spt.layers.act_norm], initializing=True), \
         spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, n_z=config.train_n_qz)
        init_p_net = p_net(observed={
            'x': input_x,
            'z': init_q_net['z']
        },
                           n_z=config.train_n_qz,
                           beta=beta)
        init_loss = get_all_loss(init_q_net, init_p_net)

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
         arg_scope([batch_norm], training=True):
        train_q_net = q_net(input_x, n_z=config.train_n_qz)
        train_p_net = p_net(observed={
            'x': input_x,
            'z': train_q_net['z']
        },
                            n_z=config.train_n_qz,
                            beta=beta)

        VAE_loss = get_all_loss(train_q_net, train_p_net)
        VAE_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_qz)
        test_chain = test_q_net.chain(p_net,
                                      observed={'x': input_x},
                                      n_z=config.test_n_qz,
                                      latent_axis=0,
                                      beta=beta)
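        # is_loglikelihood() gives the importance-sampled log p(x); adding
        # x_shape_multiple * np.log(128.0) converts the density over the
        # 1/128-scaled inputs into a discrete per-pixel log-likelihood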
        test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(test_chain.vi.evaluation.is_loglikelihood(), (
                    -1,
                    config.test_x_samples,
                )),
                axis=-1)) + config.x_shape_multiple * np.log(128.0)

        test_recon = tf.reduce_mean(test_chain.model['x'].log_prob())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(test_chain.model['x'] * 128 + 127.5))**2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(tf.reshape(test_mse, (
                -1,
                config.test_x_samples,
            )),
                           axis=-1))

    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables(
            'G_theta') + tf.trainable_variables('beta')
        print("========VAE_params=========")
        print(VAE_params)
        with tf.variable_scope('VAE_optimizer'):
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
        VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)

        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = 256.0 * tf.reshape(
            p_net(n_z=100, mcmc_iterator=0, beta=beta)['x'].distribution.mean,
            (-1, ) + config.x_shape) / 2 + 127.5
        reconstruct_q_net = q_net(input_x)
        reconstruct_z = reconstruct_q_net['z']
        reconstruct_plots = 256.0 * tf.reshape(
            p_net(observed={'z': reconstruct_z}, beta=beta)['x'],
            (-1, ) + config.x_shape) / 2 + 127.5
        x_plots = tf.clip_by_value(x_plots, 0, 255)
        reconstruct_plots = tf.clip_by_value(reconstruct_plots, 0, 255)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            # plot samples
            images = session.run(x_plots)
            # pyplot.scatter(z_points[:, 0], z_points[:, 1], s=5)
            # pyplot.savefig(results.system_path('plotting/z_plot/{}.pdf'.format(loop.epoch)))
            # pyplot.close()
            # print(images)
            try:
                print(np.max(images), np.min(images))
                images = np.round(images)
                save_images_collection(
                    images=images,
                    filename='plotting/sample/{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )

                # plot reconstructs
                for [x] in reconstruct_train_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150, ) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/train.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break

                # plot reconstructs
                for [x] in reconstruct_test_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150, ) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/test.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break
            except Exception as e:
                print(e)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    uniform_sampler = UniformNoiseSampler(-1.0 / 256.0,
                                          1.0 / 256.0,
                                          dtype=np.float64)
    train_flow = spt.DataFlow.arrays([x_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    train_flow = train_flow.map(uniform_sampler)
    reconstruct_train_flow = spt.DataFlow.arrays([x_train],
                                                 50,
                                                 shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test],
                                                50,
                                                shuffle=True,
                                                skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [np.repeat(x_test, config.test_x_samples, axis=0)],
        config.test_batch_size)
    test_flow = test_flow.map(uniform_sampler)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch + 1,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
                restore_checkpoint=
                "/mnt/mfs/mlstorage-experiments/cwx17/83/19/6f3b6c3ef49d6d6c81d5/checkpoint/checkpoint/checkpoint.dat-585000"
        ) as loop:

            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb,
                                          'test_recon': test_recon,
                                          'test_mse': test_mse
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()
            epoch_iterator = loop.iter_epochs()
            for epoch in epoch_iterator:
                plot_samples(loop)
                dataset_img = _x_train
                mala_img = []
                for i in range(config.fid_samples // config.sample_n_z):
                    mala_images = session.run(x_plots)
                    mala_img.append(mala_images)
                    print('{}-th sample finished...'.format(i))

                mala_img = np.concatenate(mala_img, axis=0).astype('uint8')
                mala_img = np.asarray(mala_img)
                np.savez('sample_store', mala_img=mala_img)

                FID = get_fid(mala_img, dataset_img)
                IS_mean, IS_std = get_inception_score(mala_img)
                loop.collect_metrics(FID=FID)
                loop.collect_metrics(IS=IS_mean)

                # ori_img = np.concatenate(ori_img, axis=0).astype('uint8')
                # ori_img = np.asarray(ori_img)
                # FID = get_fid_google(ori_img, dataset_img)
                # IS_mean, IS_std = get_inception_score(ori_img)
                # loop.collect_metrics(FID_ori=FID)
                # loop.collect_metrics(IS_ori=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #8
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers,
                                                         name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    input_origin_x = tf.placeholder(dtype=tf.float32,
                                    shape=(None, ) + config.x_shape,
                                    name='input_origin_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0,
                       dtype=tf.float32,
                       name='beta',
                       trainable=True)

    # derive the nll and logits output for testing
    with tf.name_scope('testing'), \
         arg_scope([batch_norm], training=True):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz,
                            mcmc_iterator=0,
                            beta=beta,
                            log_Z=get_log_Z())
        test_p_net = p_net(observed={'z': test_q_net['z']},
                           n_z=config.test_n_qz,
                           beta=beta,
                           log_Z=get_log_Z())
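        # pn_abs / p_abs measure how far the average of D_psi over samples
        # deviates from D_psi at the distribution mean (prior vs. posterior)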
        pn_abs = tf.abs(
            tf.reduce_mean(D_psi(test_pn_net['x']), axis=0) -
            D_psi(test_pn_net['x'].distribution.mean))
        print(pn_abs)
        pn_abs = tf.reduce_mean(pn_abs)
        p_abs = tf.abs(
            tf.reduce_mean(D_psi(test_p_net['x']), axis=0) -
            D_psi(test_p_net['x'].distribution.mean))
        p_abs = tf.reduce_mean(p_abs)

    xi_node = get_var('p_net/xi')

    # prepare for training and testing data
    (_x_train,
     _y_train), (_x_test,
                 _y_test) = spt.datasets.load_cifar10(x_shape=config.x_shape)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    # uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float)
    train_flow = spt.DataFlow.arrays([x_train, x_train],
                                     config.test_batch_size)
    random_train_flow = spt.DataFlow.arrays([x_train, x_train],
                                            config.test_batch_size,
                                            shuffle=True)
    reconstruct_train_flow = spt.DataFlow.arrays([x_train],
                                                 100,
                                                 shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test],
                                                100,
                                                shuffle=True,
                                                skip_incomplete=False)
    test_flow = spt.DataFlow.arrays([x_test, x_test], config.test_batch_size)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/3d/0c/d445f4f80a9fee59aed5/checkpoint/checkpoint/checkpoint.dat-312000'

        # train the network
        with spt.TrainLoop(tf.trainable_variables(),
                           var_groups=[
                               'q_net', 'p_net', 'posterior_flow', 'G_theta',
                               'D_psi', 'G_omega', 'D_kappa'
                           ],
                           max_epoch=config.max_epoch + 10,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False,
                           checkpoint_dir=results.system_path('checkpoint'),
                           checkpoint_epoch_freq=100,
                           restore_checkpoint=restore_checkpoint) as loop:
            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()

            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'pn_abs': pn_abs,
                                          'p_abs': p_abs
                                      },
                                      inputs=[input_x, input_origin_x],
                                      data_flow=train_flow,
                                      time_metric_name='test_time')

            # adversarial training
            for epoch in epoch_iterator:
                evaluator.run()
                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #9
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(
        config.nf_layers, name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.int32, shape=(None,) + config.x_shape, name='input_x')
    input_origin_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape, name='input_origin_x')
    warm = tf.placeholder(
        dtype=tf.float32, shape=(), name='warm')
    mcmc_alpha = tf.placeholder(
        dtype=tf.float32, shape=(1,), name='mcmc_alpha')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True)

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_origin_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z())
        test_chain = test_q_net.chain(p_net, observed={'x': tf.to_float(input_x)}, n_z=config.test_n_qz, latent_axis=0,
                                      beta=beta, log_Z=get_log_Z())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 255.0) - tf.round(
                tf.to_float(test_chain.model['x']) * 255.0)) ** 2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(tf.reduce_mean(tf.reshape(
            test_mse, (-1, config.test_x_samples,)
        ), axis=-1))
        test_nll = -tf.reduce_mean(
            tf.reshape(
                test_chain.vi.evaluation.is_loglikelihood(), (-1, config.test_x_samples,)
            )
        )
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())
        test_recon = test_chain.model['x'].log_prob()
        p_z = test_chain.model['z'].distribution.log_prob(
            test_chain.model['z'], group_ndims=1, y=test_chain.model['x']
        ).log_energy_prob
        q_z_given_x = test_q_net['z'].log_prob()

        vi = spt.VariationalInference(
            log_joint=test_recon + p_z,
            latent_log_probs=[q_z_given_x],
            axis=0
        )
        test_recon = tf.reduce_mean(test_recon)
        adv_test_nll = -tf.reduce_mean(
            tf.reshape(
                vi.evaluation.is_loglikelihood(), (-1, config.test_x_samples,)
            )
        )
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) * tf.exp(
                test_pn_net['z'].log_prob().log_energy_prob - test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob())

        p_z_energy = test_chain.model['z'].log_prob().energy

        another_log_Z_compute_op = spt.ops.log_mean_exp(
            -p_z_energy - q_z_given_x + np.log(config.len_train)
        )
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() - test_pn_net['z'].log_prob().log_energy_prob
        )
    xi_node = get_var('p_net/xi')
    # derive the optimizer

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.fashion_mnist(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = _x_train / 255.0
    x_test = _x_test / 255.0
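    # binarize the images on the fly: the model consumes Bernoulli samples of
    # x, while the real-valued image rides along as the second array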
    bernoulli_sampler = BernoulliSampler()
    train_flow = spt.DataFlow.arrays([x_train, x_train], config.batch_size, shuffle=True, skip_incomplete=True)
    train_flow = train_flow.map(lambda x, y: [bernoulli_sampler.sample(x), y])
    Z_compute_flow = spt.DataFlow.arrays([x_train, x_train], config.test_batch_size, shuffle=True, skip_incomplete=True)
    Z_compute_flow = Z_compute_flow.map(lambda x, y: [bernoulli_sampler.sample(x), y])
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)

    test_flow = spt.DataFlow.arrays(
        [x_test, x_test],
        config.test_batch_size
    )
    test_flow = test_flow.map(lambda x, y: [bernoulli_sampler.sample(x), y])
    # mapped_test_flow = test_flow.to_arrays_flow(config.test_batch_size).map(bernoulli_sampler)
    # gathered_flow = spt.DataFlow.gather([test_flow, mapped_test_flow])

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/b9/1c/d445f4f80a9f8ab3c0e5/checkpoint/checkpoint/checkpoint.dat-936000'
        # train the network
        with spt.TrainLoop(tf.trainable_variables(),
                           var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta', 'D_psi', 'G_omega', 'D_kappa'],
                           max_epoch=config.max_epoch + 10,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False,
                           checkpoint_dir=results.system_path('checkpoint'),
                           checkpoint_epoch_freq=100,
                           restore_checkpoint=restore_checkpoint
                           ) as loop:

            evaluator = spt.Evaluator(
                loop,
                metrics={'test_nll': test_nll, 'test_lb': test_lb,
                         'adv_test_nll': adv_test_nll, 'adv_test_lb': adv_test_lb,
                         'reconstruct_energy': reconstruct_energy,
                         'real_energy': real_energy,
                         'pd_energy': pd_energy, 'pn_energy': pn_energy,
                         'test_recon': test_recon, 'kl_adv_and_gaussian': kl_adv_and_gaussian, 'test_mse': test_mse},
                inputs=[input_x, input_origin_x],
                data_flow=test_flow,
                time_metric_name='test_time'
            )

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()

            n_critical = config.n_critical
            all_nll_list = []
            all_log_Z_list = []
            # adversarial training
            for epoch in epoch_iterator:
                with loop.timeit('compute_Z_time'):
                    # log_Z_list = []
                    # for i in range(config.log_Z_times):
                    #     log_Z_list.append(session.run(log_Z_compute_op))
                    # from scipy.misc import logsumexp
                    # log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(len(log_Z_list))
                    # print('log_Z_list:{}'.format(log_Z_list))
                    # print('log_Z:{}'.format(log_Z))

                    log_Z_list = []
                    for i in range(config.log_Z_times):
                        for [batch_x, batch_origin_x] in Z_compute_flow:
                            log_Z_list.append(session.run(another_log_Z_compute_op, feed_dict={
                                input_x: batch_x,
                                input_origin_x: batch_origin_x
                            }))
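                    # Pool the per-batch estimates: logsumexp(v) - log(len(v)) is
                    # the log of the arithmetic mean of exp(v).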
                    from scipy.special import logsumexp
                    another_log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(len(log_Z_list))
                    # print('log_Z_list:{}'.format(log_Z_list))
                    print('another_log_Z:{}'.format(another_log_Z))
                    # final_log_Z = logsumexp(np.asarray([log_Z, another_log_Z])) - np.log(2)
                    final_log_Z = another_log_Z  # TODO
                    get_log_Z().set(final_log_Z)

                with loop.timeit('eval_time'):
                    evaluator.run()

                all_nll_list.append(loop._epoch_metrics.metrics['adv_test_nll'].mean)
                all_log_Z_list.append(final_log_Z)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

            all_nll_list = np.asarray(all_nll_list)
            all_log_Z_list = np.asarray(all_log_Z_list)
            print('NLL: {} ± {}'.format(np.mean(all_nll_list), np.std(all_nll_list)))
            print('log_Z: {} ± {}'.format(np.mean(all_log_Z_list), np.std(all_log_Z_list)))

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #10
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(config.nf_layers,
                                                         name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    warm = tf.placeholder(dtype=tf.float32, shape=(), name='warm')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0,
                       dtype=tf.float32,
                       name='beta',
                       trainable=True)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
         arg_scope([spt.layers.act_norm], initializing=True), \
         spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        init_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        init_p_net = p_net(observed={
            'x': input_x,
            'z': init_q_net['z']
        },
                           n_z=config.train_n_qz,
                           beta=beta)
        init_loss = sum(get_all_loss(init_q_net, init_p_net, init_pn_net))

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
         arg_scope([batch_norm, dropout], training=True):
        train_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        train_log_Z = spt.ops.log_mean_exp(
            -train_pn_net['z'].log_prob().energy -
            train_pn_net['z'].log_prob())
        train_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        train_p_net = p_net(observed={
            'x': input_x,
            'z': train_q_net['z']
        },
                            n_z=config.train_n_qz,
                            beta=beta,
                            log_Z=train_log_Z)

        VAE_loss, D_loss, G_loss, debug = get_all_loss(train_q_net,
                                                       train_p_net,
                                                       train_pn_net, warm)

        VAE_loss += tf.losses.get_regularization_loss()
        D_loss += tf.losses.get_regularization_loss()
        G_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz,
                            mcmc_iterator=0,
                            beta=beta,
                            log_Z=get_log_Z())
        test_chain = test_q_net.chain(p_net,
                                      observed={'x': input_x},
                                      n_z=config.test_n_qz,
                                      latent_axis=0,
                                      beta=beta,
                                      log_Z=get_log_Z())
        test_recon = tf.reduce_mean(test_chain.model['x'].log_prob())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(test_chain.model['x'] * 128 + 127.5))**2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(
            tf.reduce_mean(tf.reshape(test_mse, (
                -1,
                config.test_x_samples,
            )),
                           axis=-1))
        test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(test_chain.vi.evaluation.is_loglikelihood(), (
                    -1,
                    config.test_x_samples,
                )),
                axis=-1)) + config.x_shape_multiple * np.log(128.0)
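        # The + x_shape_multiple * log(128) term undoes the 1/128 pixel rescaling:
        # by the change-of-variables formula each dimension contributes log(128)
        # when converting the density on (-1, 1) back to the 0-255 scale.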
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        vi = spt.VariationalInference(
            log_joint=test_chain.model['x'].log_prob() +
            test_chain.model['z'].distribution.log_prob(
                test_chain.model['z'], group_ndims=1,
                y=test_chain.model['x']).log_energy_prob,
            latent_log_probs=[test_q_net['z'].log_prob()],
            axis=0)
        adv_test_nll = -tf.reduce_mean(
            spt.ops.log_mean_exp(
                tf.reshape(vi.evaluation.is_loglikelihood(), (
                    -1,
                    config.test_x_samples,
                )),
                axis=-1)) + config.x_shape_multiple * np.log(128.0)
        adv_test_lb = tf.reduce_mean(vi.lower_bound.elbo())

        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(
            D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) *
            tf.exp(test_pn_net['z'].log_prob().log_energy_prob -
                   test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob())
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() -
            test_pn_net['z'].log_prob().log_energy_prob)
    xi_node = get_var('p_net/xi')
    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables(
            'G_theta') + tf.trainable_variables(
                'beta') + tf.trainable_variables(
                    'p_net/xi') + tf.trainable_variables('posterior_flow')
        D_params = tf.trainable_variables('D_psi')
        G_params = tf.trainable_variables('G_theta')
        print("========VAE_params=========")
        print(VAE_params)
        print("========D_params=========")
        print(D_params)
        print("========G_params=========")
        print(G_params)
        with tf.variable_scope('VAE_optimizer'):
            _VAE_grads = tf.gradients(VAE_loss, G_params)
            VAE_grad = []
            for grad in _VAE_grads:
                VAE_grad.append(tf.reshape(grad, (-1, )))
            VAE_grad = tf.concat(VAE_grad, axis=0)
            # the lines above flatten the gradient of VAE_loss w.r.t. G_theta into a single vector
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
            VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)
        with tf.variable_scope('D_optimizer'):
            D_optimizer = tf.train.AdamOptimizer(learning_rate,
                                                 beta1=0.5,
                                                 beta2=0.999)
            D_grads = D_optimizer.compute_gradients(D_loss, D_params)
        with tf.variable_scope('G_optimizer'):
            G_optimizer = tf.train.AdamOptimizer(learning_rate,
                                                 beta1=0.5,
                                                 beta2=0.999)
            G_grads = G_optimizer.compute_gradients(G_loss, G_params)

            _G_grads = tf.gradients(G_loss, G_params)
            G_grad = [tf.reshape(grad, (-1, )) for grad in _G_grads]
            G_grad = tf.concat(G_grad, axis=0)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)
            G_train_op = G_optimizer.apply_gradients(G_grads)
        D_train_op = D_optimizer.apply_gradients(D_grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = 256.0 * tf.reshape(
            p_net(n_z=100, mcmc_iterator=0, beta=beta)['x'].distribution.mean,
            (-1, ) + config.x_shape) / 2 + 127.5
        reconstruct_q_net = q_net(input_x, posterior_flow)
        reconstruct_z = reconstruct_q_net['z']
        reconstruct_plots = 256.0 * tf.reshape(
            p_net(observed={'z': reconstruct_z}, beta=beta)['x'],
            (-1, ) + config.x_shape) / 2 + 127.5
        x_plots = tf.clip_by_value(x_plots, 0, 255)
        reconstruct_plots = tf.clip_by_value(reconstruct_plots, 0, 255)
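        # 256.0 * x / 2 + 127.5 == x * 128 + 127.5 maps model-space (-1, 1)
        # values back to [0, 255] pixel intensities before clipping for display.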

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            # plot samples
            images = session.run(x_plots)
            # pyplot.scatter(z_points[:, 0], z_points[:, 1], s=5)
            # pyplot.savefig(results.system_path('plotting/z_plot/{}.pdf'.format(loop.epoch)))
            # pyplot.close()
            # print(images)
            try:
                print(np.max(images), np.min(images))
                images = np.round(images)
                save_images_collection(
                    images=images,
                    filename='plotting/sample/{}.png'.format(loop.epoch),
                    grid_size=(10, 10),
                    results=results,
                )

                # plot reconstructs
                for [x] in reconstruct_train_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150, ) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/train.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break

                # plot reconstructs
                for [x] in reconstruct_test_flow:
                    x_samples = uniform_sampler.sample(x)
                    images = np.zeros((150, ) + config.x_shape, dtype=np.uint8)
                    images[::3, ...] = np.round(256.0 * x / 2 + 127.5)
                    images[1::3, ...] = np.round(256.0 * x_samples / 2 + 127.5)
                    images[2::3, ...] = np.round(
                        session.run(reconstruct_plots, feed_dict={input_x: x}))
                    save_images_collection(
                        images=images,
                        filename='plotting/test.reconstruct/{}.png'.format(
                            loop.epoch),
                        grid_size=(10, 15),
                        results=results,
                    )
                    break
            except Exception as e:
                print(e)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_cifar10(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
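    # Pixels are rescaled to roughly (-1, 1), where one discrete level spans
    # 2/256; adding U(-1/256, 1/256) noise therefore dequantizes the 256 integer
    # intensities over exactly one level.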
    uniform_sampler = UniformNoiseSampler(-1.0 / 256.0,
                                          1.0 / 256.0,
                                          dtype=np.float64)
    train_flow = spt.DataFlow.arrays([x_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    train_flow = train_flow.map(uniform_sampler)
    gan_train_flow = spt.DataFlow.arrays(
        [np.concatenate([x_train, x_test], axis=0)],
        config.batch_size,
        shuffle=True,
        skip_incomplete=True)
    gan_train_flow = gan_train_flow.map(uniform_sampler)
    reconstruct_train_flow = spt.DataFlow.arrays([x_train],
                                                 50,
                                                 shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test],
                                                50,
                                                shuffle=True,
                                                skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [np.repeat(x_test, config.test_x_samples, axis=0)],
        config.test_batch_size)
    test_flow = test_flow.map(uniform_sampler)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(
                tf.trainable_variables(),
                var_groups=[
                    'q_net', 'p_net', 'posterior_flow', 'G_theta', 'D_psi'
                ],
                max_epoch=config.max_epoch,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                early_stopping=False,
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
        ) as loop:

            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb,
                                          'adv_test_nll': adv_test_nll,
                                          'adv_test_lb': adv_test_lb,
                                          'reconstruct_energy':
                                          reconstruct_energy,
                                          'real_energy': real_energy,
                                          'pd_energy': pd_energy,
                                          'pn_energy': pn_energy,
                                          'test_recon': test_recon,
                                          'kl_adv_and_gaussian':
                                          kl_adv_and_gaussian,
                                          'test_mse': test_mse
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()

            n_critical = config.n_critical
            # adversarial training
            for epoch in epoch_iterator:
                step_iterator = MyIterator(train_flow)
                while step_iterator.has_next:
                    if epoch <= config.warm_up_start:
                        # generator training x
                        [_, batch_G_loss] = session.run([G_train_op, G_loss],
                                                        feed_dict={})
                        loop.collect_metrics(G_loss=batch_G_loss)
                    # vae training
                    for step, [x] in loop.iter_steps(
                            limited(step_iterator, n_critical)):
                        if epoch <= config.warm_up_start:
                            # discriminator training
                            [_, batch_D_loss, debug_loss
                             ] = session.run([D_train_op, D_loss, debug],
                                             feed_dict={
                                                 input_x: x,
                                             })
                            loop.collect_metrics(D_loss=batch_D_loss)
                            loop.collect_metrics(debug_loss=debug_loss)
                        else:
                            [
                                _, batch_VAE_loss, beta_value, xi_value,
                                batch_train_recon,
                                train_reconstruct_energy_value, training_D_loss
                            ] = session.run(
                                [
                                    VAE_train_op, VAE_loss, beta, xi_node,
                                    train_recon, train_reconstruct_energy,
                                    D_loss
                                ],
                                feed_dict={
                                    input_x:
                                    x,
                                    warm:
                                    min(
                                        1.0,
                                        1.0 * (epoch - config.warm_up_start) /
                                        config.warm_up_epoch)
                                })
                            loop.collect_metrics(batch_VAE_loss=batch_VAE_loss)
                            loop.collect_metrics(xi=xi_value)
                            loop.collect_metrics(beta=beta_value)
                            loop.collect_metrics(train_recon=batch_train_recon)
                            loop.collect_metrics(
                                train_reconstruct_energy=
                                train_reconstruct_energy_value)
                            loop.collect_metrics(
                                training_D_loss=training_D_loss)
                            # loop.print_logs()
                if epoch in config.lr_anneal_epoch_freq:
                    learning_rate.anneal()

                if epoch == config.warm_up_start:
                    learning_rate.set(config.initial_lr)

                if epoch % config.plot_epoch_freq == 0:
                    plot_samples(loop)

                if epoch % config.test_epoch_freq == 0:
                    log_Z_list = []
                    for i in range(config.log_Z_times):
                        log_Z_list.append(session.run(log_Z_compute_op))
                    from scipy.special import logsumexp
                    log_Z = logsumexp(np.asarray(log_Z_list)) - np.log(
                        config.log_Z_times)
                    get_log_Z().set(log_Z)
                    print('log_Z_list:{}'.format(log_Z_list))
                    print('log_Z:{}'.format(log_Z))
                    with loop.timeit('eval_time'):
                        evaluator.run()

                if epoch == config.max_epoch:
                    dataset_img = np.concatenate([_x_train, _x_test], axis=0)

                    sample_img = []
                    for i in range((len(x_train) + len(x_test)) // 100 + 1):
                        sample_img.append(session.run(x_plots))
                    sample_img = np.concatenate(sample_img,
                                                axis=0).astype('uint8')
                    sample_img = sample_img[:len(dataset_img)]
                    sample_img = np.asarray(sample_img)

                    FID = get_fid(sample_img, dataset_img)
                    # turn to numpy array
                    IS_mean, IS_std = get_inception_score(sample_img)
                    loop.collect_metrics(FID=FID)
                    loop.collect_metrics(IS=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #11
File: cifar10.py  Project: shliujing/tfsnippet
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, config.x_dim),
                             name='input_x')
    input_y = tf.placeholder(dtype=tf.int32, shape=[None], name='input_y')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the loss, output and accuracy
    logits = model(input_x)
    cls_loss = tf.losses.sparse_softmax_cross_entropy(input_y, logits)
    loss = cls_loss + tf.losses.get_regularization_loss()
    y = spt.ops.softmax_classification_output(logits)
    acc = spt.ops.classification_accuracy(y, input_y)

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_cifar10(x_shape=(config.x_dim,), normalize_x=True)
    train_flow = spt.DataFlow.arrays([x_train, y_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    test_flow = spt.DataFlow.arrays([x_test, y_test], config.test_batch_size)

    with spt.utils.create_session().as_default():
        # train the network
        with spt.TrainLoop(params,
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x, input_y],
                                  train_flow,
                                  metrics={
                                      'loss': loss,
                                      'acc': acc
                                  },
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
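            # anneal_after should multiply `learning_rate` by
            # config.lr_anneal_factor once every `epochs` epochs (or every
            # `steps` steps, whichever is configured).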
            evaluator = spt.Evaluator(loop,
                                      metrics={'test_acc': acc},
                                      inputs=[input_x, input_y],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=5)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #12
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting/sample', exist_ok=True)
    results.make_dirs('plotting/z_plot', exist_ok=True)
    results.make_dirs('plotting/train.reconstruct', exist_ok=True)
    results.make_dirs('plotting/test.reconstruct', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    posterior_flow = spt.layers.planar_normalizing_flows(
        config.nf_layers, name='posterior_flow')

    # input placeholders
    input_x = tf.placeholder(
        dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    warm = tf.placeholder(
        dtype=tf.float32, shape=(), name='warm')
    learning_rate = spt.AnnealingVariable(
        'learning_rate', config.initial_lr, config.lr_anneal_factor)
    beta = tf.Variable(initial_value=0.0, dtype=tf.float32, name='beta', trainable=True)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
         arg_scope([spt.layers.act_norm], initializing=True), \
         spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_pn_net = p_net(n_z=config.train_n_pz, beta=beta)
        init_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        init_p_net = p_net(observed={'x': input_x, 'z': init_q_net['z']}, n_z=config.train_n_qz, beta=beta)
        init_loss = sum(get_all_loss(init_q_net, init_p_net, init_pn_net))

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
         arg_scope([batch_norm], training=True):
        train_pn_theta = p_net(n_z=config.train_n_pz, beta=beta)
        train_pn_omega = p_omega_net(n_z=config.train_n_pz, beta=beta)
        train_log_Z = spt.ops.log_mean_exp(-train_pn_theta['z'].log_prob().energy - train_pn_theta['z'].log_prob())
        train_q_net = q_net(input_x, posterior_flow, n_z=config.train_n_qz)
        train_p_net = p_net(observed={'x': input_x, 'z': train_q_net['z']},
                            n_z=config.train_n_qz, beta=beta, log_Z=train_log_Z)

        VAE_loss, _, VAE_G_loss, VAE_D_real = get_all_loss(train_q_net, train_p_net, train_pn_theta, warm)
        _, D_loss, G_loss, D_real = get_all_loss(train_q_net, train_p_net, train_pn_omega, warm)
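        # Two passes through the same loss helper: the theta chain yields the
        # VAE-side losses, while the omega chain yields the GAN-side D/G losses.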

        VAE_loss += tf.losses.get_regularization_loss()
        VAE_G_loss += tf.losses.get_regularization_loss()
        D_loss += tf.losses.get_regularization_loss()
        G_loss += tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_qz)
        # test_pd_net = p_net(n_z=config.test_n_pz // 20, mcmc_iterator=20, beta=beta, log_Z=get_log_Z())
        test_pn_net = p_net(n_z=config.test_n_pz, mcmc_iterator=0, beta=beta, log_Z=get_log_Z())
        test_chain = test_q_net.chain(p_net, observed={'x': input_x}, n_z=config.test_n_qz, latent_axis=0,
                                      beta=beta, log_Z=get_log_Z())
        ele_test_recon = test_chain.model['x'].log_prob()
        ele_test_recon = tf.reduce_mean(ele_test_recon, axis=0)
        print(ele_test_recon.shape)
        test_recon = tf.reduce_mean(ele_test_recon)

        '''
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) - tf.round(
                test_chain.model['x'] * 128 + 127.5)) ** 2, axis=[-1, -2, -3])  # (sample_dim, batch_dim, x_sample_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(tf.reduce_mean(tf.reshape(
            test_mse, (-1, config.test_x_samples,)
        ), axis=-1))
        '''
        test_nll = -tf.reduce_mean(
            test_chain.vi.evaluation.is_loglikelihood()
        )
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

        vi = spt.VariationalInference(
            log_joint=test_chain.model['x'].log_prob() + test_chain.model['z'].distribution.log_prob(
                test_chain.model['z'], group_ndims=1, y=test_chain.model['x']
            ).log_energy_prob,
            latent_log_probs=[test_q_net['z'].log_prob()],
            axis=0
        )

        ele_grad = tf.gradients(D_psi(input_x), [input_x])[0]
        ele_grad_norm = tf.reduce_sum(tf.square(ele_grad), axis=[-1, -2, -3])
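        # Squared L2 norm of the discriminator's gradient w.r.t. the input image,
        # used below as one of the out-of-distribution statistics.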

        ele_adv_test_nll = -vi.evaluation.is_loglikelihood()
        print(ele_adv_test_nll.shape)
        adv_test_nll = tf.reduce_mean(ele_adv_test_nll)
        ele_adv_test_lb = vi.lower_bound.elbo()
        print(ele_adv_test_lb.shape)
        adv_test_lb = tf.reduce_mean(ele_adv_test_lb)

        ele_real_energy = D_psi(test_chain.model['x'])
        real_energy = tf.reduce_mean(D_psi(input_origin_x))
        reconstruct_energy = tf.reduce_mean(D_psi(test_chain.model['x'].distribution.mean))
        pd_energy = tf.reduce_mean(
            D_psi(test_pn_net['x'].distribution.mean) * tf.exp(
                test_pn_net['z'].log_prob().log_energy_prob - test_pn_net['z'].log_prob()))
        pn_energy = tf.reduce_mean(D_psi(test_pn_net['x'].distribution.mean))
        log_Z_compute_op = spt.ops.log_mean_exp(
            -test_pn_net['z'].log_prob().energy - test_pn_net['z'].log_prob())

        another_log_Z_compute_op = spt.ops.log_mean_exp(
            -test_chain.model['z'].log_prob().energy - test_q_net['z'].log_prob() + np.log(config.len_train)
        )
        kl_adv_and_gaussian = tf.reduce_mean(
            test_pn_net['z'].log_prob() - test_pn_net['z'].log_prob().log_energy_prob
        )
    xi_node = get_var('p_net/xi')
    # derive the optimizer
    with tf.name_scope('optimizing'):
        VAE_params = tf.trainable_variables('q_net') + tf.trainable_variables('G_theta') + tf.trainable_variables(
            'beta') + tf.trainable_variables('posterior_flow') + tf.trainable_variables('p_net/xi')
        D_params = tf.trainable_variables('D_psi')
        VAE_G_params = tf.trainable_variables('G_theta')
        G_params = tf.trainable_variables('G_omega')
        print("========VAE_params=========")
        print(VAE_params)
        print("========D_params=========")
        print(D_params)
        print("========G_params=========")
        print(G_params)
        with tf.variable_scope('VAE_optimizer'):
            VAE_optimizer = tf.train.AdamOptimizer(learning_rate)
            VAE_grads = VAE_optimizer.compute_gradients(VAE_loss, VAE_params)
        with tf.variable_scope('VAE_G_optimizer'):
            VAE_G_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999)
            VAE_G_grads = VAE_G_optimizer.compute_gradients(VAE_G_loss, VAE_G_params)
        with tf.variable_scope('D_optimizer'):
            D_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999)
            D_grads = D_optimizer.compute_gradients(D_loss, D_params)
        with tf.variable_scope('G_optimizer'):
            G_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5, beta2=0.999)
            G_grads = G_optimizer.compute_gradients(G_loss, G_params)
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            VAE_train_op = VAE_optimizer.apply_gradients(VAE_grads)
            VAE_G_train_op = VAE_G_optimizer.apply_gradients(VAE_G_grads)
            G_train_op = G_optimizer.apply_gradients(G_grads)
            D_train_op = D_optimizer.apply_gradients(D_grads)

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = spt.datasets.load_cifar10(x_shape=config.x_shape)
    x_train = (_x_train - 127.5) / 256.0 * 2
    x_test = (_x_test - 127.5) / 256.0 * 2
    # uniform_sampler = UniformNoiseSampler(-1.0 / 256.0, 1.0 / 256.0, dtype=np.float64)
    train_flow = spt.DataFlow.arrays([x_train], config.test_batch_size)
    reconstruct_train_flow = spt.DataFlow.arrays(
        [x_train], 100, shuffle=True, skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays(
        [x_test], 100, shuffle=True, skip_incomplete=False)
    test_flow = spt.DataFlow.arrays(
        [x_test],
        config.test_batch_size)

    (svhn_train, _), (svhn_test, __) = load_svhn(config.x_shape)
    svhn_train = (svhn_train - 127.5) / 256.0 * 2
    svhn_test = (svhn_test - 127.5) / 256.0 * 2
    svhn_train_flow = spt.DataFlow.arrays([svhn_train], config.test_batch_size)
    svhn_test_flow = spt.DataFlow.arrays([svhn_test], config.test_batch_size)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        # for [x, origin_x] in train_flow:
        #     print('Network initialized, first-batch loss is {:.6g}.\n'.
        #           format(session.run(init_loss, feed_dict={input_x: x, input_origin_x: origin_x})))
        #     break

        # if config.z_dim == 512:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/48/19/6f3b6c3ef49ded8ba2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 1024:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/cd/19/6f9d69b5d1931e67e2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 2048:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/4d/19/6f9d69b5d19398c8c2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # elif config.z_dim == 3072:
        #     restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/5d/19/6f9d69b5d1936fb2d2d5/checkpoint/checkpoint/checkpoint.dat-390000'
        # else:
        # restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/93/0c/d434dabfcaecd3b5bed5/checkpoint/checkpoint/checkpoint.dat-195000'
        restore_checkpoint = '/mnt/mfs/mlstorage-experiments/cwx17/24/29/6fc8930042bc9bab75d5/checkpoint/checkpoint/checkpoint.dat-195000'
        # train the network
        with spt.TrainLoop(tf.trainable_variables(),
                           var_groups=['q_net', 'p_net', 'posterior_flow', 'G_theta', 'D_psi', 'G_omega',
                                       'D_kappa'],
                           max_epoch=config.max_epoch + 1,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False,
                           checkpoint_dir=results.system_path('checkpoint'),
                           checkpoint_epoch_freq=100,
                           restore_checkpoint=restore_checkpoint
                           ) as loop:

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            def evaluator_generate(flow, prefix=''):
                return spt.Evaluator(
                    loop,
                    metrics={prefix + 'nll': test_nll,
                             prefix + 'lb': test_lb,
                             prefix + 'adv_nll': adv_test_nll,
                             prefix + 'adv_lb': adv_test_lb,
                             prefix + 'reconstruct_energy': reconstruct_energy,
                             prefix + 'real_energy': real_energy,
                             prefix + 'pd_energy': pd_energy,
                             prefix + 'pn_energy': pn_energy,
                             prefix + 'recon': test_recon,
                             prefix + 'kl_adv_and_gaussian': kl_adv_and_gaussian},
                    # prefix + 'mse': test_mse},
                    inputs=[input_x],
                    data_flow=flow,
                    time_metric_name=prefix + 'time'
                )

            cifar_train_evaluator = evaluator_generate(train_flow, 'cifar_train')
            cifar_test_evaluator = evaluator_generate(test_flow, 'cifar_test')
            svhn_train_evaluator = evaluator_generate(svhn_train_flow, 'svhn_train')
            svhn_test_evaluator = evaluator_generate(svhn_test_flow, 'svhn_test')

            epoch_iterator = loop.iter_epochs()

            # adversarial training
            for epoch in epoch_iterator:

                with loop.timeit('out_of_distribution_test'):
                    def get_ele(ops, flow):
                        packs = []
                        for [batch_x] in flow:
                            pack = session.run(
                                ops, feed_dict={
                                    input_x: batch_x
                                })  # [n_ops, batch_size]
                            pack = np.transpose(np.asarray(pack), (1, 0))  # [batch_size, n_ops]
                            packs.append(pack)
                        packs = np.concatenate(packs, axis=0)  # [len_of_flow, n_ops]
                        packs = np.transpose(np.asarray(packs), (1, 0))  # [n_ops, len_of_flow]
                        return packs

                    cifar_train_nll, cifar_train_lb, cifar_train_recon, cifar_train_energy, cifar_train_norm = get_ele(
                        [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], train_flow)
                    # print(cifar_train_nll.shape, cifar_train_lb.shape, cifar_train_recon.shape)

                    cifar_test_nll, cifar_test_lb, cifar_test_recon, cifar_test_energy, cifar_test_norm = get_ele(
                        [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm], test_flow)
                    svhn_train_nll, svhn_train_lb, svhn_train_recon, svhn_train_energy, svhn_train_norm = get_ele(
                        [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm],
                        svhn_train_flow)
                    svhn_test_nll, svhn_test_lb, svhn_test_recon, svhn_test_energy, svhn_test_norm = get_ele(
                        [ele_adv_test_nll, ele_adv_test_lb, ele_test_recon, ele_real_energy, ele_grad_norm],
                        svhn_test_flow)

                    # Draw the histograms or extract the data here
                    def plot_fig(data_list, color_list, label_list, x_label, fig_name):
                        pyplot.cla()
                        pyplot.plot()
                        pyplot.grid(c='silver', ls='--')
                        pyplot.xlabel(x_label)
                        spines = pyplot.gca().spines
                        for sp in spines:
                            spines[sp].set_color('silver')

                        def draw_nll(nll, color, label):
                            nll = list(nll)
                            # print(nll)
                            # print(nll.shape)

                            n, bins, patches = pyplot.hist(nll, 40, density=True, facecolor=color, alpha=0.4,
                                                           label=label)

                            index = []
                            for i in range(len(bins) - 1):
                                index.append((bins[i] + bins[i + 1]) / 2)

                            def smooth(c, N=5):
                                weights = np.hanning(N)
                                return np.convolve(weights / weights.sum(), c)[N - 1:-N + 1]

                            n[2:-2] = smooth(n)
                            pyplot.plot(index, n, color=color)
                            pyplot.legend()
                            print('%s done.' % label)

                        for i in range(len(data_list)):
                            draw_nll(data_list[i], color_list[i], label_list[i])
                        pyplot.savefig('plotting/wgan/%s.jpg' % fig_name)
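
                        # ROC / PR curves for in-distribution (label +1, first
                        # argument) vs. out-of-distribution (label -1, second
                        # argument) detection, using the statistic itself as the
                        # in-distribution score.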

                        def draw_curve(cifar_test, svhn_test, fig_name):
                            label = np.concatenate(([1] * len(cifar_test), [-1] * len(svhn_test)))
                            score = np.concatenate((cifar_test, svhn_test))

                            fpr, tpr, thresholds = roc_curve(label, score)
                            precision, recall, thresholds = precision_recall_curve(label, score)
                            pyplot.plot(recall, precision)
                            pyplot.plot(fpr, tpr)
                            print('%s auc: %4f, ap: %4f' % (fig_name, auc(fpr, tpr), average_precision_score(label, score)))

                        pyplot.cla()
                        pyplot.plot()
                        draw_curve(data_list[1], data_list[3], fig_name)
                        pyplot.savefig('plotting/wgan/%s_curve.jpg' % fig_name)

                    plot_fig([cifar_train_energy, cifar_test_energy, svhn_train_energy, svhn_test_energy],
                             ['red', 'salmon', 'green', 'lightgreen'],
                             ['CIFAR-10 Train', 'CIFAR-10 Test', 'SVHN Train', 'SVHN Test'],
                             'energy', 'out_of_distribution_energy')

                    plot_fig([cifar_train_norm, cifar_test_norm, svhn_train_norm, svhn_test_norm],
                             ['red', 'salmon', 'green', 'lightgreen'],
                             ['CIFAR-10 Train', 'CIFAR-10 Test', 'SVHN Train', 'SVHN Test'],
                             'log(bits/dim)', 'out_of_distribution_norm')

                with loop.timeit('eval_time'):
                    cifar_train_evaluator.run()
                    cifar_test_evaluator.run()
                    svhn_train_evaluator.run()
                    svhn_test_evaluator.run()

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #13
def main(config, result_dir):
    # print the config
    print_with_title('Configurations', config.format_config(), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(result_dir)
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
    learning_rate = tf.placeholder(shape=(), dtype=tf.float32)
    learning_rate_var = AnnealingDynamicValue(config.initial_lr,
                                              config.lr_anneal_factor)

    # build the model
    with arg_scope([q_net, p_net], is_training=is_training):
        # derive the loss and lower-bound for training
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(p_net,
                                        latent_names=['z'],
                                        latent_axis=0,
                                        observed={'x': input_x})

        baseline = baseline_net(input_x)
        cost, baseline_cost = \
            train_chain.vi.training.reinforce(baseline=baseline)
        loss = regularization_loss() + tf.reduce_mean(cost + baseline_cost)
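        # REINFORCE (score-function) gradients handle the discrete latent; the
        # baseline network only reduces estimator variance, and baseline_cost
        # trains it to track the learning signal.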

        # derive the nll and logits output for testing
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_names=['z'],
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    params = tf.trainable_variables()
    grads = optimizer.compute_gradients(loss, var_list=params)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plot_x'):
        plot_p_net = p_net(n_z=100, is_training=is_training)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))
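        # bernoulli_as_pixel presumably maps the Bernoulli samples back onto the
        # 0-255 pixel grid so they can be saved as images.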

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            session = get_default_session_or_error()
            images = session.run(x_plots, feed_dict={is_training: False})
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10),
                                   results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)
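    # sample_now=True binarizes the test set once up front, so evaluation is
    # deterministic across epochs; the training flow re-samples every batch.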

    with create_session().as_default():
        # train the network
        with TrainLoop(params,
                       max_epoch=config.max_epoch,
                       max_step=config.max_step,
                       summary_dir=(results.system_path('train_summary')
                                    if config.write_summary else None),
                       summary_graph=tf.get_default_graph(),
                       early_stopping=False) as loop:
            trainer = Trainer(loop,
                              train_op, [input_x],
                              train_flow,
                              feed_dict={
                                  learning_rate: learning_rate_var,
                                  is_training: True
                              },
                              metrics={'loss': loss})
            trainer.anneal_after(learning_rate_var,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = Evaluator(loop,
                                  metrics={
                                      'test_nll': test_nll,
                                      'test_lb': test_lb
                                  },
                                  inputs=[input_x],
                                  data_flow=test_flow,
                                  feed_dict={is_training: False},
                                  time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
Code Example #14
File: vae.py  Project: shliujing/tfsnippet
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # derive the output for initialization
    with tf.name_scope('initialization'), \
            spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(input_x, is_initializing=True)
        init_chain = init_q_net.chain(p_net,
                                      observed={'x': input_x},
                                      is_initializing=True)
        init_lb = tf.reduce_mean(init_chain.vi.lower_bound.elbo())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x)
        train_chain = train_q_net.chain(p_net, observed={'x': input_x})
        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10),
                                   results=results)

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = \
        spt.datasets.load_mnist(x_shape=[784])
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_lb, feed_dict={input_x: x})))
            break

        # train the network
        with spt.TrainLoop(params,
                           var_groups=['q_net', 'p_net'],
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss},
                                  summaries=tf.summary.merge_all(
                                      spt.GraphKeys.AUTO_HISTOGRAM))
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.events.on(
                spt.EventKeys.AFTER_EXECUTION,
                lambda e: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
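
For reference, a minimal sketch of the q_net / p_net builders that vae.py defines elsewhere in the file. The 500-unit hidden layers and config.z_dim are illustrative assumptions rather than the author's exact architecture, and is_initializing is accepted but unused here (in the real file it presumably triggers data-dependent weight initialization):

import tensorflow as tf
import tfsnippet as spt

@spt.global_reuse
def q_net(x, observed=None, n_z=None, is_initializing=False):
    net = spt.BayesianNet(observed=observed)
    h_x = spt.layers.dense(tf.to_float(x), 500, activation_fn=tf.nn.leaky_relu)
    z_mean = spt.layers.dense(h_x, config.z_dim, name='z_mean')
    z_logstd = spt.layers.dense(h_x, config.z_dim, name='z_logstd')
    net.add('z', spt.Normal(mean=z_mean, logstd=z_logstd),
            n_samples=n_z, group_ndims=1)
    return net

@spt.global_reuse
def p_net(observed=None, n_z=None, is_initializing=False):
    net = spt.BayesianNet(observed=observed)
    z = net.add('z', spt.Normal(mean=tf.zeros([1, config.z_dim]),
                                logstd=tf.zeros([1, config.z_dim])),
                n_samples=n_z, group_ndims=1)
    # flatten the sample dimension before the dense layers, then restore it
    h_z, s1, s2 = spt.ops.flatten_to_ndims(tf.to_float(z), 2)
    h_z = spt.layers.dense(h_z, 500, activation_fn=tf.nn.leaky_relu)
    x_logits = spt.ops.unflatten_from_ndims(
        spt.layers.dense(h_z, config.x_dim, name='x_logits'), s1, s2)
    net.add('x', spt.Bernoulli(logits=x_logits), group_ndims=1)
    return net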
Code example #15
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    model_file = os.path.join(
        config.result_dir,
        os.path.splitext(os.path.basename(__file__))[0] + "_" +
        str(config.noExp) + ".model")
    dirName = (os.path.splitext(os.path.basename(__file__))[0] + "_" +
               str(config.noExp))
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)

    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate',
                                          config.initial_lr,
                                          config.lr_anneal_factor,
                                          min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)

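    # multi_gpu.data_parallel() yields (device, pre_build, input_slices) for
    # each device; with disable_prebuild=True the pre-build pass is presumably
    # skipped, which is why `pre_build` is never checked in this loop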
    for dev, pre_build, [dev_input_x
                         ] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                train_chain = train_q_net.chain(p_net,
                                                latent_axis=0,
                                                observed={'x': dev_input_x})
                train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) +
                              tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(p_net,
                                              latent_axis=0,
                                              observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(dev_input_x),
                    0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(
                    log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(
                    log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                # start a fresh list per device, so each entry of `gradses`
                # holds only this device's (grad, var) pairs
                grad_vars = []
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad,
                                'gradient for {} has numeric issue'.format(
                                    var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(
                                         tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32,
                                              shape=(None, None),
                                              name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT,
                                     k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain, config.datapathForTest,
        config.timeLength, config.metricNumber, "TrainFileNameList.txt",
        "TestFileNameList.txt", results, config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays(
            [x_test], config.test_batch_size)  # DataFlow is iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays([x_train],
                                 config.batch_size,
                                 shuffle=False,
                                 skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim,
          ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network
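            # (keep re-drawing batches until the data-dependent initialization
            # yields a finite first-batch loss below 1e5)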
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss, feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is {:.6g}.'.
                          format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(
                            INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(loop,
                                          train_op, [input_x],
                                          train_flow,
                                          metrics={'loss': train_loss},
                                          summaries=tf.summary.merge_all(
                                              spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(learning_rate,
                                         epochs=config.lr_anneal_epoch_freq,
                                         steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(
                        loop,
                        train_loss,
                        [input_x],
                        val_flow,
                    )
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
                saver.save()

            # save the training information
            firWrite = True
            num = 0
            time0 = time.time()
            for [x_train] in train_flow:
                if config.savetrainDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item_Train = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_train}))
                    log_prob_per_element_univariate_TS_list_Train = log_prob_per_element_univariate_TS_list_item_Train
                    log_prob_per_element_list_Train = np.sum(np.array(
                        log_prob_per_element_univariate_TS_list_item_Train),
                                                             axis=1).tolist()
                    if firWrite:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train)
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train)
                    else:
                        save_file(
                            results.system_path("train_summary"),
                            "OutlierScores_metric.txt",
                            log_prob_per_element_univariate_TS_list_Train,
                            "\n", "a")
                        save_file(results.system_path("train_summary"),
                                  "OutlierScores.txt",
                                  log_prob_per_element_list_Train, "\n", "a")

                firWrite = False
                num += 1
                if num % 1000 == 0:
                    print(
                        "-----Train %s >>>>>: average time per batch: %s" %
                        (num, float(time.time() - time0) / float(num)))
            del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += log_prob_per_element_univariate_TS_list_item.tolist(
                    )
                    log_prob_per_element_list += np.sum(
                        np.array(log_prob_per_element_univariate_TS_list_item),
                        axis=1).tolist()

                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances:%s" %
                          (num, float(time.time() - time2) / float(num)))
        else:
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (session.run(
                        log_prob_per_element, feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(
                        log_prob_per_element_list_item)

                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))

                if num % 200 == 0:
                    print(
                        "-----Test>>>>>:%d, average time of each instance:%s" %
                        (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the label file name and its line count
        allLabelFileNameLineCntList = get_machineID(results, config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" %
              len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(
                results.system_path("result_summary"),
                "OutlierScores_metric.txt",
                cat_List(allLabelFileNameLineCntList,
                         log_prob_per_element_univariate_TS_list))
        save_file(
            results.system_path("result_summary"), "OutlierScores.txt",
            cat_List(allLabelFileNameLineCntList, log_prob_per_element_list))

        if config.evaluation:
            # Preparation for the history two-metric results
            twoMetricScore = read_file(results.system_path("train_summary"),
                                       "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore),
                                         axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series", "\n"],
                      ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt", [
                "Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"
            ], ",", "a")

            # get the sorted item each metric by change score
            twoMetricScoreList = cal_scoreChanges(
                log_prob_per_element_list, ave_twoMetricScore,
                log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(
                SORT_UNIVARIATE_TS,
                feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"),
                      "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evaluation
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"),
                          "OutlierScores.txt", "float"), config.q,
                config.level)
            resultArray, outlierLabelfileNameLineCntList = cal_binaryResult(
                log_prob_per_element_list, POT_TH, time_indexs2,
                config.saveMetricInfo, allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2,
                     POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(truth_filepath=config.interpret_filepath,
                             prediction_filepath=os.path.join(
                                 config.result_dir, dirName, "result_summary",
                                 "MetricResult.txt"))
Code example #16
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, config.x_dim),
                             name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)

    # build the posterior flow
    with tf.variable_scope('posterior_flow'):
        flows = []
        for i in range(config.n_flows):
            flows.append(spt.layers.ActNorm())
            flows.append(
                spt.layers.CouplingLayer(tf.make_template(
                    'coupling',
                    coupling_layer_shift_and_scale,
                    create_scope_now_=True),
                                         scale_type='exp'))
            flows.append(spt.layers.InvertibleDense())
        posterior_flow = spt.layers.SequentialFlow(flows=flows)

    # derive the initialization op
    with tf.name_scope('initialization'), \
            arg_scope([spt.layers.act_norm], initializing=True):
        init_q_net = q_net(input_x, posterior_flow)
        init_chain = init_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'):
        train_q_net = q_net(input_x, posterior_flow)
        train_chain = train_q_net.chain(p_net,
                                        latent_axis=0,
                                        observed={'x': input_x})

        vae_loss = tf.reduce_mean(train_chain.vi.training.sgvb())
        loss = vae_loss + tf.losses.get_regularization_loss()

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(input_x, posterior_flow, n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': input_x})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())

    # derive the optimizer
    with tf.name_scope('optimizing'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        grads = optimizer.compute_gradients(loss, var_list=params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        plot_p_net = p_net(n_z=100)
        x_plots = tf.reshape(bernoulli_as_pixel(plot_p_net['x']), (-1, 28, 28))

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(images=images,
                                   filename='plotting/{}.png'.format(
                                       loop.epoch),
                                   grid_size=(10, 10))

    # prepare for training and testing data
    (x_train, y_train), (x_test, y_test) = spt.datasets.load_mnist()
    train_flow = bernoulli_flow(x_train,
                                config.batch_size,
                                shuffle=True,
                                skip_incomplete=True)
    test_flow = bernoulli_flow(x_test, config.test_batch_size, sample_now=True)

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        # initialize the network
        spt.utils.ensure_variables_initialized()
        for [batch_x] in train_flow:
            print('Network initialization loss: {:.6g}'.format(
                session.run(init_loss, {input_x: batch_x})))
            print('')
            break

        # train the network
        with spt.TrainLoop(params,
                           var_groups=['p_net', 'q_net', 'posterior_flow'],
                           max_epoch=config.max_epoch,
                           max_step=config.max_step,
                           summary_dir=(results.system_path('train_summary')
                                        if config.write_summary else None),
                           summary_graph=tf.get_default_graph(),
                           early_stopping=False) as loop:
            trainer = spt.Trainer(loop,
                                  train_op, [input_x],
                                  train_flow,
                                  metrics={'loss': loss})
            trainer.anneal_after(learning_rate,
                                 epochs=config.lr_anneal_epoch_freq,
                                 steps=config.lr_anneal_step_freq)
            evaluator = spt.Evaluator(loop,
                                      metrics={
                                          'test_nll': test_nll,
                                          'test_lb': test_lb
                                      },
                                      inputs=[input_x],
                                      data_flow=test_flow,
                                      time_metric_name='test_time')
            evaluator.after_run.add_hook(
                lambda: results.update_metrics(evaluator.last_metrics_dict))
            trainer.evaluate_after_epochs(evaluator, freq=10)
            trainer.evaluate_after_epochs(functools.partial(
                plot_samples, loop),
                                          freq=10)
            trainer.log_after_epochs(freq=1)
            trainer.run()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
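
A minimal sketch of the coupling_layer_shift_and_scale function that the CouplingLayer above wraps in tf.make_template. The (x1, n2) -> (shift, scale) signature follows tfsnippet's CouplingLayer convention; the 500-unit hidden layer is an illustrative assumption:

import tensorflow as tf
import tfsnippet as spt

def coupling_layer_shift_and_scale(x1, n2):
    # x1: the pass-through half of the features; n2: width of the half to be
    # transformed.  With scale_type='exp', the returned scale is exponentiated
    # inside the layer, so no positivity constraint is needed here.
    h = spt.layers.dense(x1, 500, activation_fn=tf.nn.leaky_relu)
    shift = spt.layers.dense(h, n2, name='shift')
    scale = spt.layers.dense(h, n2, name='scale')
    return shift, scale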
Code example #17
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings,
                                  arg_parser,
                                  prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    results = MLResults(config.result_dir)
    results.save_config(config)  # save experiment settings for review
    results.make_dirs('plotting', exist_ok=True)
    results.make_dirs('train_summary', exist_ok=True)

    # input placeholders
    input_x = tf.placeholder(dtype=tf.int32,
                             shape=(None, ) + config.x_shape,
                             name='input_x')
    input_origin_x = tf.placeholder(dtype=tf.float32,
                                    shape=(None, ) + config.x_shape,
                                    name='input_origin_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor)
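    # the next line rebinds `input_x` to its float cast; TF1 allows feeding
    # values through this derived tensor, so the feed_dicts below still work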
    input_x = tf.to_float(input_x)

    # derive the loss for initializing
    with tf.name_scope('initialization'), \
         arg_scope([p_net, q_net], is_initializing=True), \
         spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
        init_q_net = q_net(
            input_origin_x if config.use_q_z_given_e else input_x)
        init_chain = init_q_net.chain(
            p_net,
            observed={
                'x':
                input_origin_x if config.use_origin_x_as_observe else input_x
            })
        init_loss = tf.reduce_mean(init_chain.vi.training.sgvb())

    # derive the loss and lower-bound for training
    with tf.name_scope('training'), \
         arg_scope([p_net, q_net], is_training=True):
        train_q_net = q_net(
            input_origin_x if config.use_q_z_given_e else input_x)
        train_chain = train_q_net.chain(
            p_net,
            observed={
                'x':
                input_origin_x if config.use_origin_x_as_observe else input_x
            })
        train_loss = (tf.reduce_mean(train_chain.vi.training.sgvb()) +
                      tf.losses.get_regularization_loss())

    # derive the nll and logits output for testing
    with tf.name_scope('testing'):
        test_q_net = q_net(
            input_origin_x if config.use_q_z_given_e else input_x,
            n_z=config.test_n_z)
        test_chain = test_q_net.chain(p_net,
                                      latent_axis=0,
                                      observed={'x': tf.to_float(input_x)})
        test_nll = -tf.reduce_mean(test_chain.vi.evaluation.is_loglikelihood())
        test_lb = tf.reduce_mean(test_chain.vi.lower_bound.elbo())
        test_mse = tf.reduce_sum(
            (tf.round(test_chain.model['x'].distribution.mean * 128 + 127.5) -
             tf.round(input_origin_x * 128 + 127.5))**2,
            axis=[-1, -2, -3])  # (sample_dim, batch_dim)
        test_mse = tf.reduce_min(test_mse, axis=[0])
        test_mse = tf.reduce_mean(test_mse)

    # derive the optimizer
    with tf.name_scope('optimizing'):
        params = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(train_loss, params)
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.apply_gradients(grads)

    # derive the plotting function
    with tf.name_scope('plotting'):
        x_plots = tf.reshape(bernoulli_as_pixel(p_net(n_z=100)['x']),
                             (-1, ) + config.x_shape)

    def plot_samples(loop):
        with loop.timeit('plot_time'):
            images = session.run(x_plots)
            save_images_collection(
                images=images,
                filename='plotting/{}.png'.format(loop.epoch),
                grid_size=(10, 10),
                results=results,
                channels_last=config.channels_last,
            )

    # prepare for training and testing data
    (_x_train, _y_train), (_x_test, _y_test) = \
        spt.datasets.load_mnist(x_shape=config.x_shape)
    # train_flow = bernoulli_flow(
    #     x_train, config.batch_size, shuffle=True, skip_incomplete=True)
    x_train = _x_train / 255.0
    x_test = _x_test / 255.0
    bernoulli_sampler = BernoulliSampler()
    train_flow = spt.DataFlow.arrays([x_train, x_train],
                                     config.batch_size,
                                     shuffle=True,
                                     skip_incomplete=True)
    train_flow = train_flow.map(lambda x, y: [bernoulli_sampler.sample(x), y])
    Z_compute_flow = spt.DataFlow.arrays([x_train, x_train],
                                         config.test_batch_size,
                                         shuffle=True,
                                         skip_incomplete=True)
    Z_compute_flow = Z_compute_flow.map(
        lambda x, y: [bernoulli_sampler.sample(x), y])
    reconstruct_train_flow = spt.DataFlow.arrays([x_train],
                                                 100,
                                                 shuffle=True,
                                                 skip_incomplete=False)
    reconstruct_test_flow = spt.DataFlow.arrays([x_test],
                                                100,
                                                shuffle=True,
                                                skip_incomplete=False)

    test_flow = spt.DataFlow.arrays([x_test, x_test], config.test_batch_size)
    test_flow = test_flow.map(lambda x, y: [bernoulli_sampler.sample(x), y])

    with spt.utils.create_session().as_default() as session, \
            train_flow.threaded(5) as train_flow:
        spt.utils.ensure_variables_initialized()

        # initialize the network
        for [x, ox] in train_flow:
            print('Network initialized, first-batch loss is {:.6g}.\n'.format(
                session.run(init_loss,
                            feed_dict={
                                input_x: x,
                                input_origin_x: ox
                            })))
            break

        # train the network
        with spt.TrainLoop(
                params,
                var_groups=['q_net', 'p_net'],
                max_epoch=config.max_epoch + 1,
                max_step=config.max_step,
                summary_dir=(results.system_path('train_summary')
                             if config.write_summary else None),
                summary_graph=tf.get_default_graph(),
                checkpoint_dir=results.system_path('checkpoint'),
                checkpoint_epoch_freq=100,
                early_stopping=False,
                restore_checkpoint=
                "/mnt/mfs/mlstorage-experiments/cwx17/10/1c/d4e63c432be97afba7e5/checkpoint/checkpoint/checkpoint.dat-140400"
        ) as loop:

            loop.print_training_summary()
            spt.utils.ensure_variables_initialized()

            epoch_iterator = loop.iter_epochs()
            for epoch in epoch_iterator:
                dataset_img = np.tile(_x_train, (1, 1, 1, 3))
                mala_img = []
                for i in range(config.fid_samples // config.sample_n_z):
                    mala_images = session.run(x_plots)
                    mala_img.append(mala_images)
                    print('{}-th sample finished...'.format(i))

                mala_img = np.concatenate(mala_img, axis=0).astype('uint8')
                mala_img = np.asarray(mala_img)
                mala_img = np.tile(mala_img, (1, 1, 1, 3))
                np.savez('sample_store', mala_img=mala_img)

                FID = get_fid(mala_img, dataset_img)
                IS_mean, IS_std = get_inception_score(mala_img)
                loop.collect_metrics(FID=FID)
                loop.collect_metrics(IS=IS_mean)

                # ori_img = np.concatenate(ori_img, axis=0).astype('uint8')
                # ori_img = np.asarray(ori_img)
                # FID = get_fid_google(ori_img, dataset_img)
                # IS_mean, IS_std = get_inception_score(ori_img)
                # loop.collect_metrics(FID_ori=FID)
                # loop.collect_metrics(IS_ori=IS_mean)

                loop.collect_metrics(lr=learning_rate.get())
                loop.print_logs()

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()
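
Finally, a sketch of the sample bookkeeping in the FID/IS loop above, with hypothetical values config.fid_samples = 5000 and config.sample_n_z = 100 (each session.run(x_plots) draws 100 images, since the plotting net uses n_z=100):

import numpy as np

rounds = 5000 // 100                                  # fid_samples // sample_n_z
batches = [np.zeros((100, 28, 28, 1), dtype='uint8')  # one session.run(x_plots)
           for _ in range(rounds)]
imgs = np.concatenate(batches, axis=0)                # (5000, 28, 28, 1)
imgs = np.tile(imgs, (1, 1, 1, 3))                    # grayscale -> RGB for FID/IS
assert imgs.shape == (5000, 28, 28, 3)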