Python ConfigProto Examples

Programming Language: Python

Namespace/Package Name: tensorflow.compat.v1

Method/Function: ConfigProto

Examples at hotexamples.com: 30

Python ConfigProto - 30 examples found. These are the top-rated real-world Python examples of tensorflow.compat.v1.ConfigProto, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: experiment.py Project: sarthakksu/covid-low-income-bam

def run_eval():
    """Evaluate on test or validation.

    Builds the evaluation graph once and then polls
    ``FLAGS.summary_dir + '/train/'`` for checkpoints. Every checkpoint whose
    step differs from the previously evaluated one is restored and run for
    ``FLAGS.eval_size // 5`` batches; the accumulated counts are appended to
    the merged summary and written under ``FLAGS.summary_dir + '/test'``.

    Returns once no checkpoint, or no new checkpoint, has appeared for about
    360 minutes of polling. The summary writer and each per-checkpoint
    session are closed even when restoring or evaluating raises.
    """
    eval_batch_size = 5  # Passed to get_features and used to split eval_size.
    poll_minutes = 2
    max_idle_minutes = 360
    train_dir = FLAGS.summary_dir + '/train/'

    def latest_checkpoint():
        """Return (checkpoint state, step string), or (None, None) if absent."""
        state = tf.train.get_checkpoint_state(train_dir)
        if state and state.model_checkpoint_path:
            # The step is the text after the last '-' of the file name.
            step = state.model_checkpoint_path.split('/')[-1].split('-')[-1]
            return state, step
        return None, None

    with tf.Graph().as_default():
        # Input images and labels.
        features = get_features(False, eval_batch_size)
        model = f_model.multi_gpu_model
        result = model(features)
        merged = result['summary']
        correct_prediction_sum = result['correct']
        almost_correct_sum = result['almost']
        saver = tf.train.Saver()
        test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')
        try:
            seen_step = -1
            time.sleep(3 * 60)
            paused = 0
            while paused < max_idle_minutes:
                ckpt, global_step = latest_checkpoint()
                if ckpt is None:
                    time.sleep(poll_minutes * 60)
                    paused += poll_minutes
                    continue

                # Wait until a checkpoint with a new step shows up.
                while seen_step == int(global_step):
                    time.sleep(poll_minutes * 60)
                    newer_ckpt, newer_step = latest_checkpoint()
                    if newer_ckpt is not None:
                        # Keep the last known checkpoint if the state file is
                        # momentarily unreadable instead of crashing on None.
                        ckpt, global_step = newer_ckpt, newer_step
                    paused += poll_minutes
                    if paused >= max_idle_minutes:
                        return
                paused = 0

                seen_step = int(global_step)
                print(seen_step)
                session_config = tf.ConfigProto(allow_soft_placement=True)
                # The context manager closes the session on every exit path.
                with tf.Session(config=session_config) as sess:
                    # Restores from checkpoint.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    coord = tf.train.Coordinator()
                    threads = tf.train.start_queue_runners(sess=sess,
                                                           coord=coord)
                    try:
                        total_tp = 0
                        total_almost = 0
                        for i in range(FLAGS.eval_size // eval_batch_size):
                            summary_j, tp, almost = sess.run([
                                merged, correct_prediction_sum,
                                almost_correct_sum
                            ])
                            total_tp += tp
                            total_almost += almost

                        total_false = FLAGS.eval_size - total_tp
                        total_almost_false = FLAGS.eval_size - total_almost
                        summary_tp = tf.Summary.FromString(summary_j)
                        summary_tp.value.add(tag='correct_prediction',
                                             simple_value=total_tp)
                        summary_tp.value.add(tag='wrong_prediction',
                                             simple_value=total_false)
                        summary_tp.value.add(tag='almost_wrong_prediction',
                                             simple_value=total_almost_false)
                        test_writer.add_summary(summary_tp, seen_step)
                        print('write done')
                    except tf.errors.OutOfRangeError:
                        print('Done eval for %d steps.' % i)
                    finally:
                        # When done, ask the threads to stop.
                        coord.request_stop()
                    # Wait for threads to finish.
                    coord.join(threads)
        finally:
            test_writer.close()

Example #2

Show file

File: inference.py Project: vitkaLA/automl

 def _build_session(self):
     """Return a new ``tf.Session``.

     When ``self.use_xla`` is truthy the session config's global JIT level
     is set to ``tf.OptimizerOptions.ON_2``; otherwise a default
     ``tf.ConfigProto`` is used.
     """
     config = tf.ConfigProto()
     if not self.use_xla:
         return tf.Session(config=config)
     optimizer_options = config.graph_options.optimizer_options
     optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2
     return tf.Session(config=config)

Example #3

Show file

# Restrict TensorFlow to the first GPU and fix every random seed used here.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
random.seed(1111)
np.random.seed(1111)
tf.set_random_seed(1111)

train_batch_size = 128
test_batch_size = 128
predict_batch_size = 1
predict_users_num = 100
predict_ads_num = 99

# NOTE(review): pickle executes arbitrary code on load; only use trusted files.
# The file is opened in a `with` block so the handle is closed after loading.
with open('ali_test_info_4days.pkl', 'rb') as info_file:
    info = pkl.load(info_file)

# Fresh graph and a session that grows its GPU memory allocation on demand.
tf.reset_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
model = Model(info[0], info[1], info[2], info[3], predict_batch_size,
              predict_ads_num)
sess.run(tf.global_variables_initializer())
model.restore_(sess, './save_path_alibaba_new/ckpt')

with open('knn_table/ali_knn_key.pkl', 'rb') as knn_key_file:
    knn_key = pkl.load(knn_key_file)

mypath = './test_data'
files = listdir(mypath)
csv_list = []
for f in files:
    fullpath = join(mypath, f)
    if isfile(fullpath):

Example #4

Show file

import facenet
import detect_face
import os
import time
import pickle
import sys

# Paths used by this script; all are relative to the working directory.
img_path = 'abc.jpg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy = './npy'  # handed to detect_face.create_mtcnn below
train_img = "./train_img"  # directory listed below to build HumanNames

# Everything below runs inside a dedicated graph with its own session.
with tf.Graph().as_default():
    # Session config: GPU memory fraction set to 0.6, no device-placement log.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    with sess.as_default():
        # create_mtcnn returns three objects, unpacked as pnet, rnet and onet.
        # NOTE(review): presumably the three MTCNN stage networks - confirm.
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

        # Settings defined for later use; none of them is read in this
        # excerpt, so their consumers are not visible here.
        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # thresholds for the three steps
        factor = 0.709  # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160

        # Sorted listing of the entries found in the train_img directory.
        HumanNames = os.listdir(train_img)
        HumanNames.sort()

Example #5

Show file

File: autoencoder_mnist.py Project: ntselepidis/kfac

def main(_):
    """Build the training ops and run ``FLAGS.train_steps`` training steps.

    Applies the resource-variable and control-flow-v2 flags, seeds
    TensorFlow, builds the training quantities with
    ``construct_train_quants`` and then trains inside a
    ``MonitoredTrainingSession``, logging mini-batch statistics (plus the
    optimizer's statistics when ``FLAGS.optimizer == 'kfac'``) on each step.

    Raises:
      ValueError: if ``FLAGS.use_keras_model`` is set while
        ``FLAGS.auto_register_layers`` is not.
    """
    # update_damping_immediately requires resource variables; enabling them
    # is recommended in any case.
    if FLAGS.update_damping_immediately:
        tf.enable_resource_variables()

    if FLAGS.use_control_flow_v2:
        tf.enable_control_flow_v2()

    if FLAGS.use_keras_model and not FLAGS.auto_register_layers:
        raise ValueError('Require auto_register_layers=True when using Keras '
                         'model.')

    tf.set_random_seed(FLAGS.seed)
    quants = construct_train_quants()
    (train_op, opt, batch_loss, batch_error, batch_size_schedule,
     batch_size) = quants

    global_step = tf.train.get_or_create_global_step()

    use_kfac = FLAGS.optimizer == 'kfac'
    optimizer_stats = []
    if use_kfac:
        # Reading these under a control dependency on train_op guarantees the
        # fetched values are the post-update ones; without it the reads race
        # with the update and may return stale values. Fetching them in a
        # second sess.run call would also work, but can interfere with
        # training frameworks that attach hooks to every sess.run.
        with tf.control_dependencies([train_op]):
            optimizer_stats = [
                opt.learning_rate,
                opt.momentum,
                opt.damping,
                opt.rho,
                opt.qmodel_change,
            ]

    # Soft placement lets ops the optimizer cannot place on the GPU (such as
    # "mod") fall back to another device instead of failing.
    session_config = tf.ConfigProto(allow_soft_placement=True)

    # Ideally each iteration issues exactly one sess.run call: frameworks
    # that run hooks per sess.run assume one call per optimizer iteration,
    # and an extra call (e.g. to print the loss) would advance the mini-batch
    # and break the "cached batch" mechanism damping adaptation may rely on,
    # besides re-evaluating the loss. This loop does not fully follow that
    # rule because doing so is inconvenient.
    fetches = [train_op, batch_loss, batch_error] + optimizer_stats

    # Train model.
    with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                           config=session_config) as sess:
        for _ in range(FLAGS.train_steps):
            i = sess.run(global_step)

            batch_size_ = FLAGS.batch_size
            if FLAGS.use_batch_size_schedule:
                last_index = len(batch_size_schedule) - 1
                batch_size_ = batch_size_schedule[min(i, last_index)]

            fetched = sess.run(fetches, feed_dict={batch_size: batch_size_})
            batch_loss_, batch_error_ = fetched[1], fetched[2]

            # Print training stats.
            tf.logging.info('iteration: %d', i)
            tf.logging.info(
                'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f ',
                batch_size_, batch_loss_, batch_error_)

            if use_kfac:
                (learning_rate_, momentum_, damping_, rho_,
                 qmodel_change_) = fetched[3:]
                tf.logging.info('learning_rate = %f | momentum = %f',
                                learning_rate_, momentum_)
                tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                                damping_, rho_, qmodel_change_)

            tf.logging.info('----')

Example #6

Show file

File: pyramid_lpt_local.py Project: ml-lab/flowpm

def main(_):
    """Compare a single-graph LPT field with its Mesh TensorFlow counterpart.

    Sets up a 2x2 placement mesh over four ``localhost`` mesh tasks (ports
    8222-8225) with a master server on ``localhost:8488``, generates linear
    initial conditions, and computes the first-order LPT field twice: with
    plain TensorFlow (``lpt_init`` + ``cic_paint``) and with the Mesh
    TensorFlow prototype (``lpt_prototype``). The three resulting arrays are
    saved with ``np.save`` as ``init``, ``reference_final`` and
    ``mesh_pyramid``, a four-panel figure is written to ``comparison.png``,
    and the process exits with status 0.
    """
    import sys  # Local import: only needed for the explicit exit below.

    mesh_shape = [("row", 2), ("col", 2)]
    layout_rules = [("nx_lr", "row"), ("ny_lr", "col"),
                    ("nx", "row"), ("ny", "col"),
                    ("ty_lr", "row"), ("tz_lr", "col"),
                    ("nx_block", "row"), ("ny_block", "col")]

    mesh_hosts = ["localhost:%d" % (8222 + j) for j in range(4)]

    # Create a cluster from the mesh hosts.
    cluster = tf.train.ClusterSpec({"mesh": mesh_hosts,
                                    "master": ["localhost:8488"]})

    # Create a server for local mesh members.
    server = tf.train.Server(cluster, job_name="master", task_index=0)

    mesh_devices = ['/job:mesh/task:%d' % i
                    for i in range(cluster.num_tasks("mesh"))]
    print("List of devices", mesh_devices)
    mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
        mesh_shape, layout_rules, mesh_devices)

    # Build the model.

    # Create computational graphs and some initializations.
    graph = mtf.Graph()
    mesh = mtf.Mesh(graph, "nbody_mesh")

    # Compute a few things first, using simple tensorflow.
    a0 = FLAGS.a0
    a = FLAGS.af
    nsteps = FLAGS.nsteps
    bs, nc = FLAGS.box_size, FLAGS.nc
    # Parse the power-spectrum table once and take its first two columns.
    power_table = np.loadtxt('../flowpm/data/Planck15_a1p00.txt').T
    klin, plin = power_table[0], power_table[1]
    ipklin = iuspline(klin, plin)
    # Only needed by the N-body evolution, which is currently disabled below.
    stages = np.linspace(a0, a, nsteps, endpoint=True)

    # Generate a batch of 3D initial conditions.
    initial_conditions = flowpm.linear_field(
        nc,  # size of the cube
        bs,  # physical size of the cube
        ipklin,  # initial power spectrum
        batch_size=FLAGS.batch_size)

    state = lpt_init(initial_conditions, a0=a0, order=1)
    final_state = state  # nbody(state, stages, nc) is not applied here.
    tfinal_field = cic_paint(tf.zeros_like(initial_conditions), final_state[0])

    # Compute necessary Fourier kernels.
    kvec = flowpm.kernels.fftk((nc, nc, nc), symmetric=False)
    from flowpm.kernels import laplace_kernel, gradient_kernel
    lap = tf.cast(laplace_kernel(kvec), tf.complex64)
    grad_x = gradient_kernel(kvec, 0)
    grad_y = gradient_kernel(kvec, 1)
    grad_z = gradient_kernel(kvec, 2)
    derivs = [lap, grad_x, grad_y, grad_z]

    mesh_final_field = lpt_prototype(mesh, initial_conditions, derivs,
                                     bs=bs,
                                     nc=nc, batch_size=FLAGS.batch_size)
    # Lower mesh computation.
    lowering = mtf.Lowering(graph, {mesh: mesh_impl})

    # Retrieve output of computation.
    result = lowering.export_to_tf_tensor(mesh_final_field)

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    with tf.Session(server.target, config=session_config) as sess:
        # Distinct names so the scale factor `a` above is not shadowed.
        init_field, reference_field, mesh_field = sess.run(
            [initial_conditions, tfinal_field, result])
    np.save('init', init_field)
    np.save('reference_final', reference_field)
    np.save('mesh_pyramid', mesh_field)

    # Each panel shows the first batch element summed along axis 2.
    plt.figure(figsize=(15, 3))
    plt.subplot(141)
    plt.imshow(init_field[0].sum(axis=2))
    plt.title('Initial Conditions')

    plt.subplot(142)
    plt.imshow(reference_field[0].sum(axis=2))
    plt.title('TensorFlow (single GPU)')
    plt.colorbar()

    plt.subplot(143)
    plt.imshow(mesh_field[0].sum(axis=2))
    plt.title('Mesh TensorFlow')
    plt.colorbar()

    plt.subplot(144)
    plt.imshow((reference_field[0] - mesh_field[0]).sum(axis=2))
    plt.title('Residuals')
    plt.colorbar()

    plt.savefig("comparison.png")

    # sys.exit works even where the interactive-only `exit` helper is absent.
    sys.exit(0)

Example #7

Show file

def train(flags):
    """Model training.

    Builds the model named by ``flags.model_name``, trains it for the total
    number of steps given in ``flags.how_many_training_steps``, validates
    every ``flags.eval_step_interval`` steps (and on the last step), and
    finally measures accuracy on the testing set.

    Files written under ``flags.train_dir``: the model summary, ``flags.txt``,
    ``graph.pbtxt``, ``labels.txt``, per-validation weights, ``best_weights``,
    ``last_weights`` and ``accuracy_last.txt``. Summaries go to
    ``flags.summaries_dir + '/train'`` and ``'/validation'``.

    Args:
      flags: settings object; ``flags.training`` is set to True here.

    Raises:
      Exception: if ``--how_many_training_steps`` and ``--learning_rate``
        have different lengths.
      ValueError: for an unsupported ``flags.optimizer`` or
        ``flags.lr_schedule``.
    """

    flags.training = True

    # Set the verbosity based on flags (default is INFO, so we see all messages)
    logging.set_verbosity(flags.verbosity)

    # Start a new TensorFlow session.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    tf.keras.backend.set_session(sess)

    audio_processor = input_data.AudioProcessor(flags)

    time_shift_samples = int((flags.time_shift_ms * flags.sample_rate) / 1000)

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(
        map(int, flags.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, flags.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))
    logging.info(flags)
    model = models.MODELS[flags.model_name](flags)
    logging.info(model.summary())

    # save model summary
    utils.save_model_summary(model, flags.train_dir)

    # save model and data flags
    with open(os.path.join(flags.train_dir, 'flags.txt'), 'wt') as f:
        pprint.pprint(flags, stream=f)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    # Exactly one optimizer is built; unknown names are rejected.
    if flags.optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(epsilon=flags.optimizer_epsilon)
    elif flags.optimizer == 'momentum':
        optimizer = tf.keras.optimizers.SGD(momentum=0.9)
    elif flags.optimizer == 'novograd':
        optimizer = tfa.optimizers.NovoGrad(
            lr=0.05,
            beta_1=flags.novograd_beta_1,
            beta_2=flags.novograd_beta_2,
            weight_decay=flags.novograd_weight_decay,
            grad_averaging=bool(flags.novograd_grad_averaging))
    else:
        raise ValueError('Unsupported optimizer:%s' % flags.optimizer)

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    train_writer = tf.summary.FileWriter(flags.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(flags.summaries_dir +
                                              '/validation')

    sess.run(tf.global_variables_initializer())

    start_step = 1

    logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, flags.train_dir, 'graph.pbtxt')

    # Save list of words.
    with tf.io.gfile.GFile(os.path.join(flags.train_dir, 'labels.txt'),
                           'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    best_accuracy = 0.0

    # prepare parameters for exp learning rate decay
    training_steps_max = np.sum(training_steps_list)
    lr_init = learning_rates_list[0]
    exp_rate = -np.log(learning_rates_list[-1] / lr_init) / training_steps_max

    # Training loop.
    for training_step in range(start_step, training_steps_max + 1):
        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            flags.batch_size, 0, flags, flags.background_frequency,
            flags.background_volume, time_shift_samples, 'training',
            flags.resample, flags.volume_resample, sess)

        if flags.lr_schedule == 'exp':
            learning_rate_value = lr_init * np.exp(-exp_rate * training_step)
        elif flags.lr_schedule == 'linear':
            # Figure out what the current learning rate is: the rate of the
            # first stage whose cumulative step count covers this step.
            training_steps_sum = 0
            for stage_steps, stage_rate in zip(training_steps_list,
                                               learning_rates_list):
                training_steps_sum += stage_steps
                if training_step <= training_steps_sum:
                    learning_rate_value = stage_rate
                    break
        else:
            raise ValueError('Wrong lr_schedule: %s' % flags.lr_schedule)

        tf.keras.backend.set_value(model.optimizer.lr, learning_rate_value)
        # result[0] is logged as cross entropy and result[1] as accuracy.
        result = model.train_on_batch(train_fingerprints, train_ground_truth)

        summary = tf.Summary(value=[
            tf.Summary.Value(tag='accuracy', simple_value=result[1]),
        ])
        train_writer.add_summary(summary, training_step)

        logging.info(
            'Step #%d: rate %f, accuracy %.2f%%, cross entropy %f',
            training_step, learning_rate_value, result[1] * 100, result[0])

        is_last_step = (training_step == training_steps_max)
        if (training_step % flags.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            # Round down to a whole number of batches.
            set_size = int(set_size / flags.batch_size) * flags.batch_size
            total_accuracy = 0.0
            count = 0.0
            for i in range(0, set_size, flags.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(flags.batch_size, i, flags, 0.0,
                                             0.0, 0, 'validation', 0.0, 0.0,
                                             sess))

                # Run a validation step and record its accuracy for
                # TensorBoard.
                result = model.test_on_batch(validation_fingerprints,
                                             validation_ground_truth)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='accuracy', simple_value=result[1]),
                ])

                validation_writer.add_summary(summary, training_step)

                total_accuracy += result[1]
                count = count + 1.0

            total_accuracy = total_accuracy / count
            logging.info('Step %d: Validation accuracy = %.2f%% (N=%d)',
                         training_step, total_accuracy * 100, set_size)

            model.save_weights(flags.train_dir + 'train/' +
                               str(int(best_accuracy * 10000)) + 'weights_' +
                               str(training_step))

            # Save the model checkpoint when validation accuracy improves
            if total_accuracy >= best_accuracy:
                best_accuracy = total_accuracy
                # overwrite the best model weights
                model.save_weights(flags.train_dir + 'best_weights')
            logging.info('So far the best validation accuracy is %.2f%%',
                         (best_accuracy * 100))

    # No summaries are written past this point; closing flushes pending events.
    train_writer.close()
    validation_writer.close()

    tf.keras.backend.set_learning_phase(0)
    set_size = audio_processor.set_size('testing')
    set_size = int(set_size / flags.batch_size) * flags.batch_size
    logging.info('set_size=%d', set_size)
    total_accuracy = 0.0
    count = 0.0

    for i in range(0, set_size, flags.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            flags.batch_size, i, flags, 0.0, 0.0, 0, 'testing', 0.0, 0.0, sess)

        result = model.test_on_batch(test_fingerprints, test_ground_truth)

        total_accuracy += result[1]
        count = count + 1.0
    total_accuracy = total_accuracy / count

    logging.info('Final test accuracy = %.2f%% (N=%d)',
                 total_accuracy * 100, set_size)
    with open(os.path.join(flags.train_dir, 'accuracy_last.txt'), 'wt') as fd:
        fd.write(str(total_accuracy * 100))
    model.save_weights(flags.train_dir + 'last_weights')

Example #8

Show file

def main(args):
    """Train and evaluate one CSRM model per row of ``exp.csv``.

    For every row the experiment settings are copied into ``args``, the
    train/test sequences are loaded with ``load_sequence``, a ``CSRM`` model
    is trained in a fresh graph and session, and the first five entries of
    each returned metric are printed and appended to
    ``NEW_LOGGER_<expname>.txt``.

    Args:
      args: dict of settings; it is updated in place for each experiment.
    """

    def first_five(values):
        """Comma-join the string forms of values[0] through values[4]."""
        return ','.join(str(values[k]) for k in range(5))

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    #print ("args: ", args)
    experiments = pd.read_csv('exp.csv')
    # Pairs of (key written into args, exp.csv column it is read from).
    arg_columns = (
        ('expname', 'name'),
        ('sessionid', 'sessionid'),
        ('itemid', 'itemid'),
        ('data_folder', 'path'),
        ('valid_data', 'test'),
        ('train_data', 'train'),
        ('freq', 'freq'),
    )
    for _, row in experiments.iterrows():
        gc.collect()
        for arg_key, column in arg_columns:
            args[arg_key] = row[column]

        print('Train:', args['train_data'], ' -- Test:', args['valid_data'],
              ' -- Freq:', args['freq'])
        logger_name = "LOGGER_" + args['expname'] + ".txt"
        with open(logger_name, "a") as myfile:
            myfile.write(row['train'] + ", " + row['test'] + "\n")

        # split patterns to train_patterns and test_patterns
        print('Start Data Preprocessing: Training Set')
        train_path = args['data_folder'] + '/' + args['train_data']
        train, itemsIDs, freqs, old_new = load_sequence(train_path,
                                                        args['itemid'],
                                                        args['sessionid'],
                                                        itemsIDs=[])
        args['n_items'] = len(itemsIDs) + 1
        print('Start Data Preprocessing: Testing Set')
        valid_path = args['data_folder'] + '/' + args['valid_data']
        valid, _, _, _ = load_sequence(valid_path,
                                       args['itemid'],
                                       args['sessionid'],
                                       Train=False,
                                       itemsIDs=itemsIDs,
                                       freq=args['freq'],
                                       old_new=old_new)

        #train, valid, test = data_process.load_data()
        print("%d train examples." % len(train[0]))
        print("%d valid examples." % len(valid[0]))
        keep_probability = np.array(args['keep_probability'])
        no_dropout = np.array(args['no_dropout'])
        result_path = "./save/" + args['dataset']
        # Build model
        tf.reset_default_graph()
        with tf.Session(config=session_config) as sess:
            model = CSRM(
                sess=sess,
                n_items=args['n_items'],
                dim_proj=int(args['dim_proj']),
                hidden_units=int(args['hidden_units']),
                memory_size=args['memory_size'],
                memory_dim=args['memory_dim'],
                shift_range=args['shift_range'],
                lr=args['lr'],
                controller_layer_numbers=args['controller_layer_numbers'],
                batch_size=args['batch_size'],
                epoch=args['epoch'],
                keep_probability=keep_probability,
                no_dropout=no_dropout,
                display_frequency=args['display_frequency'],
                item_freqs=freqs,
                expname=args['expname'])
            # The validation set is passed for both of train()'s data slots.
            metrics = model.train(train, valid, valid, result_path)
            hit, MRR, cov, pop, train_time, test_time = metrics

        # Console report.
        print("#########################################################")
        print("NEW_LOGGER_ " + args['expname'])
        print(first_five(hit) + ',' + first_five(MRR))
        print("\nCOV:" + first_five(cov))
        print("\nPOP:" + first_five(pop))
        print("\nTrainTime:" + str(train_time))
        print("\nTestTime:" + str(test_time))

        # The same report appended to the per-experiment log file.
        with open("NEW_LOGGER_" + args['expname'] + ".txt", "a") as myfile:
            myfile.write(first_five(hit) + ',' + first_five(MRR))
            myfile.write("\nCOV:" + first_five(cov))
            myfile.write("\nPOP:" + first_five(pop))
            myfile.write("\nTrainTime:" + str(train_time))
            myfile.write("\nTestTime:" + str(test_time))
            myfile.write("\n############################################\n")

Example #9

Show file

File: tpu_executor.py Project: zhangzan1997/tpu

    def __init__(self,
                 model_fn,
                 params,
                 tpu_cluster_resolver=None,
                 keep_checkpoint_max=5):
        """Store the settings and build ``self._estimator``.

        A ``TPUEstimator`` is created when ``params.use_tpu`` is set or a
        cluster resolver is available; otherwise a plain ``Estimator`` using
        a ``MirroredStrategy`` over the listed GPU devices is created.

        Args:
          model_fn: model function handed to the estimator.
          params: settings object; read through attributes and ``as_dict()``.
          tpu_cluster_resolver: optional resolver; when omitted on the TPU
            path one is built from ``params.platform``.
          keep_checkpoint_max: passed to the TPU ``RunConfig``.
        """
        self._model_dir = params.model_dir
        self._params = params
        self._tpu_job_name = params.tpu_job_name
        self._evaluator = None
        self._tpu_cluster_resolver = tpu_cluster_resolver
        self._keep_checkpoint_max = keep_checkpoint_max

        if not (params.use_tpu or self._tpu_cluster_resolver):
            # GPU path.
            model_params = params.as_dict()

            # Uses `train_batch_size` as the `batch_size` for GPU train.
            batch_size_override = {'batch_size': params.train.train_batch_size}
            model_params.update(batch_size_override)

            gpu_devices = tf.config.experimental.list_physical_devices('GPU')
            tf.logging.info('gpu devices: %s', gpu_devices)
            devices = [
                'device:GPU:{}'.format(index)
                for index, _ in enumerate(gpu_devices)
            ]
            strategy = tf.distribute.MirroredStrategy(devices=devices)
            tf.logging.info('Number of devices: %s',
                            strategy.num_replicas_in_sync)
            run_config = tf.estimator.RunConfig(train_distribute=strategy,
                                                model_dir=params.model_dir)
            self._estimator = tf.estimator.Estimator(model_fn=model_fn,
                                                     config=run_config,
                                                     params=model_params)
            return

        # TPU path.
        if not self._tpu_cluster_resolver:
            resolver_cls = tf.distribute.cluster_resolver.TPUClusterResolver
            self._tpu_cluster_resolver = resolver_cls(
                params.platform.tpu,
                zone=params.platform.tpu_zone,
                project=params.platform.gcp_project)
        tpu_grpc_url = self._tpu_cluster_resolver.get_master()
        tf.Session.reset(tpu_grpc_url)

        def _maybe_transpose(dims):
            # If the input image is transposed (from NHWC to HWCN), the
            # partition dimensions also need to be transposed the same way.
            if not dims or not params.train.transpose_input:
                return dims
            return [dims[axis] for axis in (1, 2, 3, 0)]

        input_partition_dims = None
        num_cores_per_replica = None
        if params.train.input_partition_dims is not None:
            num_cores_per_replica = params.train.num_cores_per_replica
            # Parse 'None' into None.
            input_partition_dims = [
                None if dims == 'None' else _maybe_transpose(dims)
                for dims in params.train.input_partition_dims
            ]

        # Sets up config for TPUEstimator.
        per_host_v2 = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
        tpu_config = tf.estimator.tpu.TPUConfig(
            params.train.iterations_per_loop,
            num_cores_per_replica=num_cores_per_replica,
            input_partition_dims=input_partition_dims,
            tpu_job_name=self._tpu_job_name,
            per_host_input_for_training=per_host_v2)

        session_config = tf.ConfigProto(
            isolate_session_state=params.isolate_session_state)
        run_config = tf.estimator.tpu.RunConfig(
            session_config=session_config,
            cluster=self._tpu_cluster_resolver,
            evaluation_master=params.platform.eval_master,
            model_dir=params.model_dir,
            log_step_count_steps=params.train.iterations_per_loop,
            tpu_config=tpu_config,
            keep_checkpoint_max=self._keep_checkpoint_max,
        )
        self._estimator = tf.estimator.tpu.TPUEstimator(
            model_fn=model_fn,
            use_tpu=params.use_tpu,
            train_batch_size=params.train.train_batch_size,
            eval_batch_size=params.eval.eval_batch_size,
            predict_batch_size=params.predict.predict_batch_size,
            config=run_config,
            params=params.as_dict())

Example #10

Show file

File: train.py Project: gesris/mlnll-analysis

def main(args):
    """Train the model of one fold by minimizing the constraint on ``mu``.

    The function builds the nominal dataset for ``args.fold``, fits a
    ``StandardScaler`` on the training split and pickles it into
    ``args.workdir``, builds a binned Poisson negative log-likelihood (NLL)
    from the model output, and trains the model weights with Adam. The
    training loss is the square root of the ``[0][0]`` element of the inverse
    Hessian of the NLL with respect to the fit parameters.

    Training runs full-batch until the validation loss has failed to improve
    by more than the relative ``tolerance`` for ``patience`` consecutive
    validation checks; a checkpoint is written on every improvement.

    Args:
        args: Parsed command-line arguments. Only ``args.workdir`` and
            ``args.fold`` are read here.
    """
    # Build nominal dataset
    classes = cfg.ml_classes + [
        n + '_ss' for n in cfg.ml_classes if n not in ['ggh', 'qqh']
    ] + ['data_ss']
    x, y, w = build_dataset(os.path.join(args.workdir,
                                         'fold{}.root'.format(args.fold)),
                            classes,
                            args.fold,
                            use_class_weights=False,
                            make_categorical=False)
    x_train, x_val, y_train, y_val, w_train, w_val = train_test_split(
        x, y, w, test_size=0.25, random_state=1234)
    logger.info(
        'Number of train/val events in nominal dataset: {} / {}'.format(
            x_train.shape[0], x_val.shape[0]))

    # Scale to expectation in the full dataset
    scale_train = 4.0 / 3.0 * 2.0  # train/test split + two fold
    scale_val = 4.0 * 2.0
    w_train = w_train * scale_train
    w_val = w_val * scale_val
    for i, name in enumerate(classes):
        s_train = np.sum(w_train[y_train == i])
        s_val = np.sum(w_val[y_val == i])
        logger.debug('Class / train / val: {} / {} / {}'.format(
            name, s_train, s_val))

    # Build dataset for systematic shifts (currently disabled):
    # x_sys, y_sys, w_sys = build_dataset(
    #     os.path.join(args.workdir, 'fold{}.root'.format(args.fold)),
    #     ['htt', 'htt_jecUncRelativeSampleYearUp',
    #      'htt_jecUncRelativeSampleYearDown'],
    #     args.fold, make_categorical=False, use_class_weights=True)
    # x_sys_train, x_sys_val, w_sys_train, w_sys_val = train_test_split(
    #     x_sys, w_sys, test_size=0.25, random_state=1234)
    # logger.info(
    #     'Number of train/val events in varied datasets: {} / {}'.format(
    #         x_sys_train.shape[0], x_sys_val.shape[0]))
    # logger.debug('Sum of weights for nominal/up/down: {} / {} / {}'.format(
    #     np.sum(w_sys[y_sys == 0]), np.sum(w_sys[y_sys == 1]),
    #     np.sum(w_sys[y_sys == 2])))

    # Preprocessing
    preproc = StandardScaler()
    preproc.fit(x_train)
    preproc_path = os.path.join(args.workdir,
                                'preproc_fold{}.pickle'.format(args.fold))
    # Use a context manager so the pickle is flushed and the handle closed.
    with open(preproc_path, 'wb') as preproc_file:
        pickle.dump(preproc, preproc_file)
    x_train_preproc = preproc.transform(x_train)
    x_val_preproc = preproc.transform(x_val)
    for i, (var, mean, std) in enumerate(
            zip(cfg.ml_variables, preproc.mean_, preproc.scale_)):
        logger.info('Variable: %s', var)
        logger.info('Preprocessing parameter (mean, std): %s, %s', mean, std)
        logger.info('Preprocessed data (mean, std): %s, %s',
                    np.mean(x_train_preproc[:, i]),
                    np.std(x_train_preproc[:, i]))

    # Create model
    x_ph = tf.placeholder(tf.float64, shape=(None, len(cfg.ml_variables)))
    logits, f, w_vars = model(x_ph, len(cfg.ml_variables), 1, args.fold)

    # Build NLL loss
    y_ph = tf.placeholder(tf.float64, shape=(None, ))
    w_ph = tf.placeholder(tf.float64, shape=(None, ))

    nll = 0.0
    bins = np.array(cfg.analysis_binning)
    mu = tf.constant(1.0, tf.float64)
    # No nuisance parameters are registered yet, so every loop over
    # `nuisances` below is a no-op placeholder.
    nuisances = {}
    epsilon = tf.constant(1e-9, tf.float64)
    for i, (up, down) in enumerate(zip(bins[1:], bins[:-1])):
        logger.debug('Add NLL for bin {} with boundaries [{}, {}]'.format(
            i, down, up))
        up = tf.constant(up, tf.float64)
        down = tf.constant(down, tf.float64)

        # Processes: weighted event count per class inside this bin
        mask = count_masking(f, up, down)
        procs = {}
        for j, name in enumerate(classes):
            proc_w = mask * tf.cast(tf.equal(y_ph, tf.constant(j, tf.float64)),
                                    tf.float64) * w_ph
            procs[name] = tf.reduce_sum(proc_w)

        # QCD estimation: same-sign data minus same-sign backgrounds,
        # clipped at zero
        procs['qcd'] = procs['data_ss']
        for p in [n for n in cfg.ml_classes if n not in ['ggh', 'qqh']]:
            procs['qcd'] -= procs[p + '_ss']
        procs['qcd'] = tf.maximum(procs['qcd'], 0)

        # Nominal signal and background
        sig = 0
        for p in ['ggh', 'qqh']:
            sig += procs[p]

        bkg = 0
        for p in ['ztt', 'zl', 'w', 'tt', 'vv', 'qcd']:
            bkg += procs[p]

        # Normalization uncertainties (placeholder, see `nuisances` above)
        sys = 0.0
        for n in nuisances:
            pass

        # Expectations
        obs = sig + bkg
        exp = mu * sig + bkg + sys

        # Likelihood; both arguments are clipped at epsilon
        nll -= tfp.distributions.Poisson(tf.maximum(exp, epsilon)).log_prob(
            tf.maximum(obs, epsilon))

    # Nuisance constraints
    for n in nuisances:
        nll -= tfp.distributions.Normal(
            loc=tf.constant(0.0, dtype=tf.float64),
            scale=tf.constant(1.0, dtype=tf.float64)).log_prob(nuisances[n])

    # Compute constraint of mu
    def get_constraint(nll, params):
        """Return sqrt of the [0][0] entry of the inverse Hessian of `nll`."""
        hessian = [
            tf.gradients(g, params)
            for g in tf.unstack(tf.gradients(nll, params))
        ]
        inverse = tf.matrix_inverse(hessian)
        covariance_poi = inverse[0][0]
        constraint = tf.sqrt(covariance_poi)
        return constraint

    loss_fullnll = get_constraint(nll,
                                  [mu] + [nuisances[n] for n in nuisances])
    loss_statsonly = get_constraint(nll, [mu])

    # Add minimization ops
    def get_minimize_op(loss):
        """Return an Adam update op minimizing `loss` w.r.t. `w_vars`."""
        optimizer = tf.train.AdamOptimizer()
        return optimizer.minimize(loss, var_list=w_vars)

    minimize_fullnll = get_minimize_op(loss_fullnll)
    minimize_statsonly = get_minimize_op(loss_statsonly)

    # Train
    config = tf.ConfigProto(intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    session = tf.Session(config=config)
    session.run([tf.global_variables_initializer()])
    saver = tf.train.Saver(max_to_keep=1)

    patience = 10
    patience_count = patience
    min_loss = 1e9
    tolerance = 0.001
    step = 0
    validation_steps = 20
    warmup_steps = 100
    while True:
        # The stats-only loss is used during warmup, the full NLL afterwards.
        if step < warmup_steps:
            loss = loss_statsonly
            minimize = minimize_statsonly
            is_warmup = True
        else:
            loss = loss_fullnll
            minimize = minimize_fullnll
            is_warmup = False

        loss_train, _ = session.run([loss, minimize],
                                    feed_dict={
                                        x_ph: x_train_preproc,
                                        y_ph: y_train,
                                        w_ph: w_train
                                    })

        if step % validation_steps == 0:
            logger.info('Step / patience: {} / {}'.format(
                step, patience_count))
            logger.info('Train loss: {:.5f}'.format(loss_train))
            loss_val = session.run(loss,
                                   feed_dict={
                                       x_ph: x_val_preproc,
                                       y_ph: y_val,
                                       w_ph: w_val
                                   })
            logger.info('Validation loss: {:.5f}'.format(loss_val))

            if is_warmup:
                logger.info('Warmup: {} / {}'.format(step, warmup_steps))
            else:
                # Only a relative improvement larger than `tolerance` resets
                # the patience counter and triggers a checkpoint.
                if min_loss > loss_val and np.abs(
                        min_loss - loss_val) / min_loss > tolerance:
                    min_loss = loss_val
                    patience_count = patience
                    path = saver.save(session,
                                      os.path.join(
                                          args.workdir,
                                          'model_fold{}/model.ckpt'.format(
                                              args.fold)),
                                      global_step=step)
                    logger.info('Save model to {}'.format(path))
                else:
                    patience_count -= 1

                if patience_count == 0:
                    logger.info('Stop training')
                    break

        step += 1

Example #11

Show file

def main(unused_argv):
    """Evaluate from a checkpoint directory, or train with eval listeners.

    When ``FLAGS.checkpoint_dir`` is set, the estimator is evaluated (once or
    continuously) on either the training or the first validation input.
    Otherwise the estimator is trained, with checkpoint-saver listeners
    attached for the first one or two eval specs.

    Args:
        unused_argv: Ignored.
    """
    for required in ('model_dir', 'pipeline_config_path'):
        flags.mark_flag_as_required(required)

    if FLAGS.gpu_device is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_device)

    # Session options: enable `gpu_options.allow_growth`.
    sess_cfg = tf.ConfigProto()
    sess_cfg.gpu_options.allow_growth = True
    run_cfg = tf.estimator.RunConfig(
        model_dir=FLAGS.model_dir,
        session_config=sess_cfg,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs)

    bundle = model_lib.create_estimator_and_inputs(
        run_config=run_cfg,
        pipeline_config_path=FLAGS.pipeline_config_path,
        train_steps=FLAGS.num_train_steps,
        sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
        sample_1_of_n_eval_on_train_examples=(
            FLAGS.sample_1_of_n_eval_on_train_examples))
    (estimator, train_input_fn, eval_input_fns, eval_on_train_input_fn,
     predict_input_fn, train_steps) = [
         bundle[key] for key in ('estimator', 'train_input_fn',
                                 'eval_input_fns', 'eval_on_train_input_fn',
                                 'predict_input_fn', 'train_steps')
     ]

    if FLAGS.checkpoint_dir:
        # Evaluation-only mode.
        if FLAGS.eval_training_data:
            name, input_fn = 'training_data', eval_on_train_input_fn
        else:
            # Only the first eval input is evaluated.
            name, input_fn = 'validation_data', eval_input_fns[0]

        if FLAGS.run_once:
            latest_ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            estimator.evaluate(input_fn,
                               steps=None,
                               checkpoint_path=latest_ckpt)
        else:
            model_lib.continuous_eval(estimator, FLAGS.checkpoint_dir,
                                      input_fn, train_steps, name,
                                      FLAGS.max_eval_retries)
        return

    # Training mode.
    train_spec, eval_specs = model_lib.create_train_and_eval_specs(
        train_input_fn,
        eval_input_fns,
        eval_on_train_input_fn,
        predict_input_fn,
        train_steps,
        eval_on_train_data=False)

    # Multiple eval specs are allowed; the first is always attached and the
    # second only when present.
    # TODO: Fix name of saving_listeners
    validation_listener = EvalCheckpointSaverListener(
        estimator, eval_specs[0].input_fn, 'validation')
    saving_listeners = [validation_listener]
    if len(eval_specs) > 1:
        training_listener = EvalCheckpointSaverListener(
            estimator, eval_specs[1].input_fn, 'training')
        saving_listeners.append(training_listener)

    estimator.train(input_fn=train_spec.input_fn,
                    max_steps=train_spec.max_steps,
                    saving_listeners=saving_listeners)

Example #12

Show file

    def benchmark_model(self,
                        warmup_runs,
                        bm_runs,
                        num_threads,
                        trace_filename=None):
        """Benchmark inference latency and throughput of the model.

        The model is built, frozen to constants, re-imported into a fresh
        graph, and then run `warmup_runs` times (each run timed and printed)
        followed by `bm_runs` timed runs. The mean per-batch time and the
        resulting FPS (`self.batch_size / mean time`) are printed.

        Args:
            warmup_runs: Number of warm-up runs executed before timing.
            bm_runs: Number of benchmark runs averaged for the report.
            num_threads: If > 0, used as `intra_op_parallelism_threads`
                (with `inter_op_parallelism_threads=1`); otherwise a default
                `ConfigProto` is used.
            trace_filename: If set, one additional fully traced run is
                executed and its Chrome trace is written to this path.
        """
        if self.tensorrt:
            print('Using tensorrt ', self.tensorrt)
            self.build_and_save_model()
            graphdef = self.freeze_model()

        if num_threads > 0:
            print('num_threads for benchmarking: {}'.format(num_threads))
            sess_config = tf.ConfigProto(
                intra_op_parallelism_threads=num_threads,
                inter_op_parallelism_threads=1)
        else:
            sess_config = tf.ConfigProto()

        # rewriter_config_pb2.RewriterConfig.OFF
        sess_config.graph_options.rewrite_options.dependency_optimization = 2
        if self.use_xla:
            sess_config.graph_options.optimizer_options.global_jit_level = (
                tf.OptimizerOptions.ON_2)

        with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
            inputs = tf.placeholder(tf.float32,
                                    name='input',
                                    shape=self.inputs_shape)
            output = self.build_model(inputs, is_training=False)

            img = np.random.uniform(size=self.inputs_shape)

            sess.run(tf.global_variables_initializer())
            if self.tensorrt:
                fetches = [inputs.name] + [i.name for i in output]
                goutput = self.convert_tr(graphdef, fetches)
                inputs, output = goutput[0], goutput[1:]

            if not self.use_xla:
                # Don't use tf.group because XLA removes the whole graph for tf.group.
                output = tf.group(*output)

            output_name = [output.name]
            input_name = inputs.name
            graphdef = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_name)

        with tf.Graph().as_default(), tf.Session(config=sess_config) as sess:
            tf.import_graph_def(graphdef, name='')

            for i in range(warmup_runs):
                start_time = time.time()
                sess.run(output_name, feed_dict={input_name: img})
                print('Warm up: {} {:.4f}s'.format(i,
                                                   time.time() - start_time))

            print('Start benchmark runs total={}'.format(bm_runs))
            start = time.perf_counter()
            for _ in range(bm_runs):
                sess.run(output_name, feed_dict={input_name: img})
            end = time.perf_counter()
            # Average over the number of runs actually executed (this used to
            # be a hard-coded 10); max() guards against bm_runs == 0.
            inference_time = (end - start) / max(bm_runs, 1)
            print('Per batch inference time: ', inference_time)
            print('FPS: ', self.batch_size / inference_time)

            if trace_filename:
                run_options = tf.RunOptions()
                run_options.trace_level = tf.RunOptions.FULL_TRACE
                run_metadata = tf.RunMetadata()
                sess.run(output_name,
                         feed_dict={input_name: img},
                         options=run_options,
                         run_metadata=run_metadata)
                logging.info('Dumping trace to %s', trace_filename)
                trace_dir = os.path.dirname(trace_filename)
                if not tf.io.gfile.exists(trace_dir):
                    tf.io.gfile.makedirs(trace_dir)
                with tf.io.gfile.GFile(trace_filename, 'w') as trace_file:
                    from tensorflow.python.client import timeline  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
                    trace = timeline.Timeline(
                        step_stats=run_metadata.step_stats)
                    trace_file.write(
                        trace.generate_chrome_trace_format(show_memory=True))

Example #13

Show file

File: experiment.py Project: sarthakksu/covid-low-income-bam

def eval_once(ckpnt):
    """Restore a single checkpoint and count correct predictions.

    Runs `FLAGS.eval_size` evaluation steps on the restored model and prints
    the number of false and true predictions.

    Args:
        ckpnt: Checkpoint path handed to `saver.restore`.
    """
    # One 5x5 block of ones per (row, col) grid cell, anchored at
    # (2 * row, 2 * col), then flattened over the grid.
    patch_masks = np.zeros((14, 14, 32, 32))
    for row in range(14):
        for col in range(14):
            top, left = row * 2, col * 2
            patch_masks[row, col, top:top + 5, left:left + 5] = 1
    patch_masks = patch_masks.reshape((14 * 14, 32, 32))

    with tf.Graph().as_default():
        features = get_features(False, 1)[0]
        if FLAGS.patching:
            # Swap in the 'cc_' variants of the inputs.
            for target in ('images', 'recons_label', 'labels'):
                features[target] = features['cc_' + target]
        outputs = f_model.multi_gpu_model([features])
        correct_prediction_sum = outputs['correct']
        logits = outputs['logits']

        saver = tf.train.Saver()
        test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test_once')
        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.3
        sess = tf.Session(config=session_config)
        saver.restore(sess, ckpnt)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        step = 0
        try:
            total_tp = 0
            for step in range(FLAGS.eval_size):
                labels, true_positives, batch_logits = sess.run([
                    features['recons_label'],
                    correct_prediction_sum,
                    logits,
                ])
                if FLAGS.patching:
                    # Normalize each row of logits, sum over the batch and
                    # compare the arg-max with the first label.
                    row_sums = np.sum(batch_logits, axis=1, keepdims=True)
                    pooled = np.sum(batch_logits / row_sums, axis=0)
                    true_positives = np.equal(np.argmax(pooled), labels[0])

                total_tp += true_positives
            total_false = FLAGS.eval_size - total_tp
            print('false:{}, true:{}'.format(total_false, total_tp))
        except tf.errors.OutOfRangeError:
            print('Done eval for %d steps.' % step)
        finally:
            # Ask the queue-runner threads to stop.
            coord.request_stop()
        # Wait for the threads, then release the session and writer.
        coord.join(threads)
        sess.close()
        test_writer.close()

Example #14

Show file

File: experiment.py Project: sarthakksu/covid-low-income-bam

def eval_ensemble(ckpnts):
    """Sum logits over several checkpoints and print the number of errors.

    `FLAGS.eval_size // 100` batches of 100 examples are first fetched from
    the input pipeline and cached. Each checkpoint is then restored in turn,
    run on the cached batches through placeholders, and its logits are
    accumulated. Finally the arg-max of the accumulated logits is compared
    with the cached `recons_label` values.

    Args:
        ckpnts: Iterable of checkpoint paths to restore.
    """
    with tf.Graph().as_default():
        first_features = get_features(False, 100)[0]
        h = first_features['height']
        d = first_features['depth']
        image_shape = (100, d, h, h)
        features = {
            'images': tf.placeholder(tf.float32, shape=image_shape),
            'labels': tf.placeholder(tf.float32, shape=(100, 10)),
            'recons_image': tf.placeholder(tf.float32, shape=image_shape),
            'recons_label': tf.placeholder(tf.int32, shape=(100)),
            'height': first_features['height'],
            'depth': first_features['depth'],
        }

        logits = f_model.multi_gpu_model([features])['logits']
        config = tf.ConfigProto(allow_soft_placement=True)

        # Host-side caches for the fetched inputs and the accumulated logits.
        num_batches = FLAGS.eval_size // 100
        batch_logits = np.zeros((num_batches, 100, 10), dtype=np.float32)
        batch_recons_label = np.zeros((num_batches, 100), dtype=np.float32)
        batch_labels = np.zeros((num_batches, 100, 10), dtype=np.float32)
        batch_images = np.zeros((num_batches, ) + image_shape,
                                dtype=np.float32)
        batch_recons_image = np.zeros((num_batches, ) + image_shape,
                                      dtype=np.float32)

        saver = tf.train.Saver()
        sess = tf.Session(config=config)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Pass 1: pull the evaluation data out of the input pipeline.
            for i in range(num_batches):
                fetched = sess.run([
                    first_features['recons_label'],
                    first_features['labels'],
                    first_features['images'],
                    first_features['recons_image'],
                ])
                (batch_recons_label[i, ...], batch_labels[i, ...],
                 batch_images[i, ...], batch_recons_image[i, ...]) = fetched

            # Pass 2: run every checkpoint on the cached data.
            for ckpnt in ckpnts:
                saver.restore(sess, ckpnt)
                for i in range(num_batches):
                    feed = {
                        features['recons_label']: batch_recons_label[i, ...],
                        features['labels']: batch_labels[i, ...],
                        features['images']: batch_images[i, ...],
                        features['recons_image']: batch_recons_image[i, ...],
                    }
                    batch_logits[i, ...] += sess.run(logits, feed_dict=feed)
        except tf.errors.OutOfRangeError:
            print('Done eval for %d steps.' % i)
        finally:
            # Ask the queue-runner threads to stop.
            coord.request_stop()
        # Wait for the threads to finish, then release the session.
        coord.join(threads)
        sess.close()

        batch_pred = np.argmax(batch_logits, axis=2)
        total_wrong = np.sum(np.not_equal(batch_pred, batch_recons_label))
        print(total_wrong)

Example #15

Show file

def main(_):
    """Train, continuously evaluate and/or test a model on a TPUEstimator.

    The dataset class, evaluation function and model constructor are selected
    from `FLAGS.data_type` and `FLAGS.model_type`. Depending on
    `FLAGS.do_train`, `FLAGS.do_predict` and `FLAGS.do_test` the function
    then trains, runs continuous evaluation, and/or evaluates once on the test
    split, writing `test_metrics.txt` into `FLAGS.output_dir`.

    Args:
        _: Unused.
    """

    def _load_json(path):
        """Load a JSON file through tf.gfile, closing the handle afterwards."""
        with tf.gfile.Open(path) as json_file:
            return json.load(json_file)

    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.data_type == "onehop":
        dataset_class = input_fns.OneHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type == "twohop":
        dataset_class = input_fns.TwoHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type == "threehop":
        dataset_class = input_fns.ThreeHopDataset
        eval_fn = evaluate.multihop_eval_fn
    elif FLAGS.data_type in ("wikimovie", "wikimovie-2hop", "wikimovie-3hop"):
        dataset_class = input_fns.WikiMovieDataset
        eval_fn = evaluate.wikimovie_eval_fn
    elif FLAGS.data_type == "hotpotqa":
        dataset_class = input_fns.HotpotQADataset
        eval_fn = evaluate.hotpot_eval_fn
    if FLAGS.model_type == "onehop":
        create_model_fn = model_fns.create_onehop_model
    elif FLAGS.model_type == "twohop":
        create_model_fn = model_fns.create_twohop_model
    elif FLAGS.model_type == "twohop-cascaded":
        create_model_fn = model_fns.create_twohopcascade_model
    elif FLAGS.model_type == "threehop":
        create_model_fn = functools.partial(model_fns.create_twohop_model,
                                            num_hops=3)
    elif FLAGS.model_type == "threehop-cascaded":
        create_model_fn = functools.partial(
            model_fns.create_twohopcascade_model, num_hops=3)
    elif FLAGS.model_type == "wikimovie":
        create_model_fn = model_fns.create_wikimovie_model
    elif FLAGS.model_type == "wikimovie-2hop":
        create_model_fn = functools.partial(model_fns.create_wikimovie_model,
                                            num_hops=2)
    elif FLAGS.model_type == "wikimovie-3hop":
        create_model_fn = functools.partial(model_fns.create_wikimovie_model,
                                            num_hops=3)
    elif FLAGS.model_type == "hotpotqa":
        create_model_fn = functools.partial(model_fns.create_hotpotqa_model,
                                            num_hops=FLAGS.num_hops)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Load mention and entity files.
    mention2text = _load_json(
        os.path.join(FLAGS.train_data_dir, "mention2text.json"))
    tf.logging.info("Loading metadata about entities and mentions...")
    entity2id, entity2name = _load_json(
        os.path.join(FLAGS.train_data_dir, "entities.json"))
    entityid2name = {str(i): entity2name[e] for e, i in entity2id.items()}
    # Paragraphs and mentions are not loaded for the train split; they are
    # only read from the test data directory in the `do_test` branch below.
    all_paragraphs = None
    all_mentions = None

    qa_config = QAConfig(
        qry_layers_to_use=FLAGS.qry_layers_to_use,
        qry_aggregation_fn=FLAGS.qry_aggregation_fn,
        dropout=FLAGS.question_dropout,
        qry_num_layers=FLAGS.question_num_layers,
        projection_dim=FLAGS.projection_dim,
        load_only_bert=FLAGS.load_only_bert,
        num_entities=len(entity2id),
        max_entity_len=FLAGS.max_entity_len,
        ensure_answer_sparse=FLAGS.ensure_answer_sparse,
        ensure_answer_dense=FLAGS.ensure_answer_dense,
        train_with_sparse=FLAGS.train_with_sparse,
        predict_with_sparse=FLAGS.predict_with_sparse,
        fix_sparse_to_one=FLAGS.fix_sparse_to_one,
        supervision=FLAGS.supervision,
        l2_normalize_db=FLAGS.l2_normalize_db,
        entity_score_aggregation_fn=FLAGS.entity_score_aggregation_fn,
        entity_score_threshold=FLAGS.entity_score_threshold,
        softmax_temperature=FLAGS.softmax_temperature,
        sparse_reduce_fn=FLAGS.sparse_reduce_fn,
        intermediate_loss=FLAGS.intermediate_loss,
        light=FLAGS.light,
        sparse_strategy=FLAGS.sparse_strategy,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    mips_config = MIPSConfig(ckpt_path=os.path.join(FLAGS.train_data_dir,
                                                    "mention_feats"),
                             ckpt_var_name="db_emb",
                             num_mentions=len(mention2text),
                             emb_size=FLAGS.projection_dim * 2,
                             num_neighbors=FLAGS.num_mips_neighbors)

    validate_flags_or_throw()

    tf.gfile.MakeDirs(FLAGS.output_dir)

    if FLAGS.do_train:
        # Record the flag values of this run; the context manager makes sure
        # the file is flushed and closed.
        flags_path = os.path.join(FLAGS.output_dir, "flags.json")
        with tf.gfile.Open(flags_path, "w") as flags_file:
            json.dump(tf.app.flags.FLAGS.flag_values_dict(), flags_file)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=8,
        tpu_config=tf.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host),
        session_config=tf.ConfigProto(log_device_placement=False))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_dataset = dataset_class(
            in_file=FLAGS.train_file,
            tokenizer=tokenizer,
            subject_mention_probability=FLAGS.subject_mention_probability,
            max_qry_length=FLAGS.max_query_length,
            is_training=True,
            entity2id=entity2id,
            tfrecord_filename=os.path.join(FLAGS.output_dir,
                                           "train.tf_record"))
        num_train_steps = int(train_dataset.num_examples /
                              FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    summary_obj = None
    model_fn = model_fn_builder(
        bert_config=bert_config,
        qa_config=qa_config,
        mips_config=mips_config,
        init_checkpoint=FLAGS.init_checkpoint,
        e2m_checkpoint=os.path.join(FLAGS.train_data_dir, "ent2ment.npz"),
        m2e_checkpoint=os.path.join(FLAGS.train_data_dir, "coref.npz"),
        entity_id_checkpoint=os.path.join(FLAGS.train_data_dir, "entity_ids"),
        entity_mask_checkpoint=os.path.join(FLAGS.train_data_dir,
                                            "entity_mask"),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        create_model_fn=create_model_fn,
        summary_obj=summary_obj)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num orig examples = %d", train_dataset.num_examples)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train(train_dataset, estimator, num_train_steps)

    if FLAGS.do_predict:
        eval_dataset = dataset_class(in_file=FLAGS.predict_file,
                                     tokenizer=tokenizer,
                                     subject_mention_probability=0.0,
                                     max_qry_length=FLAGS.max_query_length,
                                     is_training=False,
                                     entity2id=entity2id,
                                     tfrecord_filename=os.path.join(
                                         FLAGS.output_dir, "eval.tf_record"))

        continuous_eval(eval_dataset,
                        estimator,
                        mention2text,
                        entityid2name,
                        qa_config.supervision,
                        eval_fn,
                        paragraphs=all_paragraphs,
                        mentions=all_mentions)

    if FLAGS.do_test:
        # Load mention and entity files of the test split.
        mention2text = _load_json(
            os.path.join(FLAGS.test_data_dir, "mention2text.json"))
        entity2id, entity2name = _load_json(
            os.path.join(FLAGS.test_data_dir, "entities.json"))
        entityid2name = {str(i): entity2name[e] for e, i in entity2id.items()}
        all_paragraphs = _load_json(
            os.path.join(FLAGS.test_data_dir, "subparas.json"))
        # .npy files are binary, so the handle must be opened in "rb" mode.
        mentions_path = os.path.join(FLAGS.test_data_dir, "mentions.npy")
        with tf.gfile.Open(mentions_path, "rb") as mentions_file:
            all_mentions = np.load(mentions_file)

        # Rebuild the model function and estimator against the test metadata.
        qa_config.num_entities = len(entity2id)
        mips_config = MIPSConfig(ckpt_path=os.path.join(
            FLAGS.test_data_dir, "mention_feats"),
                                 ckpt_var_name="db_emb",
                                 num_mentions=len(mention2text),
                                 emb_size=FLAGS.projection_dim * 2,
                                 num_neighbors=FLAGS.num_mips_neighbors)

        model_fn = model_fn_builder(
            bert_config=bert_config,
            qa_config=qa_config,
            mips_config=mips_config,
            init_checkpoint=FLAGS.init_checkpoint,
            e2m_checkpoint=os.path.join(FLAGS.test_data_dir, "ent2ment.npz"),
            m2e_checkpoint=os.path.join(FLAGS.test_data_dir, "coref.npz"),
            entity_id_checkpoint=os.path.join(FLAGS.test_data_dir,
                                              "entity_ids"),
            entity_mask_checkpoint=os.path.join(FLAGS.test_data_dir,
                                                "entity_mask"),
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu,
            create_model_fn=create_model_fn,
            summary_obj=summary_obj)
        estimator = tf.estimator.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        eval_dataset = dataset_class(in_file=FLAGS.test_file,
                                     tokenizer=tokenizer,
                                     subject_mention_probability=0.0,
                                     max_qry_length=FLAGS.max_query_length,
                                     is_training=False,
                                     entity2id=entity2id,
                                     tfrecord_filename=os.path.join(
                                         FLAGS.output_dir, "test.tf_record"))

        # Use the "best_model" checkpoint when its .meta file exists;
        # otherwise pass None as the checkpoint path.
        if tf.gfile.Exists(os.path.join(FLAGS.output_dir, "best_model.meta")):
            ckpt_path = os.path.join(FLAGS.output_dir, "best_model")
        else:
            ckpt_path = None
        output_prediction_file = os.path.join(FLAGS.output_dir,
                                              "test_predictions.json")
        metrics = single_eval(eval_dataset,
                              estimator,
                              ckpt_path,
                              mention2text,
                              entityid2name,
                              qa_config.supervision,
                              output_prediction_file,
                              eval_fn,
                              paragraphs=all_paragraphs,
                              mentions=all_mentions)
        with tf.gfile.Open(os.path.join(FLAGS.output_dir, "test_metrics.txt"),
                           "w") as fo:
            for metric, value in metrics.items():
                tf.logging.info("%s: %.4f", metric, value)
                fo.write("%s %.4f\n" % (metric, value))

Example #16

Show file

File: genericNeuralNet.py Project: hieptk/accent

    def __init__(self, **kwargs):
        """Create the session, build the graph and cache the full feed dicts.

        Settings arrive as keyword arguments. ``batch_size``, ``data_sets``,
        ``model_name``, ``num_classes``, ``initial_learning_rate``,
        ``decay_epochs`` and ``avextol`` are mandatory (``KeyError`` when
        missing). ``train_dir`` ('output'), ``log_dir`` ('log'),
        ``keep_probs`` (None), ``mini_batch`` (True) and ``damping`` (0.0)
        are optional; ``log_dir`` is consumed but not used in this method.
        """
        # Seed both NumPy and TensorFlow.
        np.random.seed(0)
        tf.set_random_seed(0)

        # Consume the settings in a fixed order.
        self.batch_size = kwargs.pop('batch_size')
        self.data_sets = kwargs.pop('data_sets')
        self.train_dir = kwargs.pop('train_dir', 'output')
        kwargs.pop('log_dir', 'log')  # accepted, but not used here
        self.model_name = kwargs.pop('model_name')
        self.num_classes = kwargs.pop('num_classes')
        self.initial_learning_rate = kwargs.pop('initial_learning_rate')
        self.decay_epochs = kwargs.pop('decay_epochs')
        self.avextol = kwargs.pop('avextol')
        self.keep_probs = kwargs.pop('keep_probs', None)
        self.mini_batch = kwargs.pop('mini_batch', True)
        self.damping = kwargs.pop('damping', 0.0)

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)

        # Session: only CUDA device "1" is made visible, and GPU memory is
        # allocated on demand instead of being reserved up front.
        os.environ["CUDA_VISIBLE_DEVICES"] = "1"
        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))
        self.sess = tf.Session(config=session_config)

        # Inputs.
        placeholders = self.placeholder_inputs()
        self.input_placeholder, self.labels_placeholder = placeholders
        self.num_train_examples = self.data_sets.train.labels.shape[0]
        self.num_test_examples = self.data_sets.test.labels.shape[0]

        # Inference: the subclass hook takes an extra argument when dropout
        # keep-probabilities are configured or when it declares that it
        # needs the labels.
        if self.keep_probs is not None:
            self.keep_probs_placeholder = tf.placeholder(tf.float32, shape=(2))
            self.logits = self.inference(
                self.input_placeholder, self.keep_probs_placeholder)
        elif hasattr(self, 'inference_needs_labels'):
            self.logits = self.inference(
                self.input_placeholder, self.labels_placeholder)
        else:
            self.logits = self.inference(self.input_placeholder)

        # Losses.
        losses = self.loss(self.logits, self.labels_placeholder)
        self.total_loss, self.loss_no_reg, self.indiv_loss_no_reg = losses

        # Step counter and a learning rate that can be reassigned through
        # `update_learning_rate_op`.
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.learning_rate = tf.Variable(
            self.initial_learning_rate, name='learning_rate', trainable=False)
        self.learning_rate_placeholder = tf.placeholder(tf.float32)
        self.update_learning_rate_op = tf.assign(
            self.learning_rate, self.learning_rate_placeholder)

        # Training ops; the SGD variant runs at ten times the learning rate.
        train_ops = self.get_train_op(
            self.total_loss, self.global_step, self.learning_rate)
        self.train_op, self.reset_optimizer_op = train_ops
        self.train_sgd_op = self.get_train_sgd_op(
            self.total_loss, self.global_step, self.learning_rate * 10)
        self.accuracy_op = self.get_accuracy_op(
            self.logits, self.labels_placeholder)
        self.preds = self.predictions(self.logits)

        # Checkpointing.
        self.saver = tf.train.Saver()

        # Gradients and Hessian-vector products w.r.t. the parameters.
        self.params = self.get_all_params()
        self.grad_total_loss_op = tf.gradients(self.total_loss, self.params)
        self.grad_loss_no_reg_op = tf.gradients(self.loss_no_reg, self.params)
        self.grad_loss_r = tf.gradients(tf.squeeze(self.logits), self.params)
        self.v_placeholder = [
            tf.placeholder(tf.float32, shape=param.get_shape())
            for param in self.params
        ]
        self.u_placeholder = [
            tf.placeholder(tf.float32, shape=param.get_shape())
            for param in self.params
        ]

        self.hessian_vector = hessian_vector_product(
            self.total_loss, self.params, self.v_placeholder)

        self.grad_loss_wrt_input_op = tf.gradients(
            self.total_loss, self.input_placeholder)

        # Because tf.gradients auto accumulates, we probably don't need the
        # add_n (or even reduce_sum).
        self.influence_op = tf.add_n([
            tf.reduce_sum(tf.multiply(grad, array_ops.stop_gradient(vec)))
            for grad, vec in zip(self.grad_total_loss_op, self.v_placeholder)
        ])

        self.grad_influence_wrt_input_op = tf.gradients(
            self.influence_op, self.input_placeholder)

        self.checkpoint_file = os.path.join(
            self.train_dir, "%s-checkpoint" % self.model_name)

        # Feed dicts covering the whole train / test split.
        self.all_train_feed_dict = self.fill_feed_dict_with_all_ex(
            self.data_sets.train)
        self.all_test_feed_dict = self.fill_feed_dict_with_all_ex(
            self.data_sets.test)

        self.sess.run(tf.global_variables_initializer())

        self.vec_to_list = self.get_vec_to_list_fn()

        # NOTE: from here on `self.adversarial_loss` is the value returned by
        # the method of the same name, which it shadows on this instance.
        adversarial_losses = self.adversarial_loss(
            self.logits, self.labels_placeholder)
        self.adversarial_loss, self.indiv_adversarial_loss = adversarial_losses
        if self.adversarial_loss is not None:
            self.grad_adversarial_loss_op = tf.gradients(
                self.adversarial_loss, self.params)

Example #17

Show file

File: tf1dp.py Project: anonymous0118999/mlsyssubmission

def main(args):
    """Train the selected model with a TF1 Estimator and record epoch timings.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``memory_limit``, ``microbatches`` (must be None on entry; it is
            overwritten with ``batch_size``), ``batch_size``, ``experiment``,
            ``dummy_data``, ``max_features``, ``max_len``, ``use_xla``,
            ``epochs``, ``dpsgd`` and ``no_save``.

    Raises:
        AssertionError: if ``args.microbatches`` is already set, or if XLA is
            requested while ``TF_XLA_FLAGS`` is not ``--tf_xla_auto_jit=2``.
        KeyError: if XLA is requested and ``TF_XLA_FLAGS`` is not set at all.
    """
    print(args)
    tf.disable_eager_execution()
    if args.memory_limit:
        # Configure the first GPU with the requested memory limit.
        physical_devices = tf.config.list_physical_devices('GPU')
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0], [
                tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=args.memory_limit)
            ])

    assert args.microbatches is None
    args.microbatches = args.batch_size

    # data_fn_dict maps experiment -> (real, dummy) loader pair, indexed by
    # the dummy_data flag.
    data_fn = data.data_fn_dict[args.experiment][int(args.dummy_data)]
    kwargs = {
        'max_features': args.max_features,
        'max_len': args.max_len,
        'format': 'NHWC',
    }
    if args.dummy_data:
        kwargs['num_examples'] = args.batch_size * 2
    (train_data, train_labels), _ = data_fn(**kwargs)
    num_train_eg = train_data.shape[0]

    loss_fn = tf.nn.sparse_softmax_cross_entropy_with_logits
    if args.experiment == 'logreg':

        def loss_fn(labels, logits):
            # Logistic regression uses a sigmoid loss on squeezed logits.
            return tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=tf.squeeze(logits))

        train_labels = train_labels.astype('float32')

    model = partial(model_dict[args.experiment],
                    features=train_data,
                    max_features=args.max_features,
                    args=args)

    if args.use_xla:
        # Not sure which one of these two works, so I'll just use both
        assert os.environ['TF_XLA_FLAGS'] == '--tf_xla_auto_jit=2'
        session_config = tf.ConfigProto()
        session_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_2)
        run_config = tf.estimator.RunConfig(session_config=session_config)
        print('Using XLA!')
    else:
        run_config = None
        print('NOT using XLA!')

    model_obj = tf.estimator.Estimator(
        model_fn=partial(nn_model_fn, model, loss_fn, args),
        config=run_config)
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_data},
        y=train_labels,
        batch_size=args.batch_size,
        num_epochs=args.epochs,
        shuffle=True)

    steps_per_epoch = num_train_eg // args.batch_size
    timings = []
    for epoch in range(1, args.epochs + 1):
        start = time.perf_counter()
        model_obj.train(input_fn=train_input_fn, steps=steps_per_epoch)
        duration = time.perf_counter() - start
        print("Time Taken: ", duration)
        timings.append(duration)

        if args.dpsgd:
            # eps = compute_epsilon(epoch, num_train_eg, args)
            # print('For delta=1e-5, the current epsilon is: %.2f' % eps)
            print('Trained with DPSGD optimizer')
        else:
            print('Trained with vanilla non-private SGD optimizer')

    if not args.no_save:
        # Strip only the file extension. Splitting on the first '.' returned
        # '' for paths such as './tf1dp.py' and truncated any path that has a
        # dot in a directory name.
        utils.save_runtimes(os.path.splitext(__file__)[0], args, timings)
    else:
        print('Not saving!')
    print('Done!')

Example #18

Show file

File: main.py Project: GirardMatthew23/InverseHalftone

def evaluate(test_list, checkpoint_dir):
    """Run PRLNet over the test list, save predictions and write PSNR/SSIM.

    Predicted images are saved under ``./output/results`` and the mean and
    standard deviation of the per-batch PSNR and SSIM are written to
    ``./output/accuracy.txt``.

    Args:
        test_list: passed to ``input_producer`` to build the input queue.
        checkpoint_dir: directory searched for the latest checkpoint.

    Returns:
        None. Returns early (after printing an error) when no checkpoint can
        be found in ``checkpoint_dir``.
    """
    print('Running PRLNet -Evaluation!')
    save_dir_test = os.path.join("./output/results")
    exists_or_mkdir(save_dir_test)
    # --------------------------------- set model ---------------------------------
    # data fetched within range: [-1,1]
    input_imgs, target_imgs, num = input_producer(test_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=False)
    contents, details, pred_imgs = gen_PRLNet(input_imgs,
                                              out_channels,
                                              is_train=False,
                                              reuse=False)

    # --------------------------------- evaluation ---------------------------------
    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Restore model weights from previously saved model
        check_pt = tf.train.get_checkpoint_state(checkpoint_dir)
        if check_pt and check_pt.model_checkpoint_path:
            saver.restore(sess, check_pt.model_checkpoint_path)
            print('model is loaded successfully.')
        else:
            print('# error: loading checkpoint failed.')
            return None

        cnt = 0
        psnr_list = []
        ssim_list = []
        start_time = time.time()
        while not coord.should_stop():
            tm = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
            print('%s evaluating: [%d - %d]' % (tm, cnt, cnt + batch_size))
            pd_images, gt_images = sess.run([pred_imgs, target_imgs])
            save_images_from_batch(pd_images, save_dir_test, cnt)
            psnr, ssim = measure_quality(pd_images, gt_images)
            psnr_list.append(psnr)
            ssim_list.append(ssim)
            cnt += batch_size
            if cnt >= num:
                # All test samples have been consumed.
                coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)
    # The session is closed on leaving the `with` block; no explicit close.

    print("Testing finished! consumes %f sec" % (time.time() - start_time))
    print("Numerical accuracy computing ...")
    # numerical evaluation
    mean_psnr = np.mean(np.array(psnr_list))
    stde_psnr = np.std(np.array(psnr_list))
    mean_ssim = np.mean(np.array(ssim_list))
    stde_ssim = np.std(np.array(ssim_list))
    save_path = os.path.join("./output/", "accuracy.txt")
    with open(save_path, 'w') as f:
        f.write('mean psnr:' + str(mean_psnr) + '\n')
        f.write('stde psnr:' + str(stde_psnr) + '\n\n')
        f.write('mean ssim:' + str(mean_ssim) + '\n')
        # This line used to be labelled 'stde psnr:' although it reports the
        # SSIM standard deviation.
        f.write('stde ssim:' + str(stde_ssim) + '\n')
    print("Done!")

Example #19

Show file

def train(replication_factor, batch_size, batch_per_step, profile, num_iter,
          time_steps):
    """Launch training on the IPU and print the measured throughput.

    Args:
        replication_factor: forwarded to the infeed queue and to
            ``utils.auto_select_ipus``; also scales the sample count.
        batch_size: batch size handed to ``EnvGenerator``.
        batch_per_step: number of ``build_train_op`` repetitions per
            ``sess.run`` (the ``loops.repeat`` count).
        profile: when truthy, run a single iteration, write a profiling
            report and stop.
        num_iter: number of timed ``sess.run`` iterations.
        time_steps: forwarded to ``EnvGenerator``.
    """

    # Set up in-feeds for the data
    with tf.device('cpu'):
        data_generator = EnvGenerator(batch_size, time_steps)
        items = next(data_generator)
        output_types = tuple(tf.dtypes.as_dtype(i.dtype) for i in items)
        output_shapes = tuple(tf.TensorShape(i.shape) for i in items)
        total_bytes = sum(i.nbytes for i in items)
        print(f'Input data size = {total_bytes/1000000} MB/batch')
        dataset = tf.data.Dataset.from_generator(data_generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
            dataset, "InfeedQueue", replication_factor=replication_factor)
        data_init = infeed_queue.initializer

    # Compile loss op
    with ipu_scope("/device:IPU:0"):
        total_loss = ipu_compiler.compile(
            lambda: loops.repeat(batch_per_step,
                                 build_train_op,
                                 infeed_queue=infeed_queue,
                                 inputs=[tf.constant(0.0, dtype=DTYPE)]))
    # Set up report op optionally.
    if profile:
        with tf.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

    # Set up session on IPU
    opts = utils.create_ipu_config(
        profiling=profile,
        use_poplar_text_report=use_poplar_text_report,
        profile_execution=profile,
        merge_infeed_io_copies=True)
    opts = utils.set_optimization_options(
        opts, max_cross_replica_sum_buffer_size=10000000)
    opts = utils.auto_select_ipus(opts, [replication_factor])
    utils.configure_ipu_system(opts)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=True))

    # The session owns device resources; make sure it is released even when
    # an iteration raises.
    try:
        # Initialize variables
        utils.move_variable_initialization_to_cpu()
        sess.run([tf.global_variables_initializer(), data_init])

        # Run training and time
        total_time = 0.0
        total_samples = 0
        skip_iterations = 5  # Initially the infeed may buffer extra input data and
        # first run for IPU includes XLA compile, so skipping these iterations for calculating items/sec.
        for iters in range(num_iter):
            data_generator.reset_counter()
            t0 = time.perf_counter()
            sess.run(total_loss)
            t1 = time.perf_counter()

            if profile:
                raw_reports = sess.run(report)
                if use_poplar_text_report:
                    # extract the report
                    rep = utils.extract_all_strings_from_event_trace(
                        raw_reports)
                    print("Writing profiling report to %s" % report_dest)
                    with open(report_dest, "w") as f:
                        f.write(rep)
                else:
                    os.makedirs('profile_rl', exist_ok=True)
                    save_tf_report(raw_reports, log_dir='profile_rl')
                    print("Writing profiling report to profile_rl")
                # Profiling needs only one iteration.
                break

            if iters > skip_iterations:
                total_time += (t1 - t0)
                total_samples += (batch_size * batch_per_step *
                                  replication_factor)
                print("Average %.1f items/sec" % (total_samples / total_time))
    finally:
        sess.close()

Example #20

Show file

File: main.py Project: GirardMatthew23/InverseHalftone

def train(train_list, val_list, debug_mode=True):
    """Build the PRLNet training graph and run the training loop.

    Creates the ``./graph``, ``./model`` and ``./output`` folders, builds the
    network and its loss, then trains for ``n_epochs`` epochs. Every
    ``save_epochs`` epochs a checkpoint and the loss/PSNR histories are
    written; with ``debug_mode`` the validation set is evaluated first.

    Besides its arguments, the function reads these names from the enclosing
    module: ``in_channels``, ``out_channels``, ``batch_size``,
    ``learning_rate``, ``n_epochs``, ``beta1``, ``display_steps`` and
    ``save_epochs``.

    Args:
        train_list: passed to ``input_producer`` for the training queue.
        val_list: passed to ``input_producer`` for the validation queue;
            only used when ``debug_mode`` is truthy.
        debug_mode: when truthy, also build the validation branch and report
            PSNR at every save point.

    Returns:
        None.
    """
    print('Running PRLNet -Training!')
    # create folders to save trained model and results
    graph_dir = './graph'
    checkpt_dir = './model'
    ouput_dir = './output'
    exists_or_mkdir(graph_dir, need_remove=True)
    exists_or_mkdir(ouput_dir)
    exists_or_mkdir(checkpt_dir)

    # --------------------------------- load data ---------------------------------
    # data fetched at range: [-1,1]
    input_imgs, target_imgs, num = input_producer(train_list,
                                                  in_channels,
                                                  batch_size,
                                                  need_shuffle=True)
    if debug_mode:
        input_val, target_val, num_val = input_producer(val_list,
                                                        in_channels,
                                                        batch_size,
                                                        need_shuffle=False)

    # Training branch of the network.
    pred_content, pred_detail, pred_imgs = gen_PRLNet(input_imgs,
                                                      out_channels,
                                                      is_train=True,
                                                      reuse=False)
    if debug_mode:
        # Validation branch, built with reuse=True and is_train=False.
        _, _, pred_val = gen_PRLNet(input_val,
                                    out_channels,
                                    is_train=False,
                                    reuse=True)

    # --------------------------------- loss terms ---------------------------------
    with tf.name_scope('Loss') as loss_scp:
        # Targets and predictions are resized to 224x224 before going
        # through VGG19.
        target_224 = tf.image.resize_images(target_imgs,
                                            size=[224, 224],
                                            method=0,
                                            align_corners=False)
        predict_224 = tf.image.resize_images(pred_imgs,
                                             size=[224, 224],
                                             method=0,
                                             align_corners=False)
        vgg19_api = VGG19("../vgg19.npy")
        # (x + 1) / 2 shifts the images before VGG; with inputs in [-1,1]
        # (see the note above) that yields [0,1].
        vgg_map_targets = vgg19_api.build((target_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))
        vgg_map_predict = vgg19_api.build((predict_224 + 1) / 2,
                                          is_rgb=(in_channels == 3))

        content_loss = tf.losses.mean_squared_error(target_imgs, pred_content)
        vgg_loss = 2e-6 * tf.losses.mean_squared_error(vgg_map_targets,
                                                       vgg_map_predict)
        l1_loss = tf.reduce_mean(tf.abs(target_imgs - pred_imgs))
        # NOTE(review): mse_loss is built but is not part of loss_op below.
        mse_loss = tf.losses.mean_squared_error(target_imgs, pred_imgs)

        loss_op = content_loss + 2 * vgg_loss + l1_loss

    # --------------------------------- solver definition ---------------------------------
    global_step = tf.Variable(0, name='global_step', trainable=False)
    iters_per_epoch = np.floor_divide(num, batch_size)
    # Polynomial decay from learning_rate down to learning_rate / 100 over
    # the whole run (iters_per_epoch * n_epochs steps).
    lr_decay = tf.train.polynomial_decay(
        learning_rate=learning_rate,
        global_step=global_step,
        decay_steps=iters_per_epoch * n_epochs,
        end_learning_rate=learning_rate / 100.0,
        power=0.9)

    # Ops in the UPDATE_OPS collection become control dependencies of the
    # optimizer ops (presumably batch-norm statistics -- TODO confirm).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('optimizer'):
        with tf.control_dependencies(update_ops):
            # Only variables whose name starts with "PRLNet" are optimized.
            gen_vars = [
                var for var in tf.trainable_variables()
                if var.name.startswith("PRLNet")
            ]
            gen_optim = tf.train.AdamOptimizer(lr_decay, beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(loss_op,
                                                             var_list=gen_vars)
            train_op = gen_optim.apply_gradients(gen_grads_and_vars,
                                                 global_step=global_step)

    # --------------------------------- model training ---------------------------------
    '''
    if debug_mode:
        with tf.name_scope('summarise') as sum_scope:
            tf.summary.scalar('loss', loss_op)
            tf.summary.scalar('learning rate', lr_decay)
            tf.summary.image('predicts', pred_imgs, max_outputs=9)
            summary_op = tf.summary.merge_all()
    '''

    # Total element count over all trainable variables (printed below).
    with tf.name_scope("parameter_count"):
        num_parameters = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # set GPU resources
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = 0.45

    saver = tf.train.Saver(max_to_keep=1)
    loss_list = []  # mean training loss, one entry per epoch
    psnr_list = []  # mean validation PSNR, one entry per save point
    with tf.Session(config=config) as sess:
        # Start the input queue threads, then initialize the variables.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        sess.run(tf.global_variables_initializer())
        print(">>------------>>> [Training_Num] =%d" % num)
        print(">>------------>>> [Parameter_Num] =%d" %
              sess.run(num_parameters))
        '''
        if debug_mode:
            with tf.name_scope(sum_scope):
                summary_writer = tf.summary.FileWriter(graph_dir, graph=sess.graph)
        '''
        for epoch in range(0, n_epochs):
            start_time = time.time()
            epoch_loss, n_iters = 0, 0
            # One training step per batch; `step` itself is only a counter.
            for step in range(0, num, batch_size):
                _, loss = sess.run([train_op, loss_op])
                epoch_loss += loss
                n_iters += 1
                # iteration information
                if n_iters % display_steps == 0:
                    tm = datetime.datetime.now().strftime(
                        '%Y-%m-%d %H:%M:%S.%f')
                    print("%s >> [%d/%d] iter: %d  loss: %4.4f" %
                          (tm, epoch, n_epochs, n_iters, loss))
                    '''
                    if debug_mode:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)
                    '''

            # epoch information
            # NOTE(review): divides by n_iters, which is 0 when num is 0.
            epoch_loss = epoch_loss / n_iters
            loss_list.append(epoch_loss)
            print(
                "[*] ----- Epoch: %d/%d | Loss: %4.4f | Time-consumed: %4.3f -----"
                % (epoch, n_epochs, epoch_loss, (time.time() - start_time)))

            # Periodic validation (debug mode only) and checkpointing.
            if (epoch + 1) % save_epochs == 0:
                if debug_mode:
                    print("----- validating model ...")
                    mean_psnr, nn = 0, 0
                    for idx in range(0, num_val, batch_size):
                        predicts, groundtruths = sess.run(
                            [pred_val, target_val])
                        save_images_from_batch(predicts, ouput_dir, idx)
                        psnr = measure_psnr(predicts, groundtruths)
                        mean_psnr += psnr
                        nn += 1
                    # NOTE(review): nn is 0 when num_val is 0.
                    psnr_list.append(mean_psnr / nn)
                    print("----- psnr:%4.4f" % (mean_psnr / nn))

                print("----- saving model  ...")
                saver.save(sess,
                           os.path.join(checkpt_dir, "model.cpkt"),
                           global_step=global_step)
                save_list(os.path.join(ouput_dir, "loss"), loss_list)
                save_list(os.path.join(ouput_dir, "psnr"), psnr_list)

        # stop data queue
        coord.request_stop()
        coord.join(threads)
        # write out the loss list
        save_list(os.path.join(ouput_dir, "loss"), loss_list)
        save_list(os.path.join(ouput_dir, "psnr"), psnr_list)
        print("Training finished!")

    return None

Example #21

Show file

File: inference.py Project: ailabktw/automl

  def export(self,
             output_dir: Text,
             tflite_path: Text = None,
             tensorrt: Text = None):
    """Export a saved model, frozen graph, and potential tflite/tensorrt model.

    The saved model is always written to `output_dir`, together with a frozen
    graph named `<model_name>_frozen.pb`. The TFLite and TensorRT exports are
    optional and both start from the saved model in `output_dir`.

    Args:
      output_dir: the output folder for saved model.
      tflite_path: the path for saved tflite file.
      tensorrt: If not None, must be {'FP32', 'FP16', 'INT8'}.
    """
    signitures = self.signitures
    signature_def_map = {
        'serving_default':
            tf.saved_model.predict_signature_def(
                {signitures['image_arrays'].name: signitures['image_arrays']},
                {signitures['prediction'].name: signitures['prediction']}),
    }
    b = tf.saved_model.Builder(output_dir)
    b.add_meta_graph_and_variables(
        self.sess,
        tags=['serve'],
        signature_def_map=signature_def_map,
        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
        clear_devices=True)
    b.save()
    logging.info('Model saved at %s', output_dir)

    # also save freeze pb file.
    graphdef = self.freeze()
    pb_path = os.path.join(output_dir, self.model_name + '_frozen.pb')
    # Close the file explicitly so the data is flushed before it is reported
    # as saved.
    with tf.io.gfile.GFile(pb_path, 'wb') as f:
      f.write(graphdef.SerializeToString())
    logging.info('Frozen graph saved at %s', pb_path)

    if tflite_path:
      height, width = utils.parse_image_size(self.params['image_size'])
      input_name = signitures['image_arrays'].op.name
      input_shapes = {input_name: [None, height, width, 3]}
      converter = tf.lite.TFLiteConverter.from_saved_model(
          output_dir,
          input_arrays=[input_name],
          input_shapes=input_shapes,
          output_arrays=[signitures['prediction'].op.name])
      converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
      tflite_model = converter.convert()

      with tf.io.gfile.GFile(tflite_path, 'wb') as f:
        f.write(tflite_model)
      logging.info('TFLite is saved at %s', tflite_path)

    if tensorrt:
      from tensorflow.python.compiler.tensorrt import trt  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top
      sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
      trt_path = os.path.join(output_dir, 'tensorrt_' + tensorrt.lower())
      trt.create_inference_graph(
          None,
          None,
          precision_mode=tensorrt,
          input_saved_model_dir=output_dir,
          output_saved_model_dir=trt_path,
          session_config=sess_config)
      logging.info('TensorRT model is saved at %s', trt_path)

Example #22

Show file

def main():
    """Show a live 4x4 mosaic of stylized webcam frames.

    Without ``--inter`` each of the 16 mosaic tiles uses one of the 16 styles
    on its own. With ``--inter a b c d`` the four given style indices are
    blended bilinearly across the 4x4 grid. Press ``q`` in a display window
    to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint_path', help='Path to checkpoint to load')
    parser.add_argument('--input-size',
                        type=int,
                        default=256,
                        help='Shape of input to use (depends on checkpoint)')
    parser.add_argument('--inter',
                        nargs='+',
                        type=int,
                        help='Interpolate between the 4 style given')
    arguments = parser.parse_args()

    # One row of 16 style weights per mosaic tile.
    style_control = []
    style_inter = arguments.inter
    if not style_inter:
        for style_index in range(16):
            style_control.append([0.0] * 16)
            style_control[-1][style_index] = 1
    else:
        for col in range(4):
            for row in range(4):
                style_control.append([0.0] * 16)
                # top left style
                style_control[-1][style_inter[0]] = ((3 - row) / 3) * (
                    (3 - col) / 3)
                # top right style
                style_control[-1][style_inter[1]] = (row / 3) * ((3 - col) / 3)
                # bottom left style
                style_control[-1][style_inter[2]] = ((3 - row) / 3) * (col / 3)
                # bottom right style
                style_control[-1][style_inter[3]] = (row / 3) * (col / 3)
    # `np.float` was only an alias of the builtin float and was removed in
    # NumPy 1.24; float64 is the dtype it resolved to.
    style_control = np.asarray(style_control, dtype=np.float64)

    capture = cv2.VideoCapture(-1)
    try:
        # Set the log level before TensorFlow is imported.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        import tensorflow as tf
        if tf.__version__.split('.')[0] == '2':
            import tensorflow.compat.v1 as tf
            tf.disable_v2_behavior()
        import tensorflow.compat.v1 as tf1
        from engine_multi import EngineMultiStyle

        gpu_options = tf1.GPUOptions(allow_growth=True)
        session_config = tf1.ConfigProto(gpu_options=gpu_options)
        with tf1.Session(config=session_config).as_default() as session:
            input_size = arguments.input_size
            engine = EngineMultiStyle(session, input_size,
                                      arguments.checkpoint_path)

            mosaic = np.zeros((4 * input_size, 4 * input_size, 3),
                              dtype=np.uint8)
            while True:
                # Capture frame-by-frame
                grabbed, frame = capture.read()
                if not grabbed:
                    # No frame available (camera missing or disconnected).
                    print('Failed to read a frame from the capture device.')
                    break

                frame = cv2.resize(
                    frame, (arguments.input_size, arguments.input_size))
                input_image = np.asarray(frame)

                outputs = engine.predict([input_image] * 16, style_control)
                for row in range(4):
                    for col in range(4):
                        mosaic[col * input_size:(col + 1) * input_size,
                               row * input_size:(row + 1) *
                               input_size] = outputs[(4 * col) + row]

                # Display the resulting frame
                cv2.imshow('original', input_image)
                cv2.imshow('style', mosaic)
                key_pressed = cv2.waitKey(1) & 0xFF
                if key_pressed == ord('q'):
                    break
    finally:
        # When everything done (or on error), release the capture
        capture.release()
        cv2.destroyAllWindows()

Example #23

Show file

def train(config):
    """Build the handwriting training graph and run the training loop.

    Creates the queue-based input pipeline, the training and sampling models,
    an optional validation model, an Adam optimizer with gradient clipping,
    and then iterates over epochs while writing checkpoints and TensorBoard
    summaries below ``config['model_dir']``.

    Args:
        config: dict of experiment settings. Keys read with ``config[...]``:
            ``training_data``, ``use_bow_labels``, ``batch_size``,
            ``num_epochs``, ``loss_weights`` (its ``kld_loss`` entry),
            ``learning_rate_type`` (``'exponential'`` or ``'fixed'``),
            ``learning_rate``, ``grad_clip_by_norm``, ``grad_clip_by_value``,
            ``model_dir``, ``checkpoint_id``, ``tensorboard_verbose``,
            ``checkpoint_every_step``, ``img_summary_every_step`` and
            ``print_every_step``. Depending on the branch taken also
            ``learning_rate_decay_steps``, ``learning_rate_decay_rate``,
            ``validation_data``, ``validate_every_step``, ``model_save_dir``
            and ``experiment_name``. Optional flags read with ``config.get``:
            ``use_bucket_feeder`` (default True), ``use_staging_area``,
            ``validate_model`` and ``create_timeline`` (default False).
            ``config['model_dir']`` is overwritten when a fresh run starts;
            ``config['loss_weights']['kld_loss']`` is temporarily replaced by
            a tensor and restored before the configuration is dumped.

    Raises:
        Exception: if ``config['learning_rate_type']`` is neither
            ``'exponential'`` nor ``'fixed'``.
        ValueError: if training should resume from ``config['model_dir']``
            but no checkpoint can be found there.
    """
    Model_cls = HandwritingVRNNGmmModel
    Dataset_cls = HandWritingDatasetConditionalTF

    # Read the optional flags once so that every later check agrees on them
    # (the staging area objects only exist when the flag is truthy).
    use_staging_area = config.get('use_staging_area', False)
    validate_model = config.get('validate_model', False)
    create_timeline = config.get('create_timeline', False)

    # Dataset
    training_dataset = Dataset_cls(config['training_data'],
                                   use_bow_labels=config['use_bow_labels'])

    num_training_iterations = int(training_dataset.num_samples /
                                  config['batch_size'])
    print("# training steps per epoch: " + str(num_training_iterations))

    # Create a tensorflow sub-graph that loads batches of samples.
    if config.get('use_bucket_feeder', True) and training_dataset.is_dynamic:
        bucket_edges = training_dataset.get_seq_len_histogram(
            num_bins=15, collapse_first_and_last_bins=[2, -2])
        data_feeder = DataFeederTF(training_dataset,
                                   config['num_epochs'],
                                   config['batch_size'],
                                   queue_capacity=1024)

        sequence_length, inputs, targets = data_feeder.batch_queue_bucket(
            bucket_edges,
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=300,
            queue_threads=4)
    else:
        # Training data
        data_feeder = DataFeederTF(training_dataset,
                                   config['num_epochs'],
                                   config['batch_size'],
                                   queue_capacity=1024)
        sequence_length, inputs, targets = data_feeder.batch_queue(
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=512,
            queue_threads=4)

    if use_staging_area:
        staging_area = TFStagingArea([sequence_length, inputs, targets],
                                     device_name="/gpu:0")
        sequence_length, inputs, targets = staging_area.tensors

    # Create step counter (used by optimization routine and learning rate function.)
    global_step = tf.compat.v1.get_variable(name='global_step',
                                            trainable=False,
                                            initializer=1)

    # Annealing KL-divergence loss.
    # The original value is kept so the config can be serialized later
    # without the tensor that replaces it below.
    kld_loss_weight_backup = config['loss_weights']['kld_loss']
    if isinstance(config['loss_weights']['kld_loss'], np.ndarray):
        # Create a piecewise increasing kld loss weight.
        num_steps = len(config['loss_weights']['kld_loss'])
        values = np.linspace(0, 1, num_steps + 1).tolist()
        boundaries = (config['loss_weights']['kld_loss'] *
                      num_training_iterations).tolist()

        config['loss_weights']['kld_loss'] = tf.train.piecewise_constant(
            global_step, boundaries=boundaries, values=values)
        tf.summary.scalar('training/kld_loss_weight',
                          config['loss_weights']['kld_loss'],
                          collections=["training_status"])

    # Create training graph.
    with tf.name_scope("training"):
        model = Model_cls(config,
                          reuse=False,
                          input_op=inputs,
                          target_op=targets,
                          input_seq_length_op=sequence_length,
                          input_dims=training_dataset.input_dims,
                          target_dims=training_dataset.target_dims,
                          mode="training",
                          data_processor=training_dataset)

        model.build_graph()
        model.create_image_summary(training_dataset.prepare_for_visualization)

    # Create sampling graph.
    with tf.name_scope("sampling"):
        sampling_input_op = tf.compat.v1.placeholder(
            tf.float32,
            shape=[
                1, training_dataset.sequence_length,
                sum(training_dataset.input_dims)
            ])
        sampling_sequence_length_op = tf.compat.v1.placeholder(tf.int32,
                                                               shape=[1])
        sampling_model = Model_cls(
            config,
            reuse=True,
            input_op=sampling_input_op,
            target_op=None,
            input_seq_length_op=sampling_sequence_length_op,
            input_dims=training_dataset.input_dims,
            target_dims=training_dataset.target_dims,
            batch_size=1,
            mode="sampling",
            data_processor=training_dataset)
        sampling_model.build_graph()
        sampling_model.create_image_summary(
            training_dataset.prepare_for_visualization)

    # Validation model.
    if validate_model:
        validation_dataset = Dataset_cls(
            config['validation_data'], use_bow_labels=config['use_bow_labels'])

        num_validation_iterations = int(validation_dataset.num_samples /
                                        config['batch_size'])
        print("# validation steps per epoch: " +
              str(num_validation_iterations))

        valid_data_feeder = DataFeederTF(validation_dataset,
                                         config['num_epochs'],
                                         config['batch_size'],
                                         queue_capacity=1024,
                                         shuffle=False)
        valid_sequence_length, valid_inputs, valid_targets = valid_data_feeder.batch_queue(
            dynamic_pad=validation_dataset.is_dynamic,
            queue_capacity=512,
            queue_threads=4)

        if use_staging_area:
            valid_staging_area = TFStagingArea(
                [valid_sequence_length, valid_inputs, valid_targets],
                device_name="/gpu:0")
            valid_sequence_length, valid_inputs, valid_targets = valid_staging_area.tensors

        with tf.name_scope("validation"):
            valid_model = Model_cls(config,
                                    reuse=True,
                                    input_op=valid_inputs,
                                    target_op=valid_targets,
                                    input_seq_length_op=valid_sequence_length,
                                    input_dims=validation_dataset.input_dims,
                                    target_dims=validation_dataset.target_dims,
                                    mode="training",
                                    data_processor=validation_dataset)
            valid_model.build_graph()

    # Create a session object and initialize parameters.
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            allow_soft_placement=True))

    if config['learning_rate_type'] == 'exponential':
        learning_rate = tf.train.exponential_decay(
            config['learning_rate'],
            global_step=global_step,
            decay_steps=config['learning_rate_decay_steps'],
            decay_rate=config['learning_rate_decay_rate'],
            staircase=False)
        tf.summary.scalar('training/learning_rate',
                          learning_rate,
                          collections=["training_status"])
    elif config['learning_rate_type'] == 'fixed':
        learning_rate = config['learning_rate']
    else:
        raise Exception("Invalid learning rate type")

    optimizer = tf.train.AdamOptimizer(learning_rate)
    # Gradient clipping and a sanity check.
    trainable_variables = tf.trainable_variables()
    grads = list(
        zip(tf.gradients(model.loss, trainable_variables),
            trainable_variables))
    grads_clipped = []
    with tf.name_scope("grad_clipping"):
        for grad, var in grads:
            # Variables that do not influence the loss have no gradient and
            # are left out of the update.
            if grad is None:
                continue
            if config['grad_clip_by_norm'] > 0:
                grad = tf.clip_by_norm(grad, config['grad_clip_by_norm'])
            elif config['grad_clip_by_value'] > 0:
                # Clip into the symmetric interval [-v, +v]. Passing -v for
                # both bounds would force every gradient to the constant -v.
                grad = tf.clip_by_value(grad, -config['grad_clip_by_value'],
                                        config['grad_clip_by_value'])
            grads_clipped.append((grad, var))
    train_op = optimizer.apply_gradients(grads_and_vars=grads_clipped,
                                         global_step=global_step)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    run_opts = None
    run_opts_metadata = None
    if create_timeline:
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                                 timeout_in_ms=100000)
        run_opts_metadata = tf.RunMetadata()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
    if config['model_dir']:
        # If model directory already exists, continue training by restoring computation graph.
        # Restore variables.
        if config['checkpoint_id'] is None:
            checkpoint_path = tf.train.latest_checkpoint(config['model_dir'])
            if checkpoint_path is None:
                # Fail with a clear message instead of a TypeError in the
                # string concatenation below.
                raise ValueError("No checkpoint found in " +
                                 str(config['model_dir']))
        else:
            checkpoint_path = os.path.join(config['model_dir'],
                                           config['checkpoint_id'])

        print("Continue training with model " + checkpoint_path)
        saver.restore(sess, checkpoint_path)

        step = tf.train.global_step(sess, global_step)
        start_epoch = round(
            step / (training_dataset.num_samples / config['batch_size']))
    else:
        # Fresh start
        # Create a unique output directory for this experiment.
        config['model_dir'] = get_model_dir_timestamp(
            base_path=config['model_save_dir'],
            prefix="tf",
            suffix=config['experiment_name'],
            connector="-")
        print("Saving to {}\n".format(config['model_dir']))
        start_epoch = 1
        step = 1

    coord = tf.train.Coordinator()
    data_feeder.init(
        sess, coord
    )  # Enqueue threads must be initialized after definition of train_op.
    if validate_model:
        valid_data_feeder.init(sess, coord)
    queue_threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    # NOTE(review): if `enqueue_threads` is itself a list this nests it inside
    # `queue_threads`; confirm against DataFeederTF before changing.
    queue_threads.append(data_feeder.enqueue_threads)

    # Register and create summary ops.
    summary_dir = os.path.join(config['model_dir'], "summary")
    summary_writer = tf.summary.FileWriter(summary_dir, sess.graph)

    if config['tensorboard_verbose'] > 1:
        # Create summaries to visualize weights and gradients.
        for grad, var in grads:
            tf.summary.histogram(var.name,
                                 var,
                                 collections=["training_status"])
            # A histogram cannot be built from a missing (None) gradient.
            if grad is not None:
                tf.summary.histogram(var.name + '/gradient',
                                     grad,
                                     collections=["training_status"])

        # Fill level of the input queue as a fraction of its capacity.
        tf.summary.scalar(
            "training/queue",
            math_ops.cast(data_feeder.input_queue.size(), dtypes.float32) *
            (1. / data_feeder.queue_capacity),
            collections=["training_status"])

    # Save configuration
    config['loss_weights']['kld_loss'] = kld_loss_weight_backup
    try:
        # Pickle and json dump. Both are best effort: the config may hold
        # values these serializers reject, which must not abort the training.
        with open(os.path.join(config['model_dir'], 'config.pkl'),
                  'wb') as pkl_file:
            pickle.dump(config, pkl_file)
        with open(os.path.join(config['model_dir'], 'config.json'),
                  'w') as json_file:
            json.dump(config, json_file, indent=4, sort_keys=True)
    except Exception as err:
        print("Warning: could not save configuration: %s" % err)

    training_summary = tf.compat.v1.summary.merge_all('training_status')
    training_run_ops = [
        model.loss_summary, training_summary, model.ops_loss, train_op
    ]
    training_run_ops_with_img_summary = [
        model.loss_summary, training_summary, model.ops_loss,
        model.ops_img_summary, train_op
    ]

    if validate_model:
        validation_run_ops = [valid_model.ops_loss]

    if use_staging_area:
        training_run_ops.append(staging_area.preload_op)
        training_run_ops_with_img_summary.append(staging_area.preload_op)
        # Fill staging area first.
        for _ in range(256):
            sess.run(staging_area.preload_op,
                     feed_dict={},
                     options=run_opts,
                     run_metadata=run_opts_metadata)

        if validate_model:
            validation_run_ops.append(valid_staging_area.preload_op)
            # Fill staging area first.
            for _ in range(256):
                sess.run(valid_staging_area.preload_op,
                         feed_dict={},
                         options=run_opts,
                         run_metadata=run_opts_metadata)

    for epoch in range(start_epoch, config['num_epochs'] + 1):
        for epoch_step in range(num_training_iterations):
            start_time = time.perf_counter()
            step = tf.train.global_step(sess, global_step)

            if (step % config['checkpoint_every_step']) == 0:
                ckpt_save_path = saver.save(
                    sess, os.path.join(config['model_dir'], 'model'),
                    global_step)
                print("Model saved in file: %s" % ckpt_save_path)

            # Image summaries are only produced on their own schedule; the
            # same condition is reused for the sampling model further down.
            write_img_summary = (config['img_summary_every_step'] > 0 and
                                 step % config['img_summary_every_step'] == 0)

            if write_img_summary:
                run_training_output = sess.run(
                    training_run_ops_with_img_summary,
                    feed_dict={},
                    options=run_opts,
                    run_metadata=run_opts_metadata)

                img_summary = model.get_image_summary(
                    sess,
                    ops_img_summary_evaluated=run_training_output[3],
                    seq_len=500)
                summary_writer.add_summary(img_summary, step)
            else:
                run_training_output = sess.run(training_run_ops,
                                               feed_dict={},
                                               options=run_opts,
                                               run_metadata=run_opts_metadata)

            summary_writer.add_summary(run_training_output[0],
                                       step)  # Loss summary
            summary_writer.add_summary(run_training_output[1],
                                       step)  # Training status summary.

            if step % config['print_every_step'] == 0:
                time_elapsed = (time.perf_counter() -
                                start_time) / config['print_every_step']
                model.log_loss(run_training_output[2], step, epoch,
                               time_elapsed)

            if write_img_summary:
                sampling_img_summary = sampling_model.get_image_summary(
                    sess, ops_img_summary_evaluated=None, seq_len=500)
                summary_writer.add_summary(sampling_img_summary, step)

            if validate_model and step % config['validate_every_step'] == 0:
                start_time = time.perf_counter()
                for _ in range(num_validation_iterations):
                    run_validation_output = sess.run(
                        validation_run_ops,
                        feed_dict={},
                        options=run_opts,
                        run_metadata=run_opts_metadata)
                    valid_model.update_validation_loss(
                        run_validation_output[0])

                valid_summary, valid_eval_loss = valid_model.get_validation_summary(
                    session=sess)
                summary_writer.add_summary(valid_summary,
                                           step)  # Validation loss summary

                time_elapsed = (time.perf_counter() -
                                start_time) / num_validation_iterations
                valid_model.log_loss(valid_eval_loss,
                                     step,
                                     epoch,
                                     time_elapsed,
                                     prefix="VALID: ")
                valid_model.reset_validation_loss()

            if create_timeline:
                create_tf_timeline(config['model_dir'], run_opts_metadata)

    print("End-of-Training.")
    ckpt_save_path = saver.save(sess, os.path.join(config['model_dir'],
                                                   'model'), global_step)
    print("Model saved in file: %s" % ckpt_save_path)
    print('Model is trained for %d epochs, %d steps.' %
          (config['num_epochs'], step))

    try:
        sess.run(data_feeder.input_queue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(queue_threads, stop_grace_period_secs=5)
    except Exception as err:
        # Shutting down the input pipeline is best effort.
        print("Warning: input pipeline did not shut down cleanly: %s" % err)
    finally:
        # Always release the session, whatever happened during shutdown.
        sess.close()

Example #24

Show file

File: minist_dnn_act修改版.py Project: BlueLenzLaw/-

    def __init__(self):
        """Build a five-layer fully connected classifier, train it and log results.

        All settings and data are read from the module-level ``R_variable``
        dict: ``graph_shape``, ``label_shape``, ``batch_size``, ``ActFuc``
        (``'relu'``, ``'tanh'`` or ``'srelu'``), ``learning_rate``, ``epoch``,
        ``batch_num``, ``breakstandard``, ``train_inputs``, ``y_true_train``,
        ``test_inputs`` and ``y_true_test``. Per-epoch losses and accuracies
        are appended to ``R_variable`` and written, together with plots, into
        the folder returned by ``mk_newfolder()``.

        Training stops early once the test accuracy reaches
        ``R_variable['breakstandard']``.

        Raises:
            ValueError: if ``R_variable['ActFuc']`` names an unknown
                activation function.
        """

        # Loss plot
        def plotloss():
            plt.figure()
            ax = plt.gca()
            y1 = R_variable['loss_test']
            y2 = R_variable['loss_train']
            plt.plot(y1, 'ro', label='Test')
            plt.plot(y2, 'g*', label='Train')
            # ax.set_xscale('log')
            ax.set_yscale('log')
            plt.legend(fontsize=18)
            plt.xlabel('Epoch', fontsize=15)
            plt.title('loss', fontsize=15)
            fntmp = '%sloss' % (self.FolderName)
            mySaveFig(plt, fntmp, ax=ax, isax=1, iseps=0)

        # Accuracy plot
        def plotacc():
            plt.figure()
            ax = plt.gca()
            y1 = R_variable['acc_test']
            y2 = R_variable['acc_train']
            plt.plot(y1, 'ro', label='Test')
            plt.plot(y2, 'g*', label='Train')
            # ax.set_xscale('log')
            # ax.set_yscale('log')
            plt.legend(fontsize=18)
            plt.xlabel('Epoch', fontsize=15)
            plt.title('accuracy', fontsize=15)
            fntmp = '%saccuracy' % (self.FolderName)
            mySaveFig(plt, fntmp, ax=ax, isax=1, iseps=0)

        # Save results to disk
        def savefile():
            # Serialize R_variable; it can be loaded back later if needed.
            with open('%s/objs.pkl' % (self.FolderName),
                      'wb') as f:  # Python 3: open(..., 'wb')
                pickle.dump(R_variable, f, protocol=4)

            # Write only the R_variable entries with at most 20 elements.
            with open("%s/Output.txt" % (self.FolderName), "w") as text_file:
                for para in R_variable:
                    if np.size(R_variable[para]) > 20:
                        continue
                    text_file.write('%s: %s\n' % (para, R_variable[para]))

            # Save the loss / accuracy histories as csv files.
            for key in ('loss_train', 'loss_test', 'acc_train', 'acc_test'):
                pd.DataFrame(R_variable[key]).to_csv(self.FolderName + key +
                                                     ".csv",
                                                     header=False,
                                                     columns=None)

        # Record the latest values; refreshed before every plot so they are
        # available even if the program is stopped midway.
        def gapReocord():
            R_variable['final_train_loss'] = R_variable['loss_train'][-1]
            R_variable['final_test_loss'] = R_variable['loss_test'][-1]
            R_variable['final_train_acc'] = R_variable['acc_train'][-1]
            R_variable['final_test_acc'] = R_variable['acc_test'][-1]

        # Metric histories
        R_variable['loss_test'] = []
        R_variable['loss_train'] = []
        R_variable['acc_test'] = []
        R_variable['acc_train'] = []

        # Start the timer and create a new output folder.
        self.t0 = time.time()
        self.FolderName = mk_newfolder()
        # `list.extend` returns None, which silently left the placeholder
        # shapes unspecified; build the intended [None, *dims] shape instead.
        self.x = tf.placeholder(tf.float32,
                                [None] + list(R_variable['graph_shape']),
                                name='x')
        self.y0 = tf.placeholder(tf.float32,
                                 shape=[None] +
                                 list(R_variable['label_shape']),
                                 name='y0')

        dataset = tf.data.Dataset.from_tensor_slices((self.x, self.y0))
        dataset = dataset.shuffle(20).batch(R_variable['batch_size']).repeat()
        itetator = dataset.make_initializable_iterator()
        data_element = itetator.get_next()

        def weight_variable(shape, name=None):
            initial = tf.truncated_normal(shape, stddev=0.1)
            return tf.Variable(initial, name=name)

        def bias_variable(shape, name=None):
            initial = tf.constant(0.1, shape=shape)
            return tf.Variable(initial, name=name)

        def activation_fun(x, name=None):
            act = R_variable['ActFuc']
            if act == 'relu':
                return tf.nn.relu(x, name=name)
            if act == 'tanh':
                return tf.nn.tanh(x, name=name)
            if act == 'srelu':
                return tf.nn.relu(-(x - 1)) * tf.nn.relu(x)
            # Fail clearly instead of with an UnboundLocalError.
            raise ValueError('Unknown activation function: %r' % (act, ))

        # Fully connected layer
        n_inputs = R_variable['graph_shape'][0] * R_variable['graph_shape'][1]
        x_flat = tf.reshape(self.x, [-1, n_inputs], name='x_flat')
        W_fc1 = weight_variable([n_inputs, 800], name='W_fc1')
        b_fc1 = bias_variable([800], name='b_fc1')
        h_fc1 = activation_fun(tf.matmul(x_flat, W_fc1) + b_fc1, name='h_fc1')

        # Fully connected layer
        W_fc2 = weight_variable([800, 800], name='W_fc2')
        b_fc2 = bias_variable([800], name='b_fc2')
        h_fc2 = activation_fun(tf.matmul(h_fc1, W_fc2) + b_fc2, name='h_fc2')

        # Fully connected layer
        W_fc3 = weight_variable([800, 512], name='W_fc3')
        b_fc3 = bias_variable([512], name='b_fc3')
        h_fc3 = activation_fun(tf.matmul(h_fc2, W_fc3) + b_fc3, name='h_fc3')

        # Fully connected layer
        W_fc4 = weight_variable([512, 64], name='W_fc4')
        b_fc4 = bias_variable([64], name='b_fc4')
        h_fc4 = activation_fun(tf.matmul(h_fc3, W_fc4) + b_fc4, name='h_fc4')

        # softmax
        W_fc5 = weight_variable([64, 10], name='W_fc5')
        b_fc5 = bias_variable([10], name='b_fc5')
        y_pre = tf.add(tf.matmul(h_fc4, W_fc5), b_fc5, name='y_pre')
        self.y = tf.nn.softmax(y_pre, name='y')

        # loss func
        # The op applies softmax internally, so it must be given the raw
        # logits; feeding it `self.y` would apply softmax twice.
        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_pre,
                                                    labels=self.y0))
        # train aim
        self.train = tf.train.AdamOptimizer(
            learning_rate=R_variable['learning_rate']).minimize(
                self.cross_entropy)
        # accuracy
        self.result = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y0, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.result, tf.float32))

        config = tf.ConfigProto()
        # Only relevant when running on a server; can be ignored otherwise.
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(itetator.initializer,
                      feed_dict={
                          self.x: R_variable['train_inputs'],
                          self.y0: R_variable['y_true_train']
                      })

        metrics = [self.accuracy, self.cross_entropy]
        train_feed = {
            self.x: R_variable['train_inputs'],
            self.y0: R_variable['y_true_train']
        }
        test_feed = {
            self.x: R_variable['test_inputs'],
            self.y0: R_variable['y_true_test']
        }

        for e in range(R_variable['epoch']):
            for s in range(R_variable['batch_num']):
                finished_batch = e * R_variable['batch_num'] + s + 1
                # training
                x_batch, y_batch = self.sess.run(data_element)
                batch_feed = {self.x: x_batch, self.y0: y_batch}
                self.sess.run(self.train, feed_dict=batch_feed)
                acc_Test, loss_Test = self.sess.run(metrics,
                                                    feed_dict=test_feed)
                if (acc_Test >= R_variable['breakstandard']):
                    R_variable['uesd batch'] = finished_batch
                    R_variable['uesd time'] = time.time() - self.t0
                    R_variable['flag'] = 1
                    break

                if s % 1000 == 0:
                    acc_Train, loss_Train = self.sess.run(
                        metrics, feed_dict=batch_feed)
                    # acc_Test / loss_Test from the evaluation above are
                    # still current: no training step ran in between.
                    # R_variable['loss_train'].append(loss_Train)
                    # R_variable['loss_test'].append(loss_Test)
                    batch_needed = R_variable['epoch'] * R_variable[
                        'batch_num'] - finished_batch
                    round_time = time.time()
                    R_variable['use_time'] = round_time - self.t0
                    time_needed = (round_time -
                                   self.t0) / finished_batch * batch_needed
                    print(
                        "In epoch: %d, step: %d, Train accuracy is: %3.3f, Train loss is: %3.3f"
                        % (e + 1, s, acc_Train, loss_Train))
                    print("Test accuracy is: %3.3f, Test loss is: %3.3f" %
                          (acc_Test, loss_Test))
                    print(
                        "The program have been running for %ds, still need %ds"
                        % (round_time - self.t0, time_needed))
                    # savefile()
                    # gapReocord()

            # End-of-epoch evaluation on the full training and test sets.
            acc_Train, loss_Train = self.sess.run(metrics,
                                                  feed_dict=train_feed)
            acc_Test, loss_Test = self.sess.run(metrics, feed_dict=test_feed)
            R_variable['loss_train'].append(loss_Train)
            R_variable['loss_test'].append(loss_Test)
            R_variable['acc_train'].append(acc_Train)
            R_variable['acc_test'].append(acc_Test)
            savefile()
            gapReocord()
            plotloss()
            plotacc()
            # 'flag' is only set above when early stopping fired, so it may
            # be absent from R_variable.
            if R_variable.get('flag') == 1:
                break

        print("Program ends. ")
        print("Train accuracy is: %3.3f, Train loss is: %3.3f" %
              (acc_Train, loss_Train))
        print("Test accuracy is: %3.3f, Test loss is: %3.3f" %
              (acc_Test, loss_Test))
        # Measure the elapsed time here: the timestamp of the last progress
        # report may never have been taken (e.g. early stop on first batch).
        print("The program have been running for %ds." %
              (time.time() - self.t0))

Example #25

Show file

File: get_box.py Project: trojerz/EarDetection-RetinaNet

from get_coordinates import get_coordinates
from PIL import Image
import pandas as pd
from show_image import show_image_objects
import os
from kerasretinanet.keras_retinanet import models
from kerasretinanet.keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from kerasretinanet.keras_retinanet.utils.visualization import draw_box, draw_caption
from kerasretinanet.keras_retinanet.utils.colors import label_color
import cv2
import matplotlib.pyplot as plt
import numpy as np

# pathlib.Path is used below to list the annotation files, so it has to be
# imported; without it the listing fails with a NameError.
import pathlib

# Some fixes so we can train the model: create the session with GPU memory
# growth enabled in its config.
import tensorflow.compat.v1 as tf1
config = tf1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf1.InteractiveSession(config=config)

# Prepare test pictures and annotations: collect every regular file in the
# annotation directory.
pic_list = [
    p for p in pathlib.Path('AWEForSegmentation/testannot_rect').iterdir()
    if p.is_file()
]
# Column-wise storage for the annotations; each key starts as an empty list.
dataset = {
    'img_name': [],
    'x_min': [],
    'y_min': [],
    'x_max': [],
    'y_max': [],
    'class_name': [],
}

Example #26

Show file

File: fixed.py Project: MitchellTesla/google-research

def train(params):
    """Run training as configured by `params`.

    Unpickles the data splits from `params.data_path`, builds the ops in a
    fresh graph and repeatedly runs the training fetches inside a monitored
    session. Progress is printed every `params.log_every` steps, the
    validation evaluation runs whenever the fetched `should_reset` value is
    truthy, and the loop ends with the test evaluation once
    `params.num_train_steps` is reached. On `InvalidArgumentError` the
    latest checkpoint in `params.output_dir` is restored and the running
    loss statistics are cleared.
    """
    with gfile.GFile(params.data_path, 'rb') as data_file:
        x_train, x_valid, x_test, _, _ = pickle.load(data_file)
        print('-' * 80)
        size_reports = (
            ('train_size: {0}', x_train),
            ('valid_size: {0}', x_valid),
            (' test_size: {0}', x_test),
        )
        for template, split in size_reports:
            print(template.format(np.size(split)))

    graph = tf.Graph()
    with graph.as_default():
        tf.random.set_random_seed(2126)
        ops = get_ops(params, x_train, x_valid, x_test)
        # Everything fetched by a single training step, in unpacking order.
        fetch_names = (
            'train_loss',
            'grad_norm',
            'learning_rate',
            'should_reset',
            'moving_avg_started',
            'train_op',
        )
        run_ops = [ops[name] for name in fetch_names]

        saver = tf.train.Saver(max_to_keep=2)
        hooks = [
            tf.train.CheckpointSaverHook(params.output_dir,
                                         save_steps=params.num_train_batches,
                                         saver=saver)
        ]
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.train.SingularMonitoredSession(
            config=config, hooks=hooks, checkpoint_dir=params.output_dir)

        accum_loss = 0.
        accum_step = 0
        best_valid_ppl = []
        start_time = time.time()
        while True:
            try:
                (loss, grad_norm, learning_rate, should_reset,
                 moving_avg_started, _) = sess.run(run_ops)
                accum_loss += loss
                accum_step += 1
                step = sess.run(ops['global_step'])

                if step % params.log_every == 0:
                    epoch = step // params.num_train_batches
                    train_ppl = np.exp(accum_loss / accum_step)
                    mins_so_far = (time.time() - start_time) / 60.
                    log_fields = [
                        'epoch={0:<5d}'.format(epoch),
                        ' step={0:<7d}'.format(step),
                        ' ppl={0:<10.2f}'.format(train_ppl),
                        ' lr={0:<6.3f}'.format(learning_rate),
                        ' |g|={0:<6.3f}'.format(grad_norm),
                        ' avg={0:<2d}'.format(moving_avg_started),
                        ' mins={0:<.2f}'.format(mins_so_far),
                    ]
                    print(''.join(log_fields))

                if moving_avg_started:
                    sess.run(ops['update_moving_avg'])

                # if step % params.num_train_batches == 0:
                if should_reset:
                    sess.run(ops['reset_batch_states'])
                    accum_loss = 0
                    accum_step = 0
                    valid_ppl = ops['eval_valid'](
                        sess, use_moving_avg=moving_avg_started)
                    sess.run(
                        [ops['reset_batch_states'], ops['reset_start_idx']])
                    # Averaging starts when the new validation value exceeds
                    # the minimum of the history, leaving out its last
                    # `best_valid_ppl_threshold` entries.
                    if not moving_avg_started:
                        threshold = params.best_valid_ppl_threshold
                        if len(best_valid_ppl) > threshold:
                            if valid_ppl > min(best_valid_ppl[:-threshold]):
                                print('Starting moving_avg')
                                sess.run(ops['start_moving_avg'])
                    best_valid_ppl.append(valid_ppl)

                if step >= params.num_train_steps:
                    ops['eval_test'](sess, use_moving_avg=moving_avg_started)
                    break
            except tf.errors.InvalidArgumentError:
                last_checkpoint = tf.train.latest_checkpoint(params.output_dir)
                print('rolling back to previous checkpoint {0}'.format(
                    last_checkpoint))
                saver.restore(sess, last_checkpoint)
                accum_loss = 0.
                accum_step = 0
        sess.close()

Example #27

Show file

File: run_pretraining.py Project: muzzynine/examples-1

def build_graph(bert_config,
                opts,
                iterations_per_step=1,
                is_training=True,
                feed_name=None):
    """Assemble the BERT graph, its session and the handles needed to run it.

    Args:
        bert_config: configuration for the BERT model.
        opts: a dictionary containing all global options.
        iterations_per_step: number of iterations per step.
        is_training (bool): forwarded to the dataset and to the training step.
        feed_name: prefix for the IPU feed names; "_in" and "_out" are
            appended to it, so a string is expected.

    Returns:
        a GraphOps holding the graph, the session, the variable initialiser,
        the training op (wrapped in a list), the placeholders, the infeed
        queue, the dequeued outfeed, the savers, the restore saver and the
        trainable variables.
    """
    graph = tf.Graph()
    with graph.as_default():

        # The learning rate is fed at run time through a scalar placeholder.
        lr_placeholder = tf.placeholder(bert_config.dtype, shape=[])
        placeholders = {'learning_rate': lr_placeholder}

        infeed_queue = ipu.ipu_infeed_queue.IPUInfeedQueue(
            dataset.data(opts, is_training=is_training),
            feed_name=feed_name + "_in",
            replication_factor=opts['replicas'])

        outfeed_queue = ipu.ipu_outfeed_queue.IPUOutfeedQueue(
            feed_name=feed_name + "_out", replication_factor=opts['replicas'])

        with ipu.scopes.ipu_scope('/device:IPU:0'):
            train_op = training_step_with_infeeds_and_outfeeds(
                bert_config,
                infeed_queue,
                outfeed_queue,
                opts,
                lr_placeholder,
                iterations_per_step,
                is_training=is_training)

        outfeed = outfeed_queue.dequeue()

        bert_logging.print_trainable_variables(opts['logs_path'])

        model_variables = tf.trainable_variables() + tf.get_collection(
            tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)
        all_variables = tf.global_variables()

        # Saver used to load an existing checkpoint, with or without the
        # optimiser state.
        if opts['restore_optimiser_from_ckpt']:
            restore_variables = all_variables
        else:
            restore_variables = model_variables
        restore = tf.train.Saver(var_list=restore_variables)

        # Two savers are kept: one for regular training checkpoints and one
        # for the best checkpoint. Both cover the same set of variables.
        if opts['ckpt_model_only']:
            latest_variables = model_variables
        else:
            latest_variables = all_variables
        latest_saver = tf.train.Saver(var_list=latest_variables,
                                      name='latest',
                                      max_to_keep=5)
        if opts['ckpt_model_only']:
            best_variables = model_variables
        else:
            best_variables = all_variables
        best_saver = tf.train.Saver(var_list=best_variables,
                                    name='best',
                                    max_to_keep=1)
        savers = {"train_saver": latest_saver, "best_saver": best_saver}

        ipu.utils.move_variable_initialization_to_cpu()
        train_init = tf.global_variables_initializer()
        tvars = tf.trainable_variables()

    # Work out how many IPUs the pretraining pipeline needs: the embedding
    # placement contributes 0, 1 or 2 devices on top of the hidden stages.
    embedding_ipus_by_placement = {
        'two_ipus': 2,
        'same_ipu': 1,
        'same_as_hidden_layers': 0
    }
    num_embedding_ipu = embedding_ipus_by_placement[
        opts['embeddings_placement']]

    stage_count = len(bert_config.hidden_layers_per_stage)
    num_ipus_required = opts['replicas'] * next_power_of_two(
        stage_count + num_embedding_ipu)

    # Build the IPU options from the global options and apply them.
    ipu_options = get_ipu_config(
        fp_exceptions=opts["fp_exceptions"],
        stochastic_rounding=opts['stochastic_rounding'],
        xla_recompute=opts["xla_recompute"],
        available_memory_proportion=opts['available_memory_proportion'],
        disable_graph_outlining=opts["disable_graph_outlining"],
        num_ipus_required=num_ipus_required,
        max_cross_replica_sum_buffer_size=opts[
            'max_cross_replica_sum_buffer_size'],
        scheduler_selection=opts['scheduler'],
        compile_only=opts['compile_only'],
        partials_type=opts['partials_type'])
    ipu.utils.configure_ipu_system(ipu_options)

    session = tf.Session(graph=graph, config=tf.ConfigProto())

    return GraphOps(graph, session, train_init, [train_op], placeholders,
                    infeed_queue, outfeed, savers, restore, tvars)

Example #28

Show file

File: infer.py Project: helenxhou/SongExplorer

def main(_):
    """Run a restored model over the testing set and optionally save outputs.

    Builds the model described by FLAGS, loads its variables from
    FLAGS.start_checkpoint, then feeds the 'testing' partition through it in
    batches of FLAGS.batch_size. Depending on FLAGS.save_activations and
    FLAGS.save_fingerprints, 'activations.npz' and/or 'fingerprints.npy' are
    written into FLAGS.data_dir.

    Args:
        _: unused positional argument.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)
    np.set_printoptions(threshold=np.inf, linewidth=10000)

    flags = vars(FLAGS)
    for key in sorted(flags.keys()):
        tf.logging.info('%s = %s', key, flags[key])

    # Start a new TensorFlow session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    #config.log_device_placement = False
    sess = tf.InteractiveSession(config=config)

    # The label list lives next to the checkpoint, one label per line.
    label_file = os.path.join(os.path.dirname(FLAGS.start_checkpoint),
                              "vgg_labels.txt")
    # A context manager guarantees the handle is closed even if reading fails.
    with open(label_file) as fid:
        labels = [line.rstrip() for line in fid]
    label_count = len(labels)

    model_settings = models.prepare_model_settings(
        label_count, FLAGS.sample_rate, FLAGS.nchannels,
        FLAGS.clip_duration_ms, FLAGS.representation, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, 1, FLAGS.dct_coefficient_count,
        FLAGS.filterbank_channel_count,
        [int(x) for x in FLAGS.filter_counts.split(',')],
        [int(x)
         for x in FLAGS.filter_sizes.split(',')], FLAGS.final_filter_len,
        FLAGS.dropout_prob, FLAGS.batch_size, FLAGS.dilate_after_layer,
        FLAGS.stride_after_layer, FLAGS.connection_type)

    fingerprint_size = model_settings['fingerprint_size']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size],
                                       name='fingerprint_input')

    hidden, logits = models.create_model(fingerprint_input,
                                         model_settings,
                                         FLAGS.model_architecture,
                                         is_training=False)

    tf.global_variables_initializer().run()

    models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)

    # Count the trainable parameters by multiplying out every variable shape.
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= int(dim)
        total_parameters += variable_parameters
    tf.logging.info('number of trainable parameters: %d', total_parameters)

    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.labels_touse.split(','),
        FLAGS.validation_percentage, FLAGS.validation_offset_percentage,
        FLAGS.validation_files.split(','), FLAGS.testing_percentage,
        FLAGS.testing_files.split(','), FLAGS.subsample_skip,
        FLAGS.subsample_word, FLAGS.partition_word, FLAGS.partition_n,
        FLAGS.partition_training_files.split(','),
        FLAGS.partition_validation_files.split(','), -1,
        FLAGS.testing_equalize_ratio, FLAGS.testing_max_samples,
        model_settings)

    testing_set_size = audio_processor.set_size('testing')

    # `range` rather than `xrange`: the star-unpacking used below is Python 3
    # syntax, and `xrange` is not a builtin there.
    for isample in range(0, testing_set_size, FLAGS.batch_size):
        fingerprints, _, samples = (audio_processor.get_data(
            FLAGS.batch_size, isample, model_settings, 0.0, 0.0,
            0.0 if FLAGS.time_shift_random else time_shift_samples,
            FLAGS.time_shift_random, 'testing', sess))
        # A short batch is padded with copies of its first row so the model
        # always receives FLAGS.batch_size rows; the padding is dropped again
        # below via `obtained`.
        needed = FLAGS.batch_size - fingerprints.shape[0]
        if needed > 0:
            fingerprints = np.append(fingerprints,
                                     np.repeat(fingerprints[[0], :],
                                               needed,
                                               axis=0),
                                     axis=0)
            for _ in range(needed):
                samples.append(samples[0])
        logit_vals, hidden_vals = sess.run([logits, hidden],
                                           feed_dict={
                                               fingerprint_input: fingerprints,
                                           })
        obtained = FLAGS.batch_size - needed
        if isample == 0:
            samples_data = [None] * testing_set_size
        samples_data[isample:isample + obtained] = samples[:obtained]
        if FLAGS.save_activations:
            if isample == 0:
                # Allocate one output array per hidden layer, sized from the
                # first batch, plus a final one for the logits.
                activations = []
                for hidden_val in hidden_vals:
                    nHWC = np.shape(hidden_val)[1:]
                    activations.append(np.empty((testing_set_size, *nHWC)))
                activations.append(
                    np.empty((testing_set_size, np.shape(logit_vals)[1])))
            for ihidden, hidden_val in enumerate(hidden_vals):
                activations[ihidden][isample:isample + obtained, :, :] = \
                    hidden_val[:obtained, :, :, :]
            activations[-1][isample:isample +
                            obtained, :] = logit_vals[:obtained, :]
        if FLAGS.save_fingerprints:
            if isample == 0:
                # Recover the 2-D layout of a flattened fingerprint from the
                # windowing flags.
                nW = round((FLAGS.clip_duration_ms - FLAGS.window_size_ms) /
                           FLAGS.window_stride_ms + 1)
                nH = round(np.shape(fingerprints)[1] / nW)
                input_layer = np.empty((testing_set_size, nW, nH))
            input_layer[isample:isample + obtained, :, :] = \
                np.reshape(fingerprints[:obtained, :], (obtained, nW, nH))
    if FLAGS.save_activations:
        np.savez(os.path.join(FLAGS.data_dir, 'activations.npz'),
                 *activations, samples=samples_data, labels=labels)
    if FLAGS.save_fingerprints:
        np.save(os.path.join(FLAGS.data_dir, 'fingerprints.npy'), input_layer)

Example #29

Show file

File: train.py Project: namelessCrusader/Gpt-2-compat-tf2

def main():
    """Train the model named by the command-line arguments until interrupted.

    Parses the arguments, loads the hyper-parameters from
    '..//models/<model_name>/hparams.json', builds the training loss (and,
    when args.val_every > 0, a validation loss), a sampling op and the
    optimizer, restores a checkpoint, loads the dataset and then loops
    forever, periodically saving, sampling and validating. A
    KeyboardInterrupt stops the loop and triggers a final save.

    Raises:
        ValueError: if args.sample_length exceeds hparams.n_ctx.
        SystemExit: if args.optimizer is neither 'adam' nor 'sgd', or if
            memory-saving gradients are requested together with gradient
            accumulation.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join('..//models', args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    # The 345M model always uses memory-saving gradients and, with Adam,
    # only trains the transformer layers.
    if args.model_name == '345M':
        args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        # Next-token loss: logits at position i are scored against the token
        # at position i + 1.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            # exit() accepts a single argument, so the message must be
            # formatted into one string before it is passed in.
            exit('Bad optimizer: {}'.format(args.optimizer))

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        # Pick the checkpoint to restore: the run's latest one (falling back
        # to the stock model weights), the stock weights, or an explicit path.
        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join('..//models', args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join('..//models', args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                # Without a dedicated validation set, validate on the
                # training chunks.
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            """Write a checkpoint and record the current step counter."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            # NOTE(review): the save prefix contains '..//model', i.e. the
            # parent of the run directory, whereas the message printed above
            # and the 'latest' restore path use the run directory itself --
            # confirm this is intended.
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name,
                                    '..//model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            """Generate args.sample_num samples and write them to a file."""
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            # Only the last generated sample is echoed to stdout; all of them
            # go to the samples file.
            print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            """Average the loss over the fixed validation batches and log it."""
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            # Feed the averaged value back in for val_loss so the summary
            # records the mean over all batches.
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            """Return args.batch_size training samples of 1024 tokens each."""
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # Running (weighted sum of losses, sum of weights) with decay 0.99;
        # their ratio is the reported average.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()

Example #30

Show file

File: experiment.py Project: sarthakksu/covid-low-income-bam

def run_training():
    """Run the training loop, logging summaries and saving checkpoints.

    Summaries and checkpoints go under FLAGS.summary_dir + '/train'. An
    existing checkpoint there is resumed unless FLAGS.restart is set, in
    which case (or when no checkpoint is found) the directory is wiped and
    training starts from step 0.
    """
    with tf.Graph().as_default():
        # Input images and labels.
        features = get_features(True, FLAGS.batch_size)
        build_model = f_model.multi_gpu_model
        print('so far so good!')
        result = build_model(features)

        # TODO(sasabour): merge jit scopes after jit scopes where enabled.
        summary_op = result['summary']
        train_op = result['train']
        # test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        saver = tf.train.Saver(max_to_keep=FLAGS.keep_ckpt)

        train_dir = FLAGS.summary_dir + '/train'
        # Stays at 0 unless an existing checkpoint is resumed below.
        prev_step = 0
        if not tf.gfile.Exists(train_dir):
            tf.gfile.MakeDirs(train_dir)
        else:
            ckpt = tf.train.get_checkpoint_state(train_dir + '/')
            print(ckpt)
            if (not FLAGS.restart) and ckpt and ckpt.model_checkpoint_path:
                print('hesllo')
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step number is the suffix after the last '-' in the
                # checkpoint file name.
                ckpt_name = ckpt.model_checkpoint_path.split('/')[-1]
                prev_step = int(ckpt_name.split('-')[-1])
            else:
                # Start over from an empty training directory.
                print('what??')
                tf.gfile.DeleteRecursively(train_dir)
                tf.gfile.MakeDirs(train_dir)

        train_writer = tf.summary.FileWriter(train_dir, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # `step` counts iterations of this invocation; `i` is the global
            # step index continuing from the restored checkpoint.
            step = 0
            for step, i in enumerate(range(prev_step, FLAGS.max_steps), 1):
                summary, _ = sess.run([summary_op, train_op])
                train_writer.add_summary(summary, i)
                finished = i + 1
                if finished % FLAGS.checkpoint_steps == 0:
                    saver.save(sess,
                               os.path.join(train_dir, 'model.ckpt'),
                               global_step=finished)
        except tf.errors.OutOfRangeError:
            print('Done training for %d steps.' % step)
        finally:
            # When done, ask the threads to stop.
            coord.request_stop()
        train_writer.close()
        # Wait for threads to finish.
        coord.join(threads)
        sess.close()