Ejemplo n.º 1
0
def _main_():
    """Quantize a frozen detection graph and export it as a TF Serving SavedModel.

    Reads the module-level names ``output_loc``, ``model_name`` and
    ``frozen_graph`` and writes the SavedModel to
    ``<output_loc>/<model_name>/<version>``. The same signature is registered
    under both ``'predict_images'`` and the default serving signature key.
    """
    # #################
    # Setup export path
    # #################
    version = 1
    output_dir = os.path.join(output_loc, model_name)
    export_path = os.path.join(output_dir, str(version))

    # ###################
    #  Inference Pipeline
    # ###################
    input_names = 'image_tensor'
    output_names = ['detection_boxes', 'detection_classes', 'detection_scores', 'num_detections']

    with tf.Session() as sess:
        input_tensor = tf.placeholder(dtype=tf.uint8, shape=(None, None, None, 3), name=input_names)
        # ###################
        # load frozen graph
        # ###################
        graph_def = load_graph_from_pb(frozen_graph)
        outputs = tf.import_graph_def(graph_def,
                                      input_map={'image_tensor': input_tensor},
                                      return_elements=output_names,
                                      name='')
        # return_elements were given as op names, so resolve each op's first
        # output tensor explicitly.
        outputs = [sess.graph.get_tensor_by_name(ops.name + ':0') for ops in outputs]
        outputs = dict(zip(output_names, outputs))

    # #####################
    # Quantize Frozen Model
    # #####################
    transforms = ["add_default_attributes",
                  "quantize_weights", "round_weights",
                  "fold_batch_norms", "fold_old_batch_norms"]

    # TransformGraph comma-joins `inputs`/`outputs`, so both must be lists of
    # node names. Passing the bare string 'image_tensor' would be joined
    # character by character ("i,m,a,g,e,...") and name no real input node.
    quantized_graph = TransformGraph(input_graph_def=graph_def,
                                     inputs=[input_names],
                                     outputs=output_names,
                                     transforms=transforms)

    # #####################
    # Export to TF Serving#
    # #####################
    # Reference: https://github.com/tensorflow/models/tree/master/research/object_detection

    with tf.Graph().as_default():
        tf.import_graph_def(quantized_graph, name='')

        # Optimizing graph
        rewrite_options = rewriter_config_pb2.RewriterConfig()
        rewrite_options.optimizers.append('pruning')
        rewrite_options.optimizers.append('constfold')
        rewrite_options.optimizers.append('layout')
        graph_options = tf.GraphOptions(rewrite_options=rewrite_options, infer_shapes=True)

        # Build model for TF Serving
        config = tf.ConfigProto(graph_options=graph_options)

        # @TODO: add XLA for higher performance (AOT for ARM, JIT for x86/GPUs)
        # https://www.tensorflow.org/performance/xla/
        # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        # Reference:
        # https://www.tensorflow.org/guide/saved_model
        with session.Session(config=config) as sess:
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)
            # NOTE(review): input_tensor and outputs were created in the first
            # (default) graph, not in this quantized graph. This presumably
            # relies on the tensor names being identical in both graphs --
            # confirm that the transforms keep these names.
            tensor_info_inputs = {'inputs': tf.saved_model.utils.build_tensor_info(input_tensor)}
            tensor_info_outputs = {}
            for k, v in outputs.items():
                tensor_info_outputs[k] = tf.saved_model.utils.build_tensor_info(v)

            detection_signature = (
                    tf.saved_model.signature_def_utils.build_signature_def(
                            inputs     = tensor_info_inputs,
                            outputs    = tensor_info_outputs,
                            method_name= signature_constants.PREDICT_METHOD_NAME))

            builder.add_meta_graph_and_variables(
                    sess, [tf.saved_model.tag_constants.SERVING],  # the SERVING tag marks the saved graph as meant for serving
                    signature_def_map={'predict_images': detection_signature,
                                       signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: detection_signature,
                                       },
            )
            builder.save()

    print("\n\nModel is ready for TF Serving. (saved at {}/saved_model.pb)".format(export_path))
Ejemplo n.º 2
0
    X, Z1, _X = [], [], []
    X.append(tf.random_uniform([dim, dim], 0, 10, name='X' + str(0)))
    _X.append(tf.placeholder(dtype=tf.float32, shape=[dim, dim]))
    Z1.append(tf.matmul(_X[0], _X[0]))

# Ops pinned to dev2: a random [dim, dim] tensor, a float32 placeholder of the
# same shape, and the placeholder multiplied by itself.
with tf.device(dev2):
    Y = [tf.random_uniform([dim, dim], 0, 10, name='Y' + str(0))]
    _Y = [tf.placeholder(dtype=tf.float32, shape=[dim, dim])]
    Z2 = [tf.matmul(_Y[0], _Y[0])]

# dev3 adds the two matmul results together.
with tf.device(dev3):
    Z3 = [tf.add(Z2[0], Z1[0])]

# Session configuration: cost model enabled, one thread for both intra-op and
# inter-op parallelism, opt_level set to -1, and four rewriter passes off.
config_proto = tf.ConfigProto(
    graph_options=tf.GraphOptions(build_cost_model=1))
config_proto.intra_op_parallelism_threads = 1
config_proto.inter_op_parallelism_threads = 1
config_proto.graph_options.optimizer_options.opt_level = -1
for _pass_name in ('constant_folding',
                   'arithmetic_optimization',
                   'dependency_optimization',
                   'layout_optimizer'):
    setattr(config_proto.graph_options.rewrite_options,
            _pass_name,
            rewriter_config_pb2.RewriterConfig.OFF)

sess = tf.Session(config=config_proto)
sess.run(tf.global_variables_initializer())
Ejemplo n.º 3
0
 def _add_infer_shapes(graph_def):
     """Re-import ``graph_def`` into a fresh graph and return the session's graph_def.

     The session is configured with ``infer_shapes=True``; the returned value
     is read from ``sess.graph_def`` after the session block has exited.
     """
     shape_config = tf.ConfigProto(
         graph_options=tf.GraphOptions(infer_shapes=True))
     with tf.Graph().as_default():
         with tf.Session(config=shape_config) as sess:
             tf.import_graph_def(graph_def, name="")
         return sess.graph_def
def create_session():
  """Return a tf.Session using L0 optimization, a 150000 ms op timeout and pruned-graph placement."""
  graph_options = tf.GraphOptions(
      optimizer_options=tf.OptimizerOptions(
          opt_level=tf.OptimizerOptions.L0))
  session_config = tf.ConfigProto(
      operation_timeout_in_ms=150000, graph_options=graph_options)
  # Constant folding is left at its default; the switch below stays disabled.
  #  session_config.graph_options.rewrite_options.constant_folding = rewriter_config_pb2.RewriterConfig.OFF
  session_config.graph_options.place_pruned_graph = True
  return tf.Session(config=session_config)
Ejemplo n.º 5
0
def main(unused_argv):
  """Build a TPUEstimator for the ResNet model and run the mode in FLAGS.mode.

  Modes handled: 'eval' (evaluate each new checkpoint as it appears),
  'eval_igt' (evaluate every existing checkpoint in step order, resumable),
  and, in the final branch, 'train' or 'train_and_eval'. After either training
  mode the model is exported when FLAGS.export_dir is set.

  Args:
    unused_argv: Ignored.
  """
  params = hyperparameters.get_hyperparameters(FLAGS.default_hparams_file,
                                               FLAGS.hparams_file, FLAGS,
                                               FLAGS.hparams)
  tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
      FLAGS.tpu if (FLAGS.tpu or params['use_tpu']) else '',
      zone=FLAGS.tpu_zone,
      project=FLAGS.gcp_project)

  # With async checkpointing the RunConfig does no periodic saving itself; the
  # 'train' branch below installs an AsyncCheckpointSaverHook instead.
  if params['use_async_checkpointing']:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(2500, params['iterations_per_loop'])
  config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=get_model_dir(params),
      save_checkpoints_steps=save_checkpoints_steps,
      keep_checkpoint_max=None,  # Keep all checkpoints.
      log_step_count_steps=FLAGS.log_step_count_steps,
      session_config=tf.ConfigProto(
          graph_options=tf.GraphOptions(
              rewrite_options=rewriter_config_pb2.RewriterConfig(
                  disable_meta_optimizer=True))),
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=params['iterations_per_loop'],
          num_shards=params['num_cores'],
          # copybara:strip_begin
          tpu_job_name=FLAGS.tpu_job_name,
          # copybara:strip_end
          per_host_input_for_training=contrib_tpu.InputPipelineConfig
          .PER_HOST_V2))  # pylint: disable=line-too-long

  resnet_classifier = contrib_tpu.TPUEstimator(
      use_tpu=params['use_tpu'],
      model_fn=resnet_model_fn,
      config=config,
      params=params,
      train_batch_size=params['train_batch_size'],
      eval_batch_size=params['eval_batch_size'],
      export_to_tpu=FLAGS.export_to_tpu)

  # copybara:strip_begin
  # When XLA compilation is requested, the estimator built above is replaced
  # by one whose model_fn is wrapped with xla.estimator_model_fn.
  if FLAGS.xla_compile:
    resnet_classifier = contrib_tpu.TPUEstimator(
        use_tpu=params['use_tpu'],
        model_fn=xla.estimator_model_fn(resnet_model_fn),
        config=config,
        params=params,
        train_batch_size=params['train_batch_size'],
        eval_batch_size=params['eval_batch_size'],
        export_to_tpu=FLAGS.export_to_tpu)
  # copybara:strip_end
  assert (params['precision'] == 'bfloat16' or
          params['precision'] == 'float32'), (
              'Invalid value for precision parameter; '
              'must be bfloat16 or float32.')
  tf.logging.info('Precision: %s', params['precision'])
  use_bfloat16 = params['precision'] == 'bfloat16'

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train = imagenet_input.ImageNetBigtableInput(
        is_training=True,
        use_bfloat16=use_bfloat16,
        transpose_input=params['transpose_input'],
        selection=select_train)
    imagenet_eval = imagenet_input.ImageNetBigtableInput(
        is_training=False,
        use_bfloat16=use_bfloat16,
        transpose_input=params['transpose_input'],
        selection=select_eval)
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    # One input object per split; caching is only enabled for the training
    # split (and only when params['use_cache'] is set).
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=params['transpose_input'],
            cache=params['use_cache'] and is_training,
            image_size=params['image_size'],
            num_parallel_calls=params['num_parallel_calls'],
            use_bfloat16=use_bfloat16) for is_training in [True, False]
    ]

  # Integer division: a trailing partial batch is not counted as a step.
  steps_per_epoch = params['num_train_images'] // params['train_batch_size']
  eval_steps = params['num_eval_images'] // params['eval_batch_size']

  if FLAGS.mode == 'eval':

    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(
        get_model_dir(params), timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d', eval_results,
                        elapsed_time)

        # Terminate eval job when final checkpoint is reached
        # The step is parsed from the checkpoint file name: the text after the
        # first '-' in the basename.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= params['train_steps']:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break

      except tf.errors.NotFoundError:
        # Since the coordinator is on a different job than the TPU worker,
        # sometimes the TPU worker does not finish initializing until long after
        # the CPU job tells it to start evaluating. In this case, the checkpoint
        # file could have been deleted already.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)

  elif FLAGS.mode == 'eval_igt':
    # IGT evaluation mode. Evaluate metrics for the desired parameters
    # (true or shifted) on the desired dataset (train or eval). Note that
    # train is still with data augmentation.

    # Get checkpoint file names.
    index_files = tf.gfile.Glob(
        os.path.join(get_model_dir(params), 'model.ckpt-*.index'))
    checkpoints = [fn[:-len('.index')] for fn in index_files]
    # Need to sort them to get proper tensorboard plotting (increasing event
    # timestamps correspond to increasing steps).
    checkpoint_steps = []
    for ckpt in checkpoints:
      tf.logging.info(ckpt)
      # NOTE(review): '[0-9]*' also matches an empty string, in which case
      # int('') below would raise ValueError -- confirm file names always
      # carry a numeric step.
      step_match = re.match(r'.*model.ckpt-([0-9]*)', ckpt)
      checkpoint_steps.append(int(step_match.group(1)))
    checkpoints = [
        ckpt for _, ckpt in sorted(zip(checkpoint_steps, checkpoints))
    ]
    tf.logging.info('There are {} checkpoints'.format(len(checkpoints)))
    tf.logging.info(', '.join(checkpoints))

    # Keep track of the last processed checkpoint (fault tolerance).
    # The state file holds the index of the next checkpoint to analyse; there
    # is one state file per (igt_eval_set, igt_eval_mode) pair.
    analysis_state_path = os.path.join(
        get_model_dir(params),
        'analysis_state_' + FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
    next_analysis_index = 0
    if tf.gfile.Exists(analysis_state_path):
      with tf.gfile.Open(analysis_state_path) as fd:
        next_analysis_index = int(fd.read())

    # Process each checkpoint.
    while next_analysis_index < len(checkpoints):
      tf.logging.info('Next analysis index: {}'.format(next_analysis_index))
      ckpt_path = checkpoints[next_analysis_index]
      tf.logging.info('Starting to evaluate: {}.'.format(ckpt_path))
      start_timestamp = time.time()  # This time will include compilation time

      if FLAGS.igt_eval_set == 'train':
        the_input_fn = imagenet_train.input_fn
        the_steps = steps_per_epoch
      elif FLAGS.igt_eval_set == 'eval':
        the_input_fn = imagenet_eval.input_fn
        the_steps = eval_steps
      else:
        raise ValueError('Unsupported igt_eval_set')

      eval_results = resnet_classifier.evaluate(
          input_fn=the_input_fn,
          steps=the_steps,
          checkpoint_path=ckpt_path,
          name=FLAGS.igt_eval_set + '_' + FLAGS.igt_eval_mode)
      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Eval results: %s. Elapsed seconds: %d', eval_results,
                      elapsed_time)

      # Persist progress only after the evaluation finished, so an interrupted
      # run re-evaluates the checkpoint it was working on.
      next_analysis_index += 1
      file_io.atomic_write_string_to_file(analysis_state_path,
                                          str(next_analysis_index))

  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(get_model_dir(params))  # pylint:disable=protected-access,g-line-too-long
    # Same value as the steps_per_epoch computed before the mode dispatch.
    steps_per_epoch = params['num_train_images'] // params['train_batch_size']
    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params['train_steps'],
        params['train_steps'] / steps_per_epoch, current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if params['use_async_checkpointing']:
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=get_model_dir(params),
                save_steps=max(2500, params['iterations_per_loop'])))
      resnet_classifier.train(
          input_fn=imagenet_train.input_fn,
          max_steps=params['train_steps'],
          hooks=hooks)

    else:
      assert FLAGS.mode == 'train_and_eval'
      while current_step < params['train_steps']:
        # Train for up to steps_per_eval number of steps.
        # At the end of training, a checkpoint will be written to --model_dir.
        next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                              params['train_steps'])
        resnet_classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        current_step = next_checkpoint

        tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                        next_checkpoint, int(time.time() - start_timestamp))

        # Evaluate the model on the most recent model in --model_dir.
        # Since evaluation happens in batches of --eval_batch_size, some images
        # may be excluded modulo the batch size. As long as the batch size is
        # consistent, the evaluated images are also consistent.
        tf.logging.info('Starting to evaluate.')
        eval_results = resnet_classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params['num_eval_images'] // params['eval_batch_size'])
        tf.logging.info('Eval results at step %d: %s', next_checkpoint,
                        eval_results)

      # The total elapsed time is only logged in 'train_and_eval' mode.
      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      params['train_steps'], elapsed_time)

    # Export is reached only from the two training modes.
    if FLAGS.export_dir is not None:
      # The guide to serve a exported TensorFlow model is at:
      #    https://www.tensorflow.org/serving/serving_basic
      tf.logging.info('Starting to export model.')
      unused_export_path = resnet_classifier.export_saved_model(
          export_dir_base=FLAGS.export_dir,
          serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
Ejemplo n.º 6
0
def main():
    """Train the dialog GAN, periodically validating, testing, logging and saving.

    Python 2 code (print statements, cPickle). Reads the module-level names
    TEST_FLAG and profile in addition to the Options objects built here.
    Data files are loaded from the current directory. The loop alternates
    discriminator and generator updates according to opt.d_freq / opt.g_freq.
    """
    # Prepare training and testing data

    loadpath = "./"

    src_file = loadpath + "Pairs2M.src.num"
    tgt_file = loadpath + "Pairs2M.tgt.num"
    dic_file = loadpath + "Pairs2M.reddit.dic"

    # opt configures the source side, opt_t the target side.
    opt = Options()
    opt_t = Options()

    train, val, test, wordtoix, ixtoword = read_pair_data_full(
        src_file,
        tgt_file,
        dic_file,
        max_num=opt.data_size,
        p_f=loadpath + 'demo.p')
    # Each pair is unpacked as (target, source) further down, so x[1] is
    # checked against the source limit and x[0] against the target limit.
    # Pairs with a side of length <= 2 or >= maxlen - 4 are dropped.
    train = [
        x for x in train
        if 2 < len(x[1]) < opt.maxlen - 4 and 2 < len(x[0]) < opt_t.maxlen - 4
    ]
    val = [
        x for x in val
        if 2 < len(x[1]) < opt.maxlen - 4 and 2 < len(x[0]) < opt_t.maxlen - 4
    ]

    # In test mode all splits are evaluated together, the test pass runs on
    # the first iteration, and the process exits right after it (see below).
    if TEST_FLAG:
        test = test + val + train
        opt.test_freq = 1

    opt.n_words = len(ixtoword)
    opt_t.n_words = len(ixtoword)
    print dict(opt)
    if opt.model == 'cnn_rnn':
        opt_t.maxlen = opt_t.maxlen - opt_t.filter_shape + 1
        opt_t.update_params()
        print dict(opt_t)

    print('Total words: %d' % opt.n_words)

    # load w2v
    # A pickled {word index: vector} cache is used when present; otherwise it
    # is built from the word2vec binary and written back for the next run.
    # NOTE(review): unpickling is unsafe if the cache file is not trusted.
    if os.path.exists(opt.embedding_path_lime):
        with open(opt.embedding_path_lime, 'rb') as pfile:
            embedding = cPickle.load(pfile)
    else:
        w2v = gensim.models.KeyedVectors.load_word2vec_format(
            opt.embedding_path, binary=True)
        embedding = {
            i: copy.deepcopy(w2v[ixtoword[i]])
            for i in range(opt.n_words) if ixtoword[i] in w2v
        }
        with open(opt.embedding_path_lime, 'wb') as pfile:
            cPickle.dump(embedding, pfile, protocol=cPickle.HIGHEST_PROTOCOL)

    # Build the model graph on the first GPU.
    for d in ['/gpu:0']:
        with tf.device(d):
            src_ = tf.placeholder(tf.int32,
                                  shape=[opt.batch_size, opt.sent_len])
            tgt_ = tf.placeholder(tf.int32,
                                  shape=[opt_t.batch_size, opt_t.sent_len])
            res_, gan_cost_d_, train_op_d, gan_cost_g_, train_op_g = dialog_gan(
                src_, tgt_, opt, opt_t)
            merged = tf.summary.merge_all()

    # uidx counts training iterations across all epochs.
    uidx = 0
    graph_options = tf.GraphOptions(build_cost_model=1)
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True,
                            graph_options=graph_options)
    config.gpu_options.per_process_gpu_memory_fraction = 0.95

    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:

                t_vars = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES)  #tf.trainable_variables()

                if opt.load_from_ae:
                    # Checkpoint keys lack the 2-character prefix and the
                    # ':0' suffix of the graph variable names, so both are
                    # stripped before matching. Only variables whose shape
                    # equals the stored shape are restored.
                    save_keys = tensors_key_in_file(
                        opt.load_path)  #t_var g_W:0    key: W
                    ss = [
                        var for var in t_vars
                        if var.name[2:][:-2] in save_keys.keys()
                    ]
                    ss = [
                        var.name[2:] for var in ss
                        if var.get_shape() == save_keys[var.name[2:][:-2]]
                    ]
                    cc = {
                        var.name[2:][:-2]: var
                        for var in t_vars if var.name[2:] in ss
                    }

                    loader = tf.train.Saver(var_list=cc)
                    loader.restore(sess, opt.load_path)

                    print("Loading variables from '%s'." % opt.load_path)
                    print(
                        "Loaded variables:" + " ".join(
                            [var.name
                             for var in t_vars if var.name[2:] in ss]))
                else:
                    # Same name-and-shape matching, but here only the ':0'
                    # suffix is stripped from the variable names.
                    save_keys = tensors_key_in_file(opt.load_path)
                    ss = [
                        var for var in t_vars
                        if var.name[:-2] in save_keys.keys()
                    ]
                    ss = [
                        var.name for var in ss
                        if var.get_shape() == save_keys[var.name[:-2]]
                    ]
                    loader = tf.train.Saver(
                        var_list=[var for var in t_vars if var.name in ss])
                    loader.restore(sess, opt.load_path)
                    print("Loading variables from '%s'." % opt.load_path)
                    print("Loaded variables:" + str(ss))
                    # load reverse model
                    # Best effort: only variables whose name contains
                    # 'g_rev_' are taken from ./save/rev_model, and any
                    # failure is reported without aborting.
                    try:
                        save_keys = tensors_key_in_file('./save/rev_model')
                        ss = [
                            var for var in t_vars
                            if var.name[:-2] in save_keys.keys()
                            and 'g_rev_' in var.name
                        ]
                        ss = [
                            var.name for var in ss
                            if var.get_shape() == save_keys[var.name[:-2]]
                        ]
                        loader = tf.train.Saver(
                            var_list=[var for var in t_vars if var.name in ss])
                        loader.restore(sess, './save/rev_model')
                        print(
                            "Loading reverse variables from ./save/rev_model")
                        print("Loaded variables:" + str(ss))
                    except Exception as e:
                        print("No reverse model loaded")

            except Exception as e:
                # Any restore failure falls back to a fresh random
                # initialization of all variables.
                print 'Error: ' + str(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())
        # Last observed losses; they keep their previous value on iterations
        # where the corresponding network is not updated.
        loss_d, loss_g = 0, 0
        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1

                tgt, src = zip(*[train[t] for t in train_index])
                x_batch = prepare_data_for_cnn(src, opt)  # Batch L

                y_batch = prepare_data_for_rnn(
                    tgt, opt_t, is_add_GO=False
                ) if opt.model == 'cnn_rnn' else prepare_data_for_cnn(
                    tgt, opt_t)

                feed = {src_: x_batch, tgt_: y_batch}

                # Discriminator step every opt.d_freq iterations; with
                # profiling on, the run is traced into run_metadata.
                if uidx % opt.d_freq == 0:
                    if profile:
                        _, loss_d = sess.run(
                            [train_op_d, gan_cost_d_],
                            feed_dict=feed,
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                    else:
                        _, loss_d = sess.run([train_op_d, gan_cost_d_],
                                             feed_dict=feed)

                # Generator step every opt.g_freq iterations.
                if uidx % opt.g_freq == 0:
                    if profile:
                        _, loss_g = sess.run(
                            [train_op_g, gan_cost_g_],
                            feed_dict=feed,
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                    else:
                        _, loss_g = sess.run([train_op_g, gan_cost_g_],
                                             feed_dict=feed)

                # Profiling mode prints the analysis after the first
                # iteration and terminates the process.
                if profile:
                    tf.contrib.tfprof.model_analyzer.print_model_analysis(
                        tf.get_default_graph(),
                        run_meta=run_metadata,
                        tfprof_options=tf.contrib.tfprof.model_analyzer.
                        PRINT_ALL_TIMING_MEMORY)
                    exit(0)

                if uidx % opt.valid_freq == 0:
                    # Validate on floor(VALID_SIZE / batch_size) batches
                    # sampled from val with np.random.choice.
                    VALID_SIZE = 1024
                    valid_multiplier = np.int(
                        np.floor(VALID_SIZE / opt.batch_size))
                    res_all, val_tgt_all, loss_val_d_all, loss_val_g_all = [], [], [], []
                    for val_step in range(valid_multiplier):
                        valid_index = np.random.choice(len(val),
                                                       opt.batch_size)
                        val_tgt, val_src = zip(*[val[t] for t in valid_index])
                        val_tgt_all.extend(val_tgt)
                        x_val_batch = prepare_data_for_cnn(val_src,
                                                           opt)  # Batch L

                        y_val_batch = prepare_data_for_rnn(
                            val_tgt, opt_t, is_add_GO=False
                        ) if opt.model == 'cnn_rnn' else prepare_data_for_cnn(
                            val_tgt, opt_t)

                        feed_val = {src_: x_val_batch, tgt_: y_val_batch}
                        loss_val_d, loss_val_g = sess.run(
                            [gan_cost_d_, gan_cost_g_], feed_dict=feed_val)
                        loss_val_d_all.append(loss_val_d)
                        loss_val_g_all.append(loss_val_g)
                        res = sess.run(res_, feed_dict=feed_val)
                        res_all.extend(res['syn_sent'])

                    print("Validation: loss D %f loss G %f " %
                          (np.mean(loss_val_d_all), np.mean(loss_val_g_all)))
                    # The sample printed below is the first item of the last
                    # validation batch; index 0 is skipped when decoding.
                    #print "Val Perm :" + " ".join([ixtoword[x] for x in val_src_permutated[0] if x != 0])
                    print "Val Source:" + u' '.join([
                        ixtoword[x] for x in val_src[0] if x != 0
                    ]).encode('utf-8').strip()
                    print "Val Target :" + u' '.join([
                        ixtoword[x] for x in val_tgt[0] if x != 0
                    ]).encode('utf-8').strip()
                    print "Val Generated:" + u' '.join([
                        ixtoword[x] for x in res['syn_sent'][0] if x != 0
                    ]).encode('utf-8').strip()
                    print ""

                    val_set = [prepare_for_bleu(s) for s in val_tgt_all]
                    gen = [prepare_for_bleu(s) for s in res_all]

                    [bleu1s, bleu2s, bleu3s,
                     bleu4s] = cal_BLEU_4(gen, {0: val_set},
                                          is_corpus=opt.is_corpus)
                    [rouge1, rouge2, rouge3, rouge4, rougeL,
                     rouges] = cal_ROUGE(gen, {0: val_set},
                                         is_corpus=opt.is_corpus)
                    etp_score, dist_score = cal_entropy(gen)
                    bleu_nltk = cal_BLEU_4_nltk(gen,
                                                val_set,
                                                is_corpus=opt.is_corpus)
                    rel_score = cal_relevance(gen, val_set, embedding)

                    print 'Val BLEU: ' + ' '.join([
                        str(round(it, 3))
                        for it in (bleu_nltk, bleu1s, bleu2s, bleu3s, bleu4s)
                    ])
                    print 'Val Rouge: ' + ' '.join([
                        str(round(it, 3))
                        for it in (rouge1, rouge2, rouge3, rouge4)
                    ])
                    print 'Val Entropy: ' + ' '.join([
                        str(round(it, 3))
                        for it in (etp_score[0], etp_score[1], etp_score[2],
                                   etp_score[3])
                    ])
                    print 'Val Diversity: ' + ' '.join([
                        str(round(it, 3))
                        for it in (dist_score[0], dist_score[1], dist_score[2],
                                   dist_score[3])
                    ])
                    print 'Val Relevance(G,A,E): ' + ' '.join([
                        str(round(it, 3))
                        for it in (rel_score[0], rel_score[1], rel_score[2])
                    ])
                    print 'Val Avg. length: ' + str(
                        round(
                            np.mean([
                                len([y for y in x if y != 0]) for x in res_all
                            ]), 3))
                    print ""
                    # The merged summary is computed on the last validation
                    # batch only; the scalar metrics cover all sampled batches.
                    summary = sess.run(merged, feed_dict=feed_val)
                    summary2 = tf.Summary(value=[
                        tf.Summary.Value(tag="bleu-2", simple_value=bleu2s),
                        tf.Summary.Value(tag="rouge-2", simple_value=rouge2),
                        tf.Summary.Value(tag="etp-4",
                                         simple_value=etp_score[3])
                    ])

                    test_writer.add_summary(summary, uidx)
                    test_writer.add_summary(summary2, uidx)

                if uidx % opt.test_freq == 0:
                    # floor(len(test) / batch_size) + 1 batches are run.
                    # Indices wrap modulo len(test), so the final batch is
                    # padded with items from the start of the test set.
                    iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
                    res_all, test_tgt_all = [], []

                    for i in range(iter_num):
                        test_index = range(i * opt.batch_size,
                                           (i + 1) * opt.batch_size)
                        test_tgt, test_src = zip(
                            *[test[t % len(test)] for t in test_index])
                        test_tgt_all.extend(test_tgt)
                        # NOTE: x_batch, y_batch and feed are rebound here, so
                        # the print_freq block below sees the last test batch
                        # whenever a test pass ran in the same iteration.
                        x_batch = prepare_data_for_cnn(test_src, opt)
                        y_batch = prepare_data_for_rnn(
                            test_tgt, opt_t, is_add_GO=False
                        ) if opt.model == 'cnn_rnn' else prepare_data_for_cnn(
                            test_tgt, opt_t)
                        feed = {src_: x_batch, tgt_: y_batch}
                        res = sess.run(res_, feed_dict=feed)
                        res_all.extend(res['syn_sent'])

                    test_set = [prepare_for_bleu(s) for s in test_tgt_all]
                    gen = [prepare_for_bleu(s) for s in res_all]
                    [bleu1s, bleu2s, bleu3s,
                     bleu4s] = cal_BLEU_4(gen, {0: test_set},
                                          is_corpus=opt.is_corpus)
                    [rouge1, rouge2, rouge3, rouge4, rougeL,
                     rouges] = cal_ROUGE(gen, {0: test_set},
                                         is_corpus=opt.is_corpus)
                    etp_score, dist_score = cal_entropy(gen)
                    bleu_nltk = cal_BLEU_4_nltk(gen,
                                                test_set,
                                                is_corpus=opt.is_corpus)
                    rel_score = cal_relevance(gen, test_set, embedding)

                    print 'Test BLEU: ' + ' '.join([
                        str(round(it, 3))
                        for it in (bleu_nltk, bleu1s, bleu2s, bleu3s, bleu4s)
                    ])
                    print 'Test Rouge: ' + ' '.join([
                        str(round(it, 3))
                        for it in (rouge1, rouge2, rouge3, rouge4)
                    ])
                    print 'Test Entropy: ' + ' '.join([
                        str(round(it, 3))
                        for it in (etp_score[0], etp_score[1], etp_score[2],
                                   etp_score[3])
                    ])
                    print 'Test Diversity: ' + ' '.join([
                        str(round(it, 3))
                        for it in (dist_score[0], dist_score[1], dist_score[2],
                                   dist_score[3])
                    ])
                    print 'Test Relevance(G,A,E): ' + ' '.join([
                        str(round(it, 3))
                        for it in (rel_score[0], rel_score[1], rel_score[2])
                    ])
                    print 'Test Avg. length: ' + str(
                        round(
                            np.mean([
                                len([y for y in x if y != 0]) for x in res_all
                            ]), 3))
                    print ''

                    # Test-only runs stop after the first test pass.
                    if TEST_FLAG:
                        exit()

                if uidx % opt.print_freq == 0:
                    print("Iteration %d: loss D %f loss G %f" %
                          (uidx, loss_d, loss_g))

                    res = sess.run(res_, feed_dict=feed)

                    if opt.grad_penalty:
                        print "grad_penalty: " + str(res['gp'])
                    print "Source:" + u' '.join([
                        ixtoword[x] for x in x_batch[0] if x != 0
                    ]).encode('utf-8').strip()
                    print "Target:" + u' '.join([
                        ixtoword[x] for x in y_batch[0] if x != 0
                    ]).encode('utf-8').strip()
                    print "Generated:" + u' '.join([
                        ixtoword[x] for x in res['syn_sent'][0] if x != 0
                    ]).encode('utf-8').strip()
                    print ""

                    sys.stdout.flush()
                    summary = sess.run(merged, feed_dict=feed)
                    train_writer.add_summary(summary, uidx)

                # The checkpoint is written to the same opt.save_path each
                # time (no global_step is passed to saver.save).
                if uidx % opt.save_freq == 0:
                    saver.save(sess, opt.save_path)
Ejemplo n.º 7
0
    def load(self,
             ckpt_path,
             hparams,
             master='local',
             batch_timeout_micros=80 * 1000,
             buckets=None):
        """Build the prediction ops on a fresh graph and restore weights.

        Creates `self.tpu_graph` and `self.sess`, builds the vocabulary
        lookup table and the input placeholders, appends one or more ops to
        `self.predict_ops`, then loads the checkpoint through float32 shadow
        variables that are cast (and, for the output projection, padded)
        into the graph's own variables.

        Args:
            ckpt_path: checkpoint location handed to
                `tf.train.init_from_checkpoint`.
            hparams: hyper-parameter object; `infer_batch_size`,
                `src_max_len_infer` and `tgt_vocab_size` are read here.
            master: value passed to `TPUClusterResolver` to locate the
                session master.
            batch_timeout_micros: batching timeout forwarded to
                `self.server_op` for the per-bucket ops (non-'Offline' only).
            buckets: iterable with one entry per bucket; only its length
                matters here (one server op is created per entry).

        Returns:
            `self`, so the call can be chained.

        NOTE(review): `self.vocab_prefix`, `self.scenario`,
        `self.predict_ops`, `self.offline_op` and `self.server_op` are
        expected to exist before this is called; they are not set here.
        """
        self.hparams = hparams
        self.buckets = buckets
        self.tpu_graph = tf.Graph()
        # Session config: 10-minute op timeout, soft placement, and the
        # grappler meta optimizer switched off.
        tpu_config = tf.ConfigProto(
            operation_timeout_in_ms=600 * 1000,
            allow_soft_placement=True,
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True)),
            isolate_session_state=True)
        # Find tpu master.
        print('master value set to:', master)
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            master, zone=None, project=None)
        master = tpu_cluster_resolver.get_master()
        self.sess = tf.Session(master, graph=self.tpu_graph, config=tpu_config)
        with self.tpu_graph.as_default():
            # Index -> token string table; unknown ids map to vocab_utils.UNK.
            self.vocab_table = tf.contrib.lookup.index_to_string_table_from_file(
                self.vocab_prefix, default_value=vocab_utils.UNK)

        if self.scenario == 'Offline':
            with self.tpu_graph.as_default():
                # Offline: fully static shapes, a single predict op.
                self.source = tf.placeholder(shape=(hparams.infer_batch_size,
                                                    hparams.src_max_len_infer),
                                             dtype=tf.int32)
                # NOTE(review): `(hparams.infer_batch_size)` is a plain int,
                # not a 1-tuple; presumably intended as a rank-1 shape.
                self.source_sequence_length = tf.placeholder(
                    shape=(hparams.infer_batch_size), dtype=tf.int32)

                inputs = [[self.source, self.source_sequence_length]]
                self.predict_ops.append(self.offline_op(inputs))
        else:
            with self.tpu_graph.as_default():
                # Any other scenario: batch dimension left dynamic.
                self.source = tf.placeholder(
                    shape=[None, hparams.src_max_len_infer], dtype=tf.int32)
                self.source_sequence_length = tf.placeholder(shape=[None],
                                                             dtype=tf.int32)
                inputs = [self.source, self.source_sequence_length]
                # One identically configured server op per bucket.
                # NOTE(review): with the default `buckets=None` this loop
                # raises TypeError; callers on this path must pass buckets.
                for _ in buckets:
                    self.predict_ops.append(
                        self.server_op(
                            inputs,
                            num_batch_threads=16,
                            max_batch_size=hparams.infer_batch_size,
                            batch_timeout_micros=batch_timeout_micros,
                            allowed_batch_sizes=[hparams.infer_batch_size],
                            max_enqueued_batches=10000))
                # Add longest sequence predict op.
                # Same settings except for a fixed 5-second batch timeout.
                self.predict_ops.append(
                    self.server_op(
                        inputs,
                        num_batch_threads=16,
                        max_batch_size=hparams.infer_batch_size,
                        batch_timeout_micros=5000 * 1000,
                        allowed_batch_sizes=[hparams.infer_batch_size],
                        max_enqueued_batches=10000))

        with self.tpu_graph.as_default():
            # Snapshot the model variables before the shadow copies below are
            # added to the same graph.
            vs = tf.global_variables()

            assign_ops = []
            var_map = {}
            # For every model variable create a float32 twin under the 'f32'
            # scope. The checkpoint is restored into the twins, which are
            # then cast to the model variable's dtype and assigned.
            with tf.variable_scope('f32', dtype=tf.float32):
                for i in vs:
                    # `i.name[:-2]` drops the last two characters of the
                    # tensor name (presumably the ':0' output suffix).
                    if 'output_projection' in i.name:
                        # The twin has `tgt_vocab_size` columns; zero-pad on
                        # the right up to 128 * (tgt_vocab_size // 128 + 1).
                        # NOTE(review): when tgt_vocab_size is already a
                        # multiple of 128 this adds a full extra 128 columns;
                        # confirm it matches how the model pads its vocab.
                        new_var = tf.get_variable(
                            i.name[:-2], [i.shape[0], hparams.tgt_vocab_size])
                        assign_ops.append(
                            tf.assign(
                                i,
                                tf.pad(
                                    tf.cast(new_var, i.dtype),
                                    [[0, 0],
                                     [
                                         0, 128 *
                                         (hparams.tgt_vocab_size // 128 + 1) -
                                         hparams.tgt_vocab_size
                                     ]])))
                    else:
                        new_var = tf.get_variable(i.name[:-2], i.shape)
                        assign_ops.append(
                            tf.assign(i, tf.cast(new_var, i.dtype)))
                    # Original variable name -> twin variable name, used as
                    # the assignment map for the checkpoint restore.
                    var_map[i.name[:-2]] = new_var.name[:-2]

            # Order matters: init_from_checkpoint only overrides initializers,
            # so it must precede the global initializer run; the assign ops
            # then copy the restored values into the model variables.
            self.sess.run(tpu.initialize_system())
            tf.train.init_from_checkpoint(ckpt_path, var_map)
            self.sess.run(tf.initializers.global_variables())
            self.sess.run(tf.tables_initializer())
            self.sess.run(assign_ops)

        return self
Ejemplo n.º 8
0
def run_model(opt, train, val, ixtoword):
    """Train the textGAN model and periodically report validation results.

    Args:
        opt: options object. Read here: n_words, embed_size, batch_size,
            sent_len, log_path, restore, max_epochs, dis_steps, gen_steps,
            valid_freq, discrimination. May be mutated: `W_emb` is set when a
            matching saved embedding is found, otherwise `fix_emb` is forced
            to False.
        train: training sentences; only `len()` and integer indexing are used.
        val: validation sentences, used the same way.
        ixtoword: mapping from word index to token, used to print a generated
            sentence.

    Side effects: writes TensorBoard summaries under `opt.log_path + '/test'`
    and overwrites './text_news/syn_val_words.txt' at every validation step.

    NOTE(review): `profile` is a free name that is not defined in this
    function; it must exist at module level.
    """

    # Optionally reuse a previously saved word embedding if its shape matches
    # the configured vocabulary/embedding size.
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:'+ str(params['Wemb'].shape) + ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    # Build the graph pinned to the second GPU (soft placement is enabled in
    # the session config below).
    with tf.device('/gpu:1'):
        # x_ receives the noised sentences, x_org_ the original ones.
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        # is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, g_loss_, d_loss_, gen_op, dis_op = textGAN(x_, x_org_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())

    # uidx counts update steps across all epochs.
    uidx = 0
    config = tf.ConfigProto(log_device_placement = False, allow_soft_placement=True, graph_options=tf.GraphOptions(build_cost_model=1))
    #config = tf.ConfigProto(device_count={'GPU':0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    # NOTE(review): `saver` is created but never used in this function.
    saver = tf.train.Saver()

    # Only filled in when the `profile` branch below is taken.
    run_metadata = tf.RunMetadata()

    with tf.Session(config = config) as sess:
        # NOTE(review): train_writer is never written to in this function and
        # neither writer is explicitly closed.
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            # Best effort: on any failure fall back to fresh initialization.
            try:
                #pdb.set_trace()

                t_vars = tf.trainable_variables()
                #print([var.name[:-2] for var in t_vars])
                # NOTE(review): the returned `loader` is not used afterwards.
                loader = restore_from_save(t_vars, sess, opt)
                print('\nload successfully\n')

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        # for i in range(34):
        #     valid_index = np.random.choice(
        #         len(val), opt.batch_size)
        #     val_sents = [val[t] for t in valid_index]
        #     val_sents_permutated = add_noise(val_sents, opt)
        #     x_val_batch = prepare_data_for_cnn(
        #         val_sents_permutated, opt)
        #     x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
        #     res = sess.run(res_, feed_dict={
        #                     x_: x_val_batch, x_org_: x_val_batch_org})
        #     if i == 0:
        #         valid_text = res['syn_sent']
        #     else:
        #         valid_text = np.concatenate(
        #             (valid_text, res['syn_sent']), 0)

        # np.savetxt('./text_news/vae_words.txt', valid_text, fmt='%i', delimiter=' ')
        # pdb.set_trace()

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]

                # The generator input is a noised copy of the batch; the
                # clean batch is fed separately as x_org_.
                sents_permutated = add_noise(sents, opt)

                #sents[0] = np.random.permutation(sents[0])
                x_batch = prepare_data_for_cnn(sents_permutated, opt) # Batch L
                x_batch_org = prepare_data_for_rnn(sents, opt)
                # Reset each step; these values are not read again below.
                d_loss = 0
                g_loss = 0
                # The discriminator / generator are each updated only on steps
                # divisible by dis_steps / gen_steps. The `profile` branch
                # performs the same updates with full tracing enabled.
                if profile:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch, x_org_: x_batch_org},
                                             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),run_metadata=run_metadata)
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch, x_org_: x_batch_org},
                                             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),run_metadata=run_metadata)
                else:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch, x_org_: x_batch_org})
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch, x_org_: x_batch_org})
                
                ''' validation '''
                if uidx % opt.valid_freq == 0:
                    
                    # Draw one random validation batch (np.random.choice
                    # samples with replacement by default).
                    valid_index = np.random.choice(len(val), opt.batch_size)
                    val_sents = [val[t] for t in valid_index]

                    val_sents_permutated = add_noise(val_sents, opt)

                    x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)
                    x_val_batch_org = prepare_data_for_rnn(val_sents, opt)

                    # Three separate runs on the same feed: losses first,
                    # then the result dictionary.
                    d_loss_val = sess.run(d_loss_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    g_loss_val = sess.run(g_loss_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})


                    res = sess.run(res_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    print("Validation d_loss %f, g_loss %f  mean_dist %f" % (d_loss_val, g_loss_val, res['mean_dist']))
                    # Print the first generated sentence, skipping index 0
                    # (presumably padding).
                    print("Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent']
                                               [0] if x != 0]).encode('utf-8', 'ignore').decode("utf8").strip())
                    print("MMD loss %f, GAN loss %f" % (res['mmd'], res['gan']))
                    # np.savetxt('./text_arxiv/syn_val_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    if opt.discrimination:
                        print ("Real Prob %f Fake Prob %f" % (res['prob_r'], res['prob_f']))
                    
                    # Generate four more random validation batches and stack
                    # their synthetic sentences for the text dump below.
                    for i in range(4):
                        valid_index = np.random.choice(
                            len(val), opt.batch_size)
                        val_sents = [val[t] for t in valid_index]
                        val_sents_permutated = add_noise(val_sents, opt)
                        x_val_batch = prepare_data_for_cnn(
                            val_sents_permutated, opt)
                        x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
                        res = sess.run(res_, feed_dict={
                                       x_: x_val_batch, x_org_: x_val_batch_org})
                        if i == 0:
                            valid_text = res['syn_sent']
                        else:
                            valid_text = np.concatenate(
                                (valid_text, res['syn_sent']), 0)

                    # Overwritten on every validation step.
                    np.savetxt('./text_news/syn_val_words.txt',valid_text, fmt='%i', delimiter=' ')

                    # NOTE(review): `val_sents` and `res` here are those of
                    # the LAST of the four batches above, so BLEU is scored
                    # on a single batch, not on all of `valid_text`.
                    val_set = [prepare_for_bleu(s) for s in val_sents]
                    [bleu2s, bleu3s, bleu4s] = cal_BLEU([prepare_for_bleu(s) for s in res['syn_sent']], {0: val_set})
                    print('Val BLEU (2,3,4): ' + ' '.join([str(round(it, 3)) for it in (bleu2s, bleu3s, bleu4s)]))

                    summary = sess.run(merged, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    test_writer.add_summary(summary, uidx)
Ejemplo n.º 9
0
def main(unused_argv):
    """Train, evaluate and optionally export an EfficientNet TPUEstimator.

    Everything is driven by FLAGS:
      * FLAGS.mode == 'eval': evaluate each new checkpoint in
        FLAGS.model_dir until the one for FLAGS.train_steps was seen.
      * FLAGS.mode == 'train': train up to FLAGS.train_steps.
      * FLAGS.mode == 'train_and_eval': alternate training chunks of
        FLAGS.steps_per_eval steps with a full evaluation.
    If FLAGS.export_dir is set, the model is exported at the end.

    Args:
        unused_argv: ignored.

    Raises:
        ValueError: no input_image_size flag was given and the model name
            does not start with one of the known EfficientNet prefixes.
    """
    # An explicit flag wins; otherwise take the resolution from the preset
    # of the named variant. The more specific prefixes are tested first.
    image_size = FLAGS.input_image_size
    if not image_size:
        model_name = FLAGS.model_name
        if model_name.startswith('efficientnet-edgetpu'):
            preset = efficientnet_edgetpu_builder.efficientnet_edgetpu_params(
                model_name)
        elif model_name.startswith('efficientnet-tpu'):
            preset = efficientnet_tpu_builder.efficientnet_tpu_params(
                model_name)
        elif model_name.startswith('efficientnet'):
            preset = efficientnet_builder.efficientnet_params(model_name)
        else:
            raise ValueError(
                'input_image_size must be set except for EfficientNet')
        _, _, image_size, _ = preset

    # The background label is hard-wired off here (the check against
    # FLAGS.num_label_classes == 1001 is disabled).
    with_background = False

    cluster = (tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
               if FLAGS.tpu or FLAGS.use_tpu else None)

    # With async checkpointing the saver hook added in 'train' mode is in
    # charge, so the estimator's own periodic saving is disabled.
    checkpoint_every = (None if FLAGS.use_async_checkpointing else max(
        100, FLAGS.iterations_per_loop))

    session_config = tf.ConfigProto(graph_options=tf.GraphOptions(
        rewrite_options=rewriter_config_pb2.RewriterConfig(
            disable_meta_optimizer=True)))
    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        per_host_input_for_training=(
            tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = tf.contrib.tpu.RunConfig(
        cluster=cluster,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=checkpoint_every,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=session_config,
        tpu_config=tpu_config)

    # Model parameters handed to model_fn.
    model_params = {
        'steps_per_epoch': FLAGS.num_train_images / FLAGS.train_batch_size,
        'use_bfloat16': FLAGS.use_bfloat16,
    }
    tpu_estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu,
        params=model_params)

    def make_input(is_training):
        """Return the ImageNet input object for training or for eval."""
        if FLAGS.bigtable_instance:
            tf.logging.info('Using Bigtable dataset, table %s',
                            FLAGS.bigtable_table)
            train_selection, eval_selection = _select_tables_from_flags()
            selection = train_selection if is_training else eval_selection
            return imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=FLAGS.use_bfloat16,
                transpose_input=FLAGS.transpose_input,
                selection=selection,
                include_background_label=with_background,
                autoaugment_name=FLAGS.autoaugment_name,
                mixup_alpha=FLAGS.mixup_alpha)

        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        # Caching is only enabled for the training pipeline.
        return imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=FLAGS.transpose_input,
            cache=FLAGS.use_cache and is_training,
            image_size=image_size,
            num_parallel_calls=FLAGS.num_parallel_calls,
            use_bfloat16=FLAGS.use_bfloat16,
            include_background_label=with_background,
            autoaugment_name=FLAGS.autoaugment_name,
            mixup_alpha=FLAGS.mixup_alpha,
            num_classes=FLAGS.num_label_classes)

    train_input = make_input(is_training=True)
    eval_input = make_input(is_training=False)

    if FLAGS.mode == 'eval':
        eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
        # Each iteration handles one newly written checkpoint.
        checkpoints = evaluation.checkpoints_iterator(
            FLAGS.model_dir, timeout=FLAGS.eval_timeout)
        for ckpt in checkpoints:
            tf.logging.info('Starting to evaluate.')
            try:
                # The measured time includes compilation.
                started = time.time()
                metrics = tpu_estimator.evaluate(
                    input_fn=eval_input.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                seconds = int(time.time() - started)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                metrics, seconds)
                utils.archive_ckpt(metrics, metrics['top_1_accuracy'], ckpt)

                # The step number is the suffix of the checkpoint file name;
                # stop once the final training step has been evaluated.
                step = int(os.path.basename(ckpt).split('-')[1])
                if step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d', step)
                    break
            except tf.errors.NotFoundError:
                # The checkpoint may already have been deleted by the time
                # the worker gets around to evaluating it; move on.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)
    else:
        # 'train' or 'train_and_eval': resume from the step stored in
        # model_dir.
        step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
            FLAGS.model_dir)

        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', FLAGS.train_steps,
            FLAGS.train_steps / model_params['steps_per_epoch'], step)

        # The measured time includes compilation.
        started = time.time()

        if FLAGS.mode == 'train':
            hooks = ([
                async_checkpoint.AsyncCheckpointSaverHook(
                    checkpoint_dir=FLAGS.model_dir,
                    save_steps=max(100, FLAGS.iterations_per_loop))
            ] if FLAGS.use_async_checkpointing else [])
            tpu_estimator.train(input_fn=train_input.input_fn,
                                max_steps=FLAGS.train_steps,
                                hooks=hooks)
        else:
            assert FLAGS.mode == 'train_and_eval'
            while step < FLAGS.train_steps:
                # Train one chunk; a checkpoint is written to model_dir when
                # the train call returns.
                target_step = min(step + FLAGS.steps_per_eval,
                                  FLAGS.train_steps)
                tpu_estimator.train(input_fn=train_input.input_fn,
                                    max_steps=target_step)
                step = target_step

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d.',
                    target_step, int(time.time() - started))

                # Evaluate the newest checkpoint. Evaluation runs in whole
                # batches, so trailing images beyond a multiple of the eval
                # batch size are left out.
                tf.logging.info('Starting to evaluate.')
                metrics = tpu_estimator.evaluate(
                    input_fn=eval_input.input_fn,
                    steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
                tf.logging.info('Eval results at step %d: %s', target_step,
                                metrics)
                ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
                utils.archive_ckpt(metrics, metrics['top_1_accuracy'], ckpt)

            seconds = int(time.time() - started)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                FLAGS.train_steps, seconds)

    if FLAGS.export_dir:
        export(tpu_estimator, FLAGS.export_dir, image_size)
Ejemplo n.º 10
0
def cfg():
    """Return a session ConfigProto with graph optimizations turned down.

    The optimizer level is set to L0 with constant folding disabled, and
    device placement logging is switched on.
    """
    no_optimizations = tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L0,
        do_constant_folding=False,
    )
    return tf.ConfigProto(
        log_device_placement=True,
        graph_options=tf.GraphOptions(optimizer_options=no_optimizations),
    )
Ejemplo n.º 11
0
def main(unused_argv):
  """Train, evaluate and optionally export MnasNet with tf.estimator on GPUs.

  Everything is driven by FLAGS:
    * FLAGS.total_nodes > 1 switches from MirroredStrategy to
      MultiWorkerMirroredStrategy (NCCL) and sets TF_CONFIG for a fixed
      two-worker cluster, or for a local evaluator when FLAGS.is_evaluator.
    * FLAGS.mode == 'eval': evaluate each new checkpoint in FLAGS.model_dir
      until the one for FLAGS.train_steps was seen, then export.
    * FLAGS.mode == 'train': train up to FLAGS.train_steps.
    * FLAGS.mode == 'train_and_eval': run tf.estimator.train_and_evaluate,
      then export.

  Args:
    unused_argv: ignored.
  """
  # Mnas optimize - set the proper image data format
  tf.keras.backend.set_image_data_format(FLAGS.data_format)
  # Mnas optimize - optimization flags
  # gpu_thread_count = 2
  # os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
  # os.environ['TF_GPU_THREAD_COUNT'] = str(gpu_thread_count)
  # os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
  # os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
  # enable mixed precision? -> Not much benefits seen yet
  # os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"

  # Hard-coded addresses of the two training workers.
  node0 = "172.31.11.9:6060"
  node1 = "172.31.1.33:6060"

  strategy = tf.distribute.MirroredStrategy()
  if FLAGS.total_nodes > 1:
    # NOTE(review): the strategy is created before TF_CONFIG is exported;
    # the original ordering is kept, confirm it suits the TF version in use.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        tf.distribute.experimental.CollectiveCommunication.NCCL)
    if not FLAGS.is_evaluator:
      # Node 0 is worker 0; any other node number is treated as worker 1.
      tf_config = {
          'cluster': {
              'worker': [node0, node1]
          },
          'task': {'type': 'worker', 'index': 0 if FLAGS.node_num == 0 else 1}
      }
    else:
      tf_config = {
          'cluster': {
              'evaluator': ["localhost:6060"]
          },
          'task': {'type': 'evaluator', 'index': 0}
      }
    os.environ['TF_CONFIG'] = json.dumps(tf_config)

  # With async checkpointing the saver hook added in 'train' mode is in
  # charge, so the estimator's own periodic saving is disabled.
  if FLAGS.use_async_checkpointing:
    save_checkpoints_steps = None
  else:
    save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)

  gconfig = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          rewrite_options=rewriter_config_pb2.RewriterConfig(
              disable_meta_optimizer=True)))

  if FLAGS.use_xla:
    # `gconfig` IS the session ConfigProto and has no `session_config`
    # field, so the XLA JIT level is set directly on its graph_options.
    gconfig.graph_options.optimizer_options.global_jit_level = (
        tf.OptimizerOptions.ON_1)

  # mnasnet opt - check if this is required!
  gconfig.gpu_options.allow_growth = True
  #gconfig.gpu_options.visible_device_list = str(hvd.local_rank())

  config = tf.estimator.RunConfig(
      model_dir=FLAGS.model_dir,
      save_checkpoints_steps=save_checkpoints_steps,
      log_step_count_steps=FLAGS.log_step_count_steps,
      train_distribute=strategy,
      session_config=gconfig)  # pylint: disable=line-too-long

  print('mnasnet opt - config cluster spec', config.cluster_spec)

  # Initializes model parameters.
  params = dict(
      steps_per_epoch=FLAGS.num_train_images / FLAGS.train_batch_size,
      batch_size=FLAGS.train_batch_size,
      dtype=tf.float32,
      use_bfloat16=FLAGS.use_bfloat16,
      quantized_training=FLAGS.quantized_training)

  mnasnet_est = tf.estimator.Estimator(
      model_fn=mnasnet_model_fn,
      model_dir=FLAGS.model_dir,
      config=config,
      params=params)

  # Input pipelines are slightly different (with regards to shuffling and
  # preprocessing) between training and evaluation.
  if FLAGS.bigtable_instance:
    tf.logging.info('Using Bigtable dataset, table %s', FLAGS.bigtable_table)
    select_train, select_eval = _select_tables_from_flags()
    imagenet_train, imagenet_eval = [imagenet_input.ImageNetBigtableInput(
        is_training=is_training,
        use_bfloat16=False,
        transpose_input=FLAGS.transpose_input,
        selection=selection) for (is_training, selection) in
                                     [(True, select_train),
                                      (False, select_eval)]]
  else:
    if FLAGS.data_dir == FAKE_DATA_DIR:
      tf.logging.info('Using fake dataset.')
    else:
      tf.logging.info('Using dataset: %s', FLAGS.data_dir)
    # Caching is only enabled for the training pipeline.
    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(
            is_training=is_training,
            data_dir=FLAGS.data_dir,
            transpose_input=FLAGS.transpose_input,
            cache=FLAGS.use_cache and is_training,
            image_size=FLAGS.input_image_size,
            num_parallel_calls=FLAGS.num_parallel_calls,
            use_bfloat16=FLAGS.use_bfloat16) for is_training in [True, False]
    ]

  if FLAGS.mode == 'eval':
    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(
        FLAGS.model_dir, timeout=FLAGS.eval_timeout):
      tf.logging.info('Starting to evaluate.')
      try:
        start_timestamp = time.time()  # This time will include compilation time
        eval_results = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt)
        elapsed_time = int(time.time() - start_timestamp)
        tf.logging.info('Eval results: %s. Elapsed seconds: %d', eval_results,
                        elapsed_time)

        # Terminate eval job when final checkpoint is reached. The step
        # number is the suffix of the checkpoint file name.
        current_step = int(os.path.basename(ckpt).split('-')[1])
        if current_step >= FLAGS.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          current_step)
          break

      except tf.errors.NotFoundError:
        # The checkpoint may already have been deleted by the time the
        # evaluation actually starts; skip it and wait for the next one.
        tf.logging.info('Checkpoint %s no longer exists, skipping checkpoint',
                        ckpt)

    if FLAGS.export_dir:
      export(mnasnet_est, FLAGS.export_dir, FLAGS.post_quantize)
  else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
        FLAGS.model_dir)

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', FLAGS.train_steps,
        FLAGS.train_steps / params['steps_per_epoch'], current_step)

    start_timestamp = time.time()  # This time will include compilation time

    if FLAGS.mode == 'train':
      hooks = []
      if FLAGS.use_async_checkpointing:
        hooks.append(
            async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, FLAGS.iterations_per_loop)))
      mnasnet_est.train(
          input_fn=imagenet_train.input_fn,
          max_steps=FLAGS.train_steps,
          hooks=hooks)

    else:
      assert FLAGS.mode == 'train_and_eval'
      # Evaluation is throttled to at most once every 600 seconds.
      train_spec = tf.estimator.TrainSpec(
          input_fn=imagenet_train.input_fn, max_steps=FLAGS.train_steps)
      eval_spec = tf.estimator.EvalSpec(
          input_fn=imagenet_eval.input_fn,
          steps=FLAGS.num_eval_images // FLAGS.eval_batch_size,
          throttle_secs=600)
      tf.estimator.train_and_evaluate(mnasnet_est, train_spec, eval_spec)

      elapsed_time = int(time.time() - start_timestamp)
      tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                      FLAGS.train_steps, elapsed_time)
      if FLAGS.export_dir:
        export(mnasnet_est, FLAGS.export_dir, FLAGS.post_quantize)
Ejemplo n.º 12
0
def main(unused_argv):
    """Export, evaluate or train MnasNet according to FLAGS.mode.

    Model parameters are layered in this order, later sources overriding
    earlier ones: the MNASNET_CFG defaults, FLAGS.config_file,
    FLAGS.params_override, the remaining input flags, and finally two derived
    entries (steps_per_epoch, quantized_training). The result is validated
    and locked before the estimator is built.

    Modes handled:
        export_only: export the model and return.
        eval: evaluate each checkpoint yielded for FLAGS.model_dir until one
            at or past params.train_steps has been evaluated (or the
            checkpoint iterator stops), then export if FLAGS.export_dir.
        train: train up to params.train_steps.
        train_and_eval: alternate training chunks of FLAGS.steps_per_eval
            steps with evaluation, then export if FLAGS.export_dir.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If params.precision is 'bfloat16' while params.use_keras
            is set.
    """
    # ---- Parameters ------------------------------------------------------
    params = params_dict.ParamsDict(mnasnet_config.MNASNET_CFG,
                                    mnasnet_config.MNASNET_RESTRICTIONS)
    for strict_override in (FLAGS.config_file, FLAGS.params_override):
        params = params_dict.override_params_dict(params,
                                                  strict_override,
                                                  is_strict=True)
    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    derived_params = {
        'steps_per_epoch': params.num_train_images / params.train_batch_size,
        'quantized_training': FLAGS.quantized_training,
    }
    params = params_dict.override_params_dict(params,
                                              derived_params,
                                              is_strict=False)
    params.validate()
    params.lock()

    # ---- Run configuration -----------------------------------------------
    resolver = None
    if FLAGS.tpu or params.use_tpu:
        resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # With async checkpointing the estimator's periodic saving is disabled;
    # 'train' mode installs an AsyncCheckpointSaverHook further down instead.
    checkpoint_interval = (None if params.use_async_checkpointing else max(
        100, params.iterations_per_loop))

    session_config = tf.ConfigProto(graph_options=tf.GraphOptions(
        rewrite_options=rewriter_config_pb2.RewriterConfig(
            disable_meta_optimizer=True)))
    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=params.iterations_per_loop,
        per_host_input_for_training=(
            tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = tf.contrib.tpu.RunConfig(
        cluster=resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=checkpoint_interval,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=session_config,
        tpu_config=tpu_config)

    # Flag validation: bfloat16 activations are rejected together with Keras.
    if params.precision == 'bfloat16' and params.use_keras:
        raise ValueError(
            'Keras layers do not have full support to bfloat16 activation training.'
            ' You have set precision as %s and use_keras as %s' %
            (params.precision, params.use_keras))

    # ---- Estimator -------------------------------------------------------
    mnasnet_est = tf.contrib.tpu.TPUEstimator(
        use_tpu=params.use_tpu,
        model_fn=build_model_fn,
        config=run_config,
        train_batch_size=params.train_batch_size,
        eval_batch_size=params.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu,
        params=params.as_dict())

    def run_export():
        export(mnasnet_est, FLAGS.export_dir, params, FLAGS.post_quantize)

    if FLAGS.mode == 'export_only':
        run_export()
        return

    # ---- Input pipelines -------------------------------------------------
    # Training and evaluation inputs differ in shuffling and preprocessing,
    # so one input object is built for each.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()

        def build_input(is_training):
            return imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=False,
                transpose_input=params.transpose_input,
                selection=select_train if is_training else select_eval)
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)

        def build_input(is_training):
            return imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=params.transpose_input,
                cache=params.use_cache and is_training,
                image_size=params.input_image_size,
                num_parallel_calls=params.num_parallel_calls,
                use_bfloat16=(params.precision == 'bfloat16'))

    imagenet_train = build_input(True)
    imagenet_eval = build_input(False)

    # ---- Evaluation-only mode --------------------------------------------
    if FLAGS.mode == 'eval':
        eval_steps = params.num_eval_images // params.eval_batch_size
        # Each newly appearing checkpoint triggers one evaluation pass.
        for checkpoint in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                eval_started = time.time()  # Includes compilation time.
                metrics = mnasnet_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=checkpoint)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                metrics, int(time.time() - eval_started))
                mnas_utils.archive_ckpt(metrics, metrics['top_1_accuracy'],
                                        checkpoint)

                # The step is the second '-'-separated field of the
                # checkpoint's basename; stop once training is complete.
                reached_step = int(os.path.basename(checkpoint).split('-')[1])
                if reached_step >= params.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        reached_step)
                    break
            except tf.errors.NotFoundError:
                # The coordinator runs on a different job than the TPU
                # worker, and the worker may finish initializing long after
                # it was told to evaluate. By then the checkpoint file may
                # already have been deleted.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    checkpoint)

        if FLAGS.export_dir:
            run_export()
        return

    # ---- Training modes ('train' or 'train_and_eval') --------------------
    current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
        FLAGS.model_dir)
    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', params.train_steps,
        params.train_steps / params.steps_per_epoch, current_step)

    train_started = time.time()  # Includes compilation time.

    if FLAGS.mode == 'train':
        train_hooks = []
        if params.use_async_checkpointing:
            saver_hook = async_checkpoint.AsyncCheckpointSaverHook(
                checkpoint_dir=FLAGS.model_dir,
                save_steps=max(100, params.iterations_per_loop))
            train_hooks.append(saver_hook)
        mnasnet_est.train(input_fn=imagenet_train.input_fn,
                          max_steps=params.train_steps,
                          hooks=train_hooks)
        return

    assert FLAGS.mode == 'train_and_eval'
    while current_step < params.train_steps:
        # Train for at most steps_per_eval further steps. When training
        # returns, a checkpoint has been written to --model_dir.
        target_step = min(current_step + FLAGS.steps_per_eval,
                          params.train_steps)
        mnasnet_est.train(input_fn=imagenet_train.input_fn,
                          max_steps=target_step)
        current_step = target_step

        tf.logging.info(
            'Finished training up to step %d. Elapsed seconds %d.',
            target_step, int(time.time() - train_started))

        # Evaluate the most recent model in --model_dir. Evaluation runs in
        # batches of --eval_batch_size, so images beyond a whole number of
        # batches are left out; with a fixed batch size the evaluated set
        # stays the same from round to round.
        tf.logging.info('Starting to evaluate.')
        metrics = mnasnet_est.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=params.num_eval_images // params.eval_batch_size)
        tf.logging.info('Eval results at step %d: %s', target_step, metrics)
        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)
        mnas_utils.archive_ckpt(metrics, metrics['top_1_accuracy'],
                                latest_checkpoint)

    tf.logging.info('Finished training up to step %d. Elapsed seconds %d.',
                    params.train_steps, int(time.time() - train_started))
    if FLAGS.export_dir:
        run_export()
Ejemplo n.º 13
0
def main(__):
    """Run one task of a parameter-server MNIST training job.

    A 'ps' task starts its server and blocks in `server.join()`. A worker
    task builds a one-hidden-layer softmax classifier, trains it with Adam
    until the global step reaches FLAGS.train_steps, prints the validation
    cross entropy, and then saves a model, its training history and
    predicted labels to the paths held in FLAGS.

    Args:
        __: Ignored.

    Raises:
        ValueError: If FLAGS.job_name or FLAGS.task_index is unset or empty.
    """
    print(f"Tensorflow Version is {tf.__version__}")

    # NOTE(review): `mnist` is read below, but the line that loads it is
    # commented out; presumably it is defined at module level -- confirm.
    # mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    print('job_name : %s' % FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    print('task_index : %d' % FLAGS.task_index)

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Create the cluster.
    num_worker = len(worker_spec)
    print("Cluster num is {}".format(num_worker))
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_index == 0)
    worker_device = '/job:worker/task:%d/cpu:0' % FLAGS.task_index
    with tf.device(
            tf.train.replica_device_setter(cluster=cluster,
                                           worker_device=worker_device)):
        # --------[PART 01] build model --------------
        # ----[0] Variables and placeholders
        # Variable that records the global training step.
        global_step = tf.Variable(0, name='global_step', trainable=False)
        hid_w = tf.Variable(tf.truncated_normal(
            [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
            stddev=1.0 / IMAGE_PIXELS),
                            name='hid_w')
        hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name='hid_b')

        sm_w = tf.Variable(tf.truncated_normal([FLAGS.hidden_units, 10],
                                               stddev=1.0 /
                                               math.sqrt(FLAGS.hidden_units)),
                           name='sm_w')
        sm_b = tf.Variable(tf.zeros([10]), name='sm_b')

        x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS])
        y_ = tf.placeholder(tf.float32, [None, 10])  # ground-truth labels

        # ----[1] Forward pass
        hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
        hid = tf.nn.relu(hid_lin)
        y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))

        # ----[2] Loss and optimizer
        # Predictions are clipped away from zero so the log stays finite.
        cross_entropy = - \
            tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
        opt = tf.train.AdamOptimizer(FLAGS.learning_rate)
        train_step = opt.minimize(cross_entropy, global_step=global_step)

        # Build the local variable-initialization op.
        init_op = tf.global_variables_initializer()
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 logdir=FLAGS.log_file,
                                 init_op=init_op,
                                 recovery_wait_secs=1,
                                 global_step=global_step)

        if is_chief:
            print('Worker %d: Initailizing session...' % FLAGS.task_index)
        else:
            print('Worker %d: Waiting for session to be initaialized...' %
                  FLAGS.task_index)

        # --------[PART 02] Train model --------------
        sess = sv.prepare_or_wait_for_session(server.target)

        print('Worker %d: Session initialization  complete.' %
              FLAGS.task_index)
        time_begin = time.time()
        print('Traing begins @ %f' % time_begin)
        local_step = 0

        while True:
            batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size)
            train_feed = {x: batch_xs, y_: batch_ys}
            # The original author noted that feed_dict is the slowest way to
            # supply input. The run call itself is required, though: it
            # performs the update and yields the global step that the log
            # line and the stop condition below depend on.
            _, step = sess.run([train_step, global_step],
                               feed_dict=train_feed)

            local_step += 1

            now = time.time()
            print('%f: Worker %d: traing step %d dome (global step:%d)' %
                  (now, FLAGS.task_index, local_step, step))

            if step >= FLAGS.train_steps:
                break

        time_end = time.time()
        print('Training ends @ %f' % time_end)
        train_time = time_end - time_begin
        print('Training elapsed time:%f s' % train_time)

        val_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        val_xent = sess.run(cross_entropy, feed_dict=val_feed)
        print('After %d training step(s), validation cross entropy = %g' %
              (FLAGS.train_steps, val_xent))
        sess.close()

    # ------------[Part 4] Output   ----------------
    # When requested, derive all output paths from the hyper-parameters.
    if FLAGS.record_type == "record_parameters":
        par_str = "lr_%g_b1_%g_b2_%g_bsize_%g" % (
            FLAGS.learning_rate, FLAGS.beta_1, FLAGS.beta_2, FLAGS.batch_size)
        FLAGS.model_save_path = os.path.join(FLAGS.current_path, "model",
                                             par_str + ".h5")
        FLAGS.history_output_path = os.path.join(FLAGS.current_path,
                                                 "train_process",
                                                 par_str + ".csv")
        FLAGS.predict_label_output_path = os.path.join(FLAGS.current_path,
                                                       "submit",
                                                       par_str + ".csv")

    # NOTE(review): `model` and `history` are not defined anywhere in this
    # function; presumably they are module-level objects -- confirm.
    # 1. Save the model and its training history.
    model.save(FLAGS.model_save_path)
    history_df = pd.DataFrame(history.history)
    history_df.to_csv(FLAGS.history_output_path)

    # 2. Predict labels for the held-out data.
    predict_data = pd.read_csv(FLAGS.predict_x_path)

    predict_x = predict_data.values.reshape([len(predict_data)] + [28, 28, 1])
    predict_y = model.predict(predict_x)
    # The predicted label is the index of the largest score in each row.
    predict_label = np.argmax(predict_y, axis=1)
    df_predict_label = pd.DataFrame({
        "ImageId": range(1,
                         len(predict_label) + 1),
        "Label": predict_label
    })
    # Export the predictions.
    df_predict_label.to_csv(FLAGS.predict_label_output_path, index=False)
Ejemplo n.º 14
0
def export(model_params, checkpoint_file, config=None):
    """Freeze the model restored from a checkpoint and write it to disk.

    Two files are written next to the checkpoint:
    `<checkpoint_file>.graph.pb` holds the frozen GraphDef, and
    `<checkpoint_file>.graph.pb.json` holds `vars(model_params)` as JSON.

    Args:
        model_params: Parameter object; `im_size`, `base_model` and
            `use_visual_modulator` are read here. Its `output_names`
            attribute is set to the exported output node names.
        checkpoint_file: Checkpoint to restore; also the prefix of the
            output file names.
        config: Optional `tf.ConfigProto` for the export session. When None,
            a default is built that enables the 'pruning', 'constfold' and
            'layout' rewriters with shape inference and soft placement.
    """
    # Input placeholders; the guide image has a fixed 224x224 size while the
    # other two follow model_params.im_size (indexed as [width, height]).
    batch_size = 1
    im_size = model_params.im_size
    guide_image = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    gb_image = tf.placeholder(tf.float32,
                              [batch_size, im_size[1], im_size[0], 1])
    input_image = tf.placeholder(tf.float32,
                                 [batch_size, im_size[1], im_size[0], 3])

    # Build the model. The visual modulator is kept as a separate part so
    # that it only needs to run once.
    model_func = get_model_func(model_params.base_model)
    if model_params.use_visual_modulator:
        if model_params.base_model == 'lite':
            v_m_params = visual_modulator_lite(guide_image,
                                               model_params,
                                               is_training=False)
        else:
            v_m_params = visual_modulator(guide_image,
                                          model_params,
                                          is_training=False)
    else:
        v_m_params = None
    net, _ = model_func([guide_image, gb_image, input_image],
                        model_params,
                        visual_modulator_params=v_m_params,
                        is_training=False)
    # These two ops are created for their effect on the graph: 'prob' is an
    # exported output, and global_step is among the variables the saver
    # below restores.
    tf.nn.sigmoid(net, name='prob')
    tf.Variable(0, name='global_step', trainable=False)

    # Honour a caller-supplied session config; build the default otherwise.
    if config is None:
        rewrite_options = rewriter_config_pb2.RewriterConfig()
        for optimizer_name in ('pruning', 'constfold', 'layout'):
            rewrite_options.optimizers.append(optimizer_name)
        graph_options = tf.GraphOptions(rewrite_options=rewrite_options,
                                        infer_shapes=True)
        config = tf.ConfigProto(
            graph_options=graph_options,
            allow_soft_placement=True,
        )

    # Expose each visual-modulator tensor under a stable, 1-based name.
    # There are none when the visual modulator is disabled.
    output_names = ['prob']
    if v_m_params is not None:
        for index, v_m_param in enumerate(v_m_params, start=1):
            visual_mod_name = 'visual_mod_params_%d' % index
            tf.identity(v_m_param, name=visual_mod_name)
            output_names.append(visual_mod_name)

    # Saver over every global variable, used to load the network weights.
    saver = tf.train.Saver(tf.global_variables())
    save_name = checkpoint_file + '.graph.pb'
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, checkpoint_file)
        if model_params.base_model != 'lite':
            sess.run(interp_surgery(tf.global_variables()))
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_names)
        with open(save_name, 'wb') as writer:
            writer.write(output_graph_def.SerializeToString())
        model_params.output_names = output_names
        with open(save_name + '.json', 'w') as writer:
            json.dump(vars(model_params), writer)
        # Single pre-formatted argument: prints the same on Python 2 and 3.
        print('Model saved in %s' % save_name)
Ejemplo n.º 15
0
# from https://github.com/tensorflow/tensorflow/issues/7251
# Builds a chain of ten identity/dynamic_stitch pairs on the GPU, runs it once
# with full tracing, and writes the timeline and the run metadata to disk.
import os
# Set before TensorFlow is imported.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf
from tensorflow.python.client.timeline import Timeline

with tf.device("/gpu:0"):
    x = tf.ones(100, name="x")
    idxs = tf.range(100)

    # Each round feeds the previous result back in, so the ops form a chain.
    for suffix in map(str, range(10)):
        y = tf.identity(x, name="identity-" + suffix)
        x = tf.dynamic_stitch([idxs, idxs], [x, y], name="stitch-" + suffix)

# Graph optimizations are set to level L0 for this run.
optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
graph_options = tf.GraphOptions(optimizer_options=optimizer_options)
config = tf.ConfigProto(graph_options=graph_options)
sess = tf.InteractiveSession(config=config)

metadata = tf.RunMetadata()
trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                              output_partition_graphs=True)
sess.run(x, options=trace_options, run_metadata=metadata)

timeline = Timeline(metadata.step_stats)
with open("dynamic_stitch_gpu_profile.json", "w") as trace_file:
    trace_file.write(timeline.generate_chrome_trace_format())
with open("dynamic_stitch_gpu_profile.pbtxt", "w") as metadata_file:
    metadata_file.write(str(metadata))
Ejemplo n.º 16
0
def main():
    """Parse the command line and run the selected action on a model.

    Exactly one action runs, chosen in this order of precedence:
    train (`args.start` or `args.resume`), evaluate (`args.verify`),
    dump (`args.dump`), feature heat maps (`args.features_map > 0`), or
    print variables (`args.print`). If none applies, nothing is run.
    """
    args = parse_args()

    # Pick the model directory: the one given explicitly, else a fresh
    # timestamped directory when starting, else the most recent model.
    model_dir = args.model[0] if args.model else None
    if not model_dir:
        if args.start:
            timestamp = datetime.now().strftime('%Y%m%d.%H%M')
            name_suffix = '-' + args.name[0] if args.name else ''
            model_dir = 'models/' + timestamp + name_suffix + '/'
        else:
            model_dir = most_recent_model()

    params = {
        'steps': args.steps[0] if args.steps else MAX_STEPS,
        'batch_size': args.batch_size[0] if args.batch_size else BATCH_SIZE,
        'learning_rate': 1e-4 if args.warm_start else 3e-4,
        'num_channels': get_num_channels(args, model_dir) or 128,
        'num_blocks': get_num_blocks(args, model_dir) or 9
    }

    # The listed graph optimizations are switched off while debugging.
    optimize = not args.debug
    optimizer_options = tf.OptimizerOptions(
        do_common_subexpression_elimination=optimize,
        do_constant_folding=optimize,
        do_function_inlining=optimize)
    session_config = tf.ConfigProto(
        graph_options=tf.GraphOptions(optimizer_options=optimizer_options),
        gpu_options=tf.GPUOptions(allow_growth=True))
    config = tf.estimator.RunConfig(
        tf_random_seed=0xfde6885f if args.deterministic else None,
        session_config=session_config)

    steps_to_skip, warm_start_from = 0, None
    if args.warm_start:
        steps_to_skip = 10000
        warm_start_from = tf.estimator.WarmStartSettings(
            ckpt_to_initialize_from=args.warm_start[0],
            vars_to_warm_start='[0-9x].*'  # only layers
        )

    if args.deterministic:
        # 16-bit floating point is not accurate enough for deterministic
        # output, so the compute type is pinned to `f32`.
        set_compute_type(tf.float32)

    features_mask = None
    if args.mask:
        features_mask = [float(part) for part in args.mask[0].split(';')]

    hooks = [tf_debug.LocalCLIDebugHook()] if args.debug else []
    nn = tf.estimator.Estimator(config=config,
                                model_fn=model_fn,
                                model_dir=model_dir,
                                params=params,
                                warm_start_from=warm_start_from)

    def dataset(is_training):
        """Return an input function over `args.files` for the estimator."""
        return lambda: input_fn(args.files, params['batch_size'],
                                features_mask, is_training,
                                args.deterministic)

    def plain_number(value):
        """Turn a non-JSON number (e.g. `Decimal`) into an int or float."""
        if value != int(value):
            return float(value)
        return int(value)

    def print_json(payload):
        """Pretty-print `payload` as a JSON object on standard output."""
        print(
            json.dumps(payload,
                       default=plain_number,
                       sort_keys=True,
                       separators=(',', ': '),
                       indent=4))

    if args.start or args.resume:
        nn.train(hooks=hooks + [LearningRateScheduler(steps_to_skip)],
                 input_fn=dataset(True),
                 steps=params['steps'] // params['batch_size'])
    elif args.verify:
        # Evaluate over the dataset and report the collected metrics.
        metrics = nn.evaluate(hooks=hooks,
                              input_fn=dataset(False),
                              steps=params['steps'] // params['batch_size'])
        print_json(metrics)
    elif args.dump:
        predictions = nn.predict(input_fn=lambda: input_fn([], params[
            'batch_size'], None, False, False),
                                 hooks=[DumpHook()])

        # Drain the generator; the work happens in DumpHook.
        for _ in predictions:
            pass
    elif args.features_map > 0:
        predictions = nn.predict(input_fn=lambda: input_fn(
            args.files, 1, features_mask, False, args.deterministic))

        print('(;GM[1]FF[4]SZ[19]')
        for seen, prediction in enumerate(predictions, start=1):
            heat_map = to_sgf_heat_map(prediction['features'],
                                       prediction['tower'])
            print('(;{})'.format(heat_map))

            # Stop once the count of printed heat maps exceeds 100.
            if seen > 100:
                break
        print(')')
    elif args.print:
        if args.files:
            for name in args.files:
                print(name, nn.get_variable_value(name).tolist())
        else:
            # No tensors were named, so print every variable with some
            # statistics.
            summary = {}
            for name in nn.get_variable_names():
                values = np.asarray(nn.get_variable_value(name))
                summary[name] = {
                    'mean': float(np.average(values)),
                    'std': float(np.std(values))
                }
            print_json(summary)
Ejemplo n.º 17
0
def main():
    """Train a small MNIST autoencoder with a whitened gradient and self-check.

    The network has layer sizes 784 -> 196 -> 784 with sigmoid activations.
    Backprop is written out by hand; each layer's gradient is preconditioned
    with inverses obtained from SVDs of the activation covariance and of the
    covariance of backprops from randomly sampled labels. After 40 steps the
    function asserts that the final loss and the fastest step time fall in
    fixed ranges, so it acts as a regression test.

    NOTE(review): `t` (used as a transpose below) is not defined in this
    function; presumably it is a module-level helper -- confirm.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    dtype = np.float32

    train_images = u.get_mnist_images()

    dsize = 10000
    patches = train_images[:, :dsize].astype(dtype)
    # Sizes: [number of examples, input, hidden, output].
    fs = [dsize, 28 * 28, 196, 28 * 28]

    # values from deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial
    X0 = patches
    lambda_ = 3e-3
    # NOTE(review): rho, beta (here), kl, d_kl and rho_hat below look like the
    # sparsity penalty of the tutorial, but the loss used further down is the
    # reconstruction term only -- these appear to be unused.
    rho = tf.constant(0.1, dtype=dtype)
    beta = 3
    W0_0 = u.ng_init(fs[2], fs[3])
    W1_0 = u.ng_init(fs[3], fs[2])
    W0f = u.flatten([W0_0.flatten(), W1_0.flatten()])

    def f(i):
        return fs[i + 1]  # W[i] has shape f[i] x f[i-1]

    dsize = f(-1)
    n = len(fs) - 2

    # helper to create variables with numpy or TF initial value
    init_dict = {}  # {var_placeholder: init_value}
    vard = {}  # {var: u.VarInfo}

    def init_var(val, name, trainable=False, noinit=False):
        # Tensor values initialize the variable directly; with noinit=True the
        # variable is kept out of all collections (empty list), so the global
        # initializer does not touch it. Other values are fed through a
        # placeholder that is recorded in init_dict.
        if isinstance(val, tf.Tensor):
            collections = [] if noinit else None
            var = tf.Variable(val, name=name, collections=collections)
        else:
            val = np.array(val)
            # NOTE(review): this asserts the function object itself, which is
            # always truthy; presumably `u.is_numeric(val)` was intended.
            assert u.is_numeric, "Unknown type"
            holder = tf.placeholder(dtype,
                                    shape=val.shape,
                                    name=name + "_holder")
            var = tf.Variable(holder, name=name, trainable=trainable)
            init_dict[holder] = val
        # Every variable also gets a placeholder/assign pair, stored in vard.
        var_p = tf.placeholder(var.dtype, var.shape)
        var_setter = var.assign(var_p)
        vard[var] = u.VarInfo(var_setter, var_p)
        return var

    lr = init_var(0.2, "lr")

    # All weights live in one flat variable; W is the per-layer view of it.
    Wf = init_var(W0f, "Wf", True)
    Wf_copy = init_var(W0f, "Wf_copy", True)
    W = u.unflatten(Wf, fs[1:])  # perftodo: this creates transposes
    X = init_var(X0, "X")
    # The data matrix is stored as W[0] so that indices line up with A below.
    W.insert(0, X)

    def sigmoid(x):
        return tf.sigmoid(x)

    def d_sigmoid(y):
        # Sigmoid derivative expressed through the sigmoid's output y.
        return y * (1 - y)

    def kl(x, y):
        return x * tf.log(x / y) + (1 - x) * tf.log((1 - x) / (1 - y))

    def d_kl(x, y):
        return (1 - x) / (1 - y) - x / y

    # A[i] = activations needed to compute gradient of W[i]
    # A[n+1] = network output
    A = [None] * (n + 2)

    # A[0] is created under a dependency on a Print op, so evaluating it by
    # accident would emit the "must never run" message.
    fail_node = tf.Print(0, [0], "fail, this must never run")
    with tf.control_dependencies([fail_node]):
        A[0] = u.Identity(dsize, dtype=dtype)
    A[1] = W[0]
    for i in range(1, n + 1):
        A[i + 1] = sigmoid(W[i] @ A[i])

    # reconstruction error and sparsity error
    err = (A[3] - A[1])
    rho_hat = tf.reduce_sum(A[2], axis=1, keep_dims=True) / dsize

    # B[i] = backprops needed to compute gradient of W[i]
    # B2[i] = backprops from sampled labels needed for natural gradient
    B = [None] * (n + 1)
    B2 = [None] * (n + 1)
    B[n] = err * d_sigmoid(A[n + 1])
    # Sampled labels are held in a variable built with noinit=True; re-running
    # its initializer (see advance_batch) draws a fresh sample.
    sampled_labels_live = tf.random_normal((f(n), f(-1)), dtype=dtype, seed=0)
    sampled_labels = init_var(sampled_labels_live,
                              "sampled_labels",
                              noinit=True)
    B2[n] = sampled_labels * d_sigmoid(A[n + 1])
    # Propagate both backprop streams from the last layer down to layer 0.
    for i in range(n - 1, -1, -1):
        backprop = t(W[i + 1]) @ B[i + 1]
        backprop2 = t(W[i + 1]) @ B2[i + 1]
        B[i] = backprop * d_sigmoid(A[i + 1])
        B2[i] = backprop2 * d_sigmoid(A[i + 1])

    # dW[i] = gradient of W[i]
    dW = [None] * (n + 1)
    pre_dW = [None] * (n + 1)  # preconditioned dW
    pre_dW_stable = [None] * (n + 1)  # preconditioned stable dW

    cov_A = [None] * (n + 1)  # covariance of activations[i]
    cov_B2 = [None] * (n + 1)  # covariance of synthetic backprops[i]
    vars_svd_A = [None] * (n + 1)
    vars_svd_B2 = [None] * (n + 1)
    for i in range(1, n + 1):
        # Covariances get lambda_ * identity added and are cached in
        # variables; re-running a variable's initializer recomputes it.
        cov_op = A[i] @ t(A[i]) / dsize + lambda_ * u.Identity(A[i].shape[0])
        cov_A[i] = init_var(cov_op, "cov_A%d" % (i, ))
        cov_op = B2[i] @ t(B2[i]) / dsize + lambda_ * u.Identity(
            B2[i].shape[0])
        cov_B2[i] = init_var(cov_op, "cov_B2%d" % (i, ))
        vars_svd_A[i] = u.SvdWrapper(cov_A[i],
                                     "svd_A_%d" % (i, ),
                                     do_inverses=True)
        vars_svd_B2[i] = u.SvdWrapper(cov_B2[i],
                                      "svd_B2_%d" % (i, ),
                                      do_inverses=True)
        # Whiten activations and (true) backprops with the SVD-based inverses.
        whitened_A = vars_svd_A[i].inv @ A[i]
        whitened_B = vars_svd_B2[i].inv @ B[i]
        pre_dW[i] = (whitened_B @ t(whitened_A)) / dsize
        dW[i] = (B[i] @ t(A[i])) / dsize

    # Loss function
    reconstruction = u.L2(err) / (2 * dsize)

    loss = reconstruction

    # Flattened plain and preconditioned gradients, cached in variables that
    # are refreshed by re-running their initializers each step.
    grad_live = u.flatten(dW[1:])
    pre_grad_live = u.flatten(pre_dW[1:])  # fisher preconditioned gradient
    grad = init_var(grad_live, "grad")
    pre_grad = init_var(pre_grad_live, "pre_grad")

    update_params_op = Wf.assign(Wf - lr * pre_grad).op
    save_params_op = Wf_copy.assign(Wf).op
    pre_grad_dot_grad = tf.reduce_sum(pre_grad * grad)
    grad_norm = tf.reduce_sum(grad * grad)
    pre_grad_norm = u.L2(pre_grad)

    def dump_svd_info(step):
        """Dump singular values and gradient values in those coordinates."""
        for i in range(1, n + 1):
            svd = vars_svd_A[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            u.dump(s0, "A_%d_%d" % (i, step))
            A0 = A[i].eval()
            At0 = v0.T @ A0
            u.dump(A0 @ A0.T, "Acov_%d_%d" % (i, step))
            u.dump(At0 @ At0.T, "Atcov_%d_%d" % (i, step))
            u.dump(s0, "As_%d_%d" % (i, step))

        for i in range(1, n + 1):
            svd = vars_svd_B2[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            u.dump(s0, "B2_%d_%d" % (i, step))
            B0 = B[i].eval()
            Bt0 = v0.T @ B0
            u.dump(B0 @ B0.T, "Bcov_%d_%d" % (i, step))
            u.dump(Bt0 @ Bt0.T, "Btcov_%d_%d" % (i, step))
            u.dump(s0, "Bs_%d_%d" % (i, step))

    def advance_batch():
        sess.run(sampled_labels.initializer)  # new labels for next call

    def update_covariances():
        # Recompute every cached covariance in a single session run.
        ops_A = [cov_A[i].initializer for i in range(1, n + 1)]
        ops_B2 = [cov_B2[i].initializer for i in range(1, n + 1)]
        sess.run(ops_A + ops_B2)

    def update_svds():
        # vars_svd_A[1] is deliberately absent: it is updated once before the
        # training loop.
        vars_svd_A[2].update()
        vars_svd_B2[2].update()
        vars_svd_B2[1].update()

    def init_svds():
        """Initialize our SVD to identity matrices."""
        ops = []
        for i in range(1, n + 1):
            ops.extend(vars_svd_A[i].init_ops)
            ops.extend(vars_svd_B2[i].init_ops)
        sess = tf.get_default_session()
        sess.run(ops)

    init_op = tf.global_variables_initializer()

    from tensorflow.core.protobuf import rewriter_config_pb2

    # Session config: model pruning disabled, constant folding off, memory
    # optimization set to MANUAL, optimizer level L0.
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
        memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    graph_options = tf.GraphOptions(optimizer_options=optimizer_options,
                                    rewrite_options=rewrite_options)
    config = tf.ConfigProto(graph_options=graph_options)

    # Initialization order: weights and data first, then the sampled labels,
    # covariances and SVDs, and finally everything else.
    sess = tf.InteractiveSession(config=config)
    sess.run(Wf.initializer, feed_dict=init_dict)
    sess.run(X.initializer, feed_dict=init_dict)
    advance_batch()
    update_covariances()
    init_svds()
    sess.run(init_op, feed_dict=init_dict)  # initialize everything else

    print("Running training.")
    u.reset_time()

    step_lengths = []  # keep track of learning rates
    losses = []

    # adaptive line search parameters
    # NOTE(review): alpha, beta and growth_rate are assigned but not used in
    # the loop below; `beta` also overwrites the value set near the top.
    alpha = 0.3  # acceptable fraction of predicted decrease
    beta = 0.8  # how much to shrink when violation
    growth_rate = 1.05  # how much to grow when too conservative

    def update_cov_A(i):
        sess.run(cov_A[i].initializer)

    def update_cov_B2(i):
        sess.run(cov_B2[i].initializer)

    # only update whitening matrix of input activations in the beginning
    vars_svd_A[1].update()

    for step in range(40):
        update_covariances()
        update_svds()

        # Refresh the cached gradients from the current weights.
        sess.run(grad.initializer)
        sess.run(pre_grad.initializer)

        # The loss is read before the update, so loss0 belongs to the weights
        # at the start of this step.
        lr0, loss0 = sess.run([lr, loss])
        update_params_op.run()
        advance_batch()

        losses.append(loss0)
        step_lengths.append(lr0)

        print("Step %d loss %.2f" % (step, loss0))
        u.record_time()

    # Regression checks: the final loss must fall in a fixed window, and the
    # fastest recorded time must lie between 20ms and 50ms (hardware-specific,
    # per the assertion message).
    assert losses[-1] < 0.59
    assert losses[-1] > 0.57
    assert 20e-3 < min(
        u.global_time_list) < 50e-3, "Time should be 40ms on 1080"
    u.summarize_time()
    print("Test passed")
Ejemplo n.º 18
0
def main(_):
    """Run this process as one task of a distributed TensorFlow 1.x job.

    The role is selected by ``FLAGS.job_name``:

    * ``"ps"``: start a parameter-server task and block in ``server.join()``.
    * ``"worker"``: build the model named by ``FLAGS.network``, run
      ``FLAGS.warmup`` untimed training steps, then ``FLAGS.epoch`` timed
      steps, printing loss, throughput and duration for each timed step and
      the accumulated duration at the end.

    An unknown network name or job name terminates the process through
    ``sys.exit`` with an explanatory message. The ``seq2seq`` network hands
    control to ``dist_train`` and then exits.

    Args:
        _: Unused positional argument.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    if FLAGS.job_name == "ps":
        # Give the parameter server only a negligible fraction of GPU memory.
        ps_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.00001))

        # Create and start a server for the local task.
        server = tf.train.Server(
            cluster,
            #                                protocol = "grpc_rdma",
            job_name=FLAGS.job_name,
            task_index=FLAGS.task_index,
            config=ps_config)
        server.join()
    elif FLAGS.job_name == "worker":

        # Create and start a server for the local task.
        server = tf.train.Server(
            cluster,
            #                                 protocol = "grpc_rdma",
            job_name=FLAGS.job_name,
            task_index=FLAGS.task_index)

        local_worker_device = "/job:worker/task:%d" % FLAGS.task_index
        with tf.device(
                tf.train.replica_device_setter(
                    ps_device='/job:ps/cpu:0',
                    worker_device=local_worker_device,
                    cluster=cluster)):

            # Import lazily so that only the selected model module is loaded.
            if FLAGS.network == 'lstm':
                from models.lstm import KitModel
            elif FLAGS.network == 'gru':
                from models.gru import KitModel
            elif FLAGS.network == 'fc':
                from models.fullyconnect import KitModel
            elif FLAGS.network == 'alexnet':
                from models.alexnet import KitModel
            elif FLAGS.network == 'vgg16':
                from models.vgg16 import KitModel
            elif FLAGS.network == 'vgg19' or FLAGS.network == 'vgg_e':
                from models.vgg19 import KitModel
            elif FLAGS.network == 'inception_v3':
                from models.inception_v3 import KitModel
            elif FLAGS.network == 'resnet':
                from models.resnet import KitModel
            elif FLAGS.network == 'seq2seq':
                # seq2seq drives its own distributed training loop.
                import models.translate.translate
                from models.translate.translate import dist_train
                dist_train(FLAGS, server, cluster)
                sys.exit()
            else:
                # Read the name from FLAGS, the only option object used by
                # this function, so the message is actually printed.
                sys.exit("Invalid network [%s]" % FLAGS.network)

            this_model = KitModel(FLAGS)
            this_model.build_model()

        train_dir = tempfile.mkdtemp()

        # Restrict this session to the ps tasks and to this worker only.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            device_filters=[
                "/job:ps", "/job:worker/task:%d" % FLAGS.task_index
            ],
            graph_options=tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L1)),
            gpu_options=tf.GPUOptions(visible_device_list=""))

        if FLAGS.infer_shapes == True:
            sess_config.graph_options.infer_shapes = FLAGS.infer_shapes

        # Task 0 is the chief; summaries and checkpoints are disabled.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir=train_dir,
                                 init_op=tf.global_variables_initializer(),
                                 global_step=this_model.global_step,
                                 summary_writer=None,
                                 saver=None)

        if FLAGS.task_index == 0:
            print("Worker %d: Initializing session..." % FLAGS.task_index)
        else:
            print("Worker %d: Waiting for session to be initialized..." %
                  FLAGS.task_index)

        sess = sv.prepare_or_wait_for_session(server.target,
                                              config=sess_config,
                                              start_standard_services=True)

        print_model()

        # Warmup steps are executed but not timed.
        print("Start warmup %d epoch." % FLAGS.warmup)
        for _ in range(FLAGS.warmup):
            this_model.get_data()
            sess.run(this_model.train_op, feed_dict=this_model.get_feed_dict())

        current_step = 0
        duration = 0
        while current_step < FLAGS.epoch:
            current_step += 1
            this_model.get_data()
            print("Start step %d" % current_step)
            # Only the session run itself is timed, not the data fetch above.
            start_time = time.time()
            _, step_loss = sess.run([this_model.train_op, this_model.cost],
                                    feed_dict=this_model.get_feed_dict())
            end_time = time.time()
            print(
                "Finish step %d, loss = %f, speed = %f sampes/s, duration = %f seconds"
                % (current_step, step_loss, FLAGS.batch_size /
                   (end_time - start_time), end_time - start_time))
            duration += end_time - start_time

        print("Total Time = %f s." % duration)
        #writer.close()

    else:
        # Read the role from FLAGS (see the network check above).
        sys.exit("Invalid job role name [%s]!" % FLAGS.job_name)
Ejemplo n.º 19
0
def main(_):
    """Run this process as one task of a distributed slim-network benchmark.

    The role is selected by ``FLAGS.job_name``:

    * ``"ps"``: start a parameter-server task and block in ``server.join()``.
    * ``"worker"``: build the input pipeline (real dataset or synthetic
      random tensors), the network named by ``FLAGS.network`` and an Adagrad
      training op; dump the graph; run ``FLAGS.warmup`` untimed steps and
      then ``FLAGS.epoch`` timed, fully traced steps. The trace of step 3 is
      written to ``timeline.json`` and a checkpoint is saved at the end.

    Any other job name terminates the process through ``sys.exit`` with an
    explanatory message.

    Args:
        _: Unused positional argument.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    if FLAGS.job_name == "ps":
        # Give the parameter server only a small fraction of GPU memory.
        ps_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.01))

        # Create and start a server for the local task.
        server = tf.train.Server(
            cluster,
            #                                protocol = "grpc_rdma",
            job_name=FLAGS.job_name,
            task_index=FLAGS.task_index,
            config=ps_config)
        server.join()
    elif FLAGS.job_name == "worker":

        # Create and start a server for the local task.
        server = tf.train.Server(
            cluster,
            #                                 protocol = "grpc+verbs",
            job_name=FLAGS.job_name,
            task_index=FLAGS.task_index)

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.data_dir)

        #####################################
        # Select the preprocessing function #
        #####################################
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.network, is_training=True)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(FLAGS.network,
                                                 dataset.num_classes,
                                                 is_training=True)

        if FLAGS.dataset_name != "synthetic":
            # Real data: read, preprocess and batch through input queues.
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)

            [image, label] = provider.get(['image', 'label'])

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch([image, label],
                                            batch_size=FLAGS.batch_size,
                                            num_threads=4,
                                            capacity=5 * FLAGS.batch_size)
        else:
            # Synthetic data: random images and labels generated in-graph.
            images = random_ops.random_uniform(
                (FLAGS.batch_size, network_fn.default_image_size,
                 network_fn.default_image_size, 3),
                maxval=1)
            # NOTE(review): maxval is an exclusive bound, so the largest label
            # produced is num_classes - 2 -- confirm this is intended.
            labels = random_ops.random_uniform((FLAGS.batch_size, ),
                                               maxval=FLAGS.num_classes - 1,
                                               dtype=tf.int32)

        with tf.device(
                tf.train.replica_device_setter(
                    ps_device='/job:ps/cpu:0',
                    worker_device=("/job:worker/task:%d" % FLAGS.task_index),
                    cluster=cluster)):

            global_step = tf.contrib.framework.get_or_create_global_step()

            #images, labels = cifar.distorted_inputs(FLAGS)
            logits, end_points = network_fn(images)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)
            cost = tf.reduce_mean(loss)
            train_op = tf.train.AdagradOptimizer(0.01).minimize(
                cost, global_step=global_step)

        saver = tf.train.Saver()
        print_model()

        train_dir = tempfile.mkdtemp()

        # Restrict this session to the ps tasks and to this worker only.
        sess_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            device_filters=[
                "/job:ps", "/job:worker/task:%d" % FLAGS.task_index
            ],
            graph_options=tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L1)),
            gpu_options=tf.GPUOptions(visible_device_list=""))

        if FLAGS.infer_shapes == True:
            sess_config.graph_options.infer_shapes = FLAGS.infer_shapes

        # Task 0 is the chief; summaries are disabled, checkpoints use saver.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir=train_dir,
                                 init_op=tf.global_variables_initializer(),
                                 global_step=global_step,
                                 summary_writer=None,
                                 saver=saver)

        if FLAGS.task_index == 0:
            print("Worker %d: Initializing session..." % FLAGS.task_index)
        else:
            print("Worker %d: Waiting for session to be initialized..." %
                  FLAGS.task_index)

        sess = sv.prepare_or_wait_for_session(server.target,
                                              config=sess_config,
                                              start_standard_services=True)
        # Dump the graph both as a summary event file and as a text
        # MetaGraphDef.
        writer = tf.summary.FileWriter('./graphs', sess.graph)
        writer.close()
        tf.train.export_meta_graph(filename='kit_meta_graph.txt',
                                   graph=sess.graph,
                                   as_text=True)
        print("Graph Saved.")

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Warmup steps are executed but neither timed nor traced.
        print("Start warmup %d epoch." % FLAGS.warmup)
        for _ in range(FLAGS.warmup):
            sess.run(train_op)

        # Every timed step is run with full tracing enabled.
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        current_step = 0
        duration = 0
        while current_step < FLAGS.epoch:
            current_step += 1
            start_time = time.time()
            _, step_loss = sess.run([train_op, cost],
                                    options=options,
                                    run_metadata=run_metadata)
            end_time = time.time()
            print(
                "Finish step %d, loss = %f, speed = %f sampes/s, duration = %f seconds"
                % (current_step, step_loss, FLAGS.batch_size /
                   (end_time - start_time), end_time - start_time))
            duration += end_time - start_time

            # Persist the trace of the third timed step in Chrome trace format.
            if current_step == 3:
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open('timeline.json', 'w') as f:
                    f.write(chrome_trace)

        print("Total Time = %f s." % duration)
        saver.save(sess, "kit_alexnet")

    else:
        # Read the role from FLAGS, the only option object used by this
        # function, so the message is actually printed.
        sys.exit("Invalid job role name [%s]!" % FLAGS.job_name)
Ejemplo n.º 20
0
    def tf_run_frozen_graph(self, file, xla, parallel, warmup, num_iter):
        """Load a graph from ``file``, initialize its variables to ones and time it.

        Steps performed:

        1. Import the graph through ``self.import_graph(file)`` and, when
           ``self.debug`` is set, print its operations, source operations,
           operation inputs and tensors.
        2. For every op whose type contains ``"Variable"``, append to a copy
           of the GraphDef a ``Fill``-with-ones node and an ``Assign`` node,
           plus one ``NoOp`` named ``init_all_var`` with control inputs on
           all of those assigns.
        3. Reset the default graph, import the augmented GraphDef, and run it
           in a new session with every placeholder fed an all-ones array.
        4. Run ``warmup`` verification rounds that print fetched values, then
           ``num_iter`` timed rounds, and print min/max/mean iteration time.

        Args:
            file: Passed unchanged to ``self.import_graph``.
            xla: When truthy, sets the session's global JIT level to ``ON_1``.
            parallel: Value used for ``inter_op_parallelism_threads``.
            warmup: Number of untimed warmup rounds.
            num_iter: Number of timed rounds. NOTE(review): with 0 rounds the
                final ``min(iter_times)`` is called on an empty list.
        """
        print("run frozen graph----------------------------")
        # graph_def is not read again below; only `graph` is used.
        graph_def, graph = self.import_graph(file)
        if (self.debug):
            print()
            print('Operations:')
        assert graph is not None
        ops = graph.get_operations()  # type: Iterable[tf.Operation]
        # NOTE: input_nodes and last_nodes filled in this loop are not read
        # later in this method (input_nodes is reset further down).
        input_nodes = []
        variables_nodes = []
        last_nodes = []
        for op in ops:
            if (self.debug):
                print('- {0:20s} "{1}" ({2} outputs)'.format(
                    op.type, op.name, len(op.outputs)))
            last_nodes = op.outputs
            if op.type == 'Placeholder':
                for node in op.outputs:
                    input_nodes.append(node)
            # Collect every variable op so an initializer can be attached.
            if "Variable" in op.type:
                variables_nodes.append(op)

        if (self.debug):
            print()
            print('Sources (operations without inputs):')
            for op in ops:
                if len(op.inputs) > 0:
                    continue
                print('- {0}'.format(op.name))

            print()
            print('Operation inputs:')
            for op in ops:
                if len(op.inputs) == 0:
                    continue
                print('- {0:20}'.format(op.name))
                print('  {0}'.format(', '.join(i.name for i in op.inputs)))

            print()
            print('Tensors:')
            for op in ops:
                for out in op.outputs:
                    print('- {0:20} {1:10} "{2}"'.format(
                        str(out.shape), out.dtype.name, out.name))
        # NOTE: the session opened here is never run; the block only builds
        # the augmented GraphDef `g_def`, which is used after the block.
        with tf.Session(graph=graph) as sess:
            var_inits = []
            g_def = graph.as_graph_def()
            for var in variables_nodes:
                vt = graph.get_tensor_by_name(var.outputs[0].name)
                # Earlier approach, kept for reference:
                # v = tf.get_variable(name = var.name, shape = vt.shape, initializer = tf.ones_initializer)
                # Hand-built ones initializer: Fill(shape, 1) -> Assign(var).
                dt = tf.as_dtype(vt.dtype.base_dtype).as_datatype_enum
                dt_int32 = tf.as_dtype(tf.int32).as_datatype_enum

                # Fill node producing a tensor of ones with the variable's shape.
                init = tf.NodeDef(
                    name=var.name + "/ones",
                    op="Fill",
                    input=[var.name + "/ones/shape", var.name + "/ones/const"],
                    attr={
                        'T': tf.AttrValue(type=dt),
                        'index_type': tf.AttrValue(type=dt_int32)
                    })

                # Constant holding the variable's static shape.
                shape = tf.NodeDef(
                    name=var.name + "/ones/shape",
                    op="Const",
                    attr={
                        "dtype":
                        tf.AttrValue(type=dt_int32),
                        "value":
                        tf.AttrValue(tensor=tf.make_tensor_proto(
                            vt.get_shape().as_list()))
                    })

                # Scalar constant 1.0 in the variable's dtype.
                const = tf.NodeDef(
                    name=var.name + "/ones/const",
                    op="Const",
                    #dtype =tf.AttrValue(type=dt),
                    attr={
                        "dtype": tf.AttrValue(type=dt),
                        "value":
                        tf.AttrValue(tensor=tf.make_tensor_proto(1.0, dt))
                    })

                # Assign node writing the ones tensor into the variable.
                node = tf.NodeDef(name=var.name + "/assign",
                                  op='Assign',
                                  input=[var.name, var.name + "/ones"],
                                  attr={
                                      'use_locking': tf.AttrValue(b=False),
                                      'validate_shape': tf.AttrValue(b=True),
                                      'T': tf.AttrValue(type=dt)
                                  })
                g_def.node.extend([shape, const, init, node])
                # "^name" marks a control dependency on the assign node.
                var_inits.append("^" + var.name + "/assign")

            # Single NoOp that depends on every assign: running it
            # initializes all variables.
            noop_assign = tf.NodeDef(name="init_all_var",
                                     op="NoOp",
                                     input=var_inits)
            g_def.node.extend([noop_assign])

        # Re-import the augmented GraphDef into a fresh default graph; the
        # lookup below expects the imported nodes under the "import/" prefix.
        tf.reset_default_graph()
        tf.import_graph_def(g_def)

        session_conf = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            graph_options=tf.GraphOptions(infer_shapes=True),
            inter_op_parallelism_threads=parallel)

        if xla:
            session_conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

        with tf.Session(config=session_conf) as sess:
            init = tf.get_default_graph().get_operation_by_name(
                "import/init_all_var")

            input_nodes = []  # not used below
            varlist = []
            feed_dict = {}
            aps = []

            ops = tf.get_default_graph().get_operations()
            # Feed every placeholder output with ones of its own shape/dtype.
            # NOTE(review): assumes every placeholder shape is fully defined
            # -- confirm for graphs with unknown dimensions.
            for op in ops:
                if op.type == 'Placeholder':
                    for node in op.outputs:
                        feed_dict[node] = np.ones(
                            node.shape, dtype=node.dtype.as_numpy_dtype())

            # Collect ops whose type contains "ApplyGradient" together with
            # their first input (presumably the variable being updated).
            for op in ops:
                if "ApplyGradient" in str(op.type):
                    aps.append(op)
                    varlist.append(op.inputs[0])

            # Build, per op, the list of indices of ops that consume one of
            # its outputs, to find ops nothing else consumes.
            last_outputs = []
            num_nodes = len(ops)
            name2nodeIdx_map = {}
            for i in range(num_nodes):
                name2nodeIdx_map[ops[i].name] = i
            node_outputs_ = [[] for i in range(num_nodes)]
            for n in range(num_nodes):
                op = ops[n]
                pending_count = len(op.inputs)
                for i in range(pending_count):
                    # Tensor names look like "op_name:index"; keep the op name.
                    input_name_id = op.inputs[i].name.split(':')
                    node_outputs_[name2nodeIdx_map[input_name_id[0]]].append(n)
            # For every non-NoOp op without consumers, fetch its *inputs*
            # as the values to inspect.
            for n in range(num_nodes):
                if len(node_outputs_[n]) == 0 and ops[n].type != 'NoOp':
                    print('- {0:20s} {1}'.format(ops[n].type, ops[n].name))
                    for m in range(len(ops[n].inputs)):
                        print('<-in-- {0:20s}'.format(ops[n].inputs[m].name))
                        last_outputs.append(ops[n].inputs[m])

            # Initialize all variables to ones.
            sess.run(init)
            # Warmup: fetch values before the apply-gradient ops run.
            # NOTE: the inner loops reuse the name `i`; the outer range
            # iteration is unaffected by that rebinding.
            for i in range(warmup):
                ret = sess.run(last_outputs + varlist, feed_dict)
                for i in range(0, len(last_outputs)):
                    # assumes fetched values expose numpy's .flat -- TODO confirm
                    out_flat = ret[i].flat
                    if (len(out_flat) > 0):
                        max_len = min(10, len(out_flat))
                        print(last_outputs[i].name)
                        print(out_flat[:max_len], "...(size=", len(out_flat),
                              "end with", out_flat[-1], ")")
                # Reset variables to ones, then run the apply-gradient ops.
                sess.run(init)
                ret1 = sess.run(varlist + aps, feed_dict)
                print("Updated:")
                for i in range(0, len(varlist)):
                    print(varlist[i].name, ret1[i])

            # Timed rounds: each covers both session runs, in milliseconds.
            iter_times = []
            for i in range(num_iter):
                start_time = time.time()
                ret = sess.run(last_outputs + varlist, feed_dict)
                ret1 = sess.run(varlist + aps, feed_dict)
                iter_time = (time.time() - start_time) * 1000
                iter_times.append(iter_time)
                print("Iteration time %f ms" % (iter_time))

            print("Summary: [min, max, mean] = [%f, %f, %f] ms" %
                  (min(iter_times), max(iter_times),
                   sum(iter_times) / len(iter_times)))
Ejemplo n.º 21
0
def main(unused_argv):
    """Train, evaluate and/or export the ResNet TPUEstimator model.

    Behaviour depends on ``FLAGS.mode``:

    * ``'eval'``: evaluate each checkpoint that appears in
      ``FLAGS.model_dir`` until one at or past ``FLAGS.train_steps`` has
      been evaluated.
    * ``'train'``: train up to ``FLAGS.train_steps``, optionally with the
      async-checkpoint and TPU-profiler hooks.
    * ``'train_and_eval'``: alternate training segments of
      ``FLAGS.steps_per_eval`` steps with an evaluation pass.

    In the two training modes the model is exported afterwards when
    ``FLAGS.export_dir`` is set.

    Args:
        unused_argv: Unused positional argument.
    """
    # An empty TPU name is passed when neither --tpu nor --use_tpu is set.
    tpu_name = FLAGS.tpu if (FLAGS.tpu or FLAGS.use_tpu) else ''
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # With async checkpointing, periodic saving is left to the hook that is
    # added in 'train' mode below.
    checkpoint_interval = (None if FLAGS.use_async_checkpointing else max(
        100, FLAGS.iterations_per_loop))

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        disable_meta_optimizer=True)
    session_config = tf.ConfigProto(
        graph_options=tf.GraphOptions(rewrite_options=rewrite_options))
    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_cores,
        per_host_input_for_training=(
            tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = tf.contrib.tpu.RunConfig(
        cluster=resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=checkpoint_interval,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=session_config,
        tpu_config=tpu_config)

    classifier = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu)
    assert FLAGS.precision == 'bfloat16' or FLAGS.precision == 'float32', (
        'Invalid value for --precision flag; must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', FLAGS.precision)
    use_bfloat16 = FLAGS.precision == 'bfloat16'

    # The training and evaluation pipelines are built from the same settings;
    # only is_training (and, for Bigtable, the table selection) differs.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()

        def bigtable_input(is_training, selection):
            return imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=use_bfloat16,
                transpose_input=FLAGS.transpose_input,
                selection=selection)

        imagenet_train = bigtable_input(True, select_train)
        imagenet_eval = bigtable_input(False, select_eval)
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)

        def file_input(is_training):
            # Caching is only ever requested for the training pipeline.
            return imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=FLAGS.transpose_input,
                cache=FLAGS.use_cache and is_training,
                num_parallel_calls=FLAGS.num_parallel_calls,
                use_bfloat16=use_bfloat16)

        imagenet_train = file_input(True)
        imagenet_eval = file_input(False)

    steps_per_epoch = FLAGS.num_train_images // FLAGS.train_batch_size
    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

    if FLAGS.mode == 'eval':
        # Evaluate every new checkpoint as it shows up in the model dir.
        checkpoints = evaluation.checkpoints_iterator(
            FLAGS.model_dir, timeout=FLAGS.eval_timeout)
        for checkpoint in checkpoints:
            tf.logging.info('Starting to evaluate.')
            try:
                # The measured time includes compilation time.
                eval_started = time.time()
                metrics = classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=checkpoint)
                seconds = int(time.time() - eval_started)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                metrics, seconds)

                # The step number is the part of the checkpoint file name
                # after the first '-'; stop once the final one is reached.
                reached_step = int(
                    os.path.basename(checkpoint).split('-')[1])
                if reached_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        reached_step)
                    break

            except tf.errors.NotFoundError:
                # The coordinator runs on a different job than the TPU
                # worker, which may finish initializing long after it was
                # told to evaluate; by then the checkpoint file may already
                # have been deleted.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    checkpoint)
        return

    # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
    current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
        FLAGS.model_dir)

    tf.logging.info(
        'Training for %d steps (%.2f epochs in total). Current'
        ' step %d.', FLAGS.train_steps,
        FLAGS.train_steps / steps_per_epoch, current_step)

    # The measured time includes compilation time.
    train_started = time.time()

    if FLAGS.mode == 'train':
        train_hooks = []
        if FLAGS.use_async_checkpointing:
            train_hooks.append(
                async_checkpoint.AsyncCheckpointSaverHook(
                    checkpoint_dir=FLAGS.model_dir,
                    save_steps=max(100, FLAGS.iterations_per_loop)))
        if FLAGS.profile_every_n_steps > 0:
            train_hooks.append(
                tpu_profiler_hook.TPUProfilerHook(
                    save_steps=FLAGS.profile_every_n_steps,
                    output_dir=FLAGS.model_dir,
                    tpu=FLAGS.tpu))
        classifier.train(input_fn=imagenet_train.input_fn,
                         max_steps=FLAGS.train_steps,
                         hooks=train_hooks)
    else:
        assert FLAGS.mode == 'train_and_eval'
        while current_step < FLAGS.train_steps:
            # Train for at most steps_per_eval more steps; a checkpoint is
            # written to --model_dir when the training call finishes.
            current_step = min(current_step + FLAGS.steps_per_eval,
                               FLAGS.train_steps)
            classifier.train(input_fn=imagenet_train.input_fn,
                             max_steps=current_step)

            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                current_step, int(time.time() - train_started))

            # Evaluate the most recent model in --model_dir. Evaluation runs
            # in batches of --eval_batch_size, so images beyond the last
            # full batch are left out, consistently for a fixed batch size.
            tf.logging.info('Starting to evaluate.')
            metrics = classifier.evaluate(input_fn=imagenet_eval.input_fn,
                                          steps=eval_steps)
            tf.logging.info('Eval results at step %d: %s', current_step,
                            metrics)

        total_seconds = int(time.time() - train_started)
        tf.logging.info(
            'Finished training up to step %d. Elapsed seconds %d.',
            FLAGS.train_steps, total_seconds)

    if FLAGS.export_dir is not None:
        # Guide to serving an exported TensorFlow model:
        #    https://www.tensorflow.org/serving/serving_basic
        tf.logging.info('Starting to export model.')
        classifier.export_saved_model(
            export_dir_base=FLAGS.export_dir,
            serving_input_receiver_fn=imagenet_input.image_serving_input_fn)
Ejemplo n.º 22
0
def facs_model(learning_rate, scale_class_weight, use_two_fc, use_three_fc,
               use_four_fc, use_five_fc, use_six_fc, use_seven_fc, hparam):
    """Build, train and checkpoint a fully connected multi-label classifier.

    The default graph is reset, a stack of ``fc_layer`` + ReLU layers is
    built, and the model is trained for 3001 full-batch Adam steps. Summaries
    are written under ``LOGDIR + hparam`` and checkpoints under
    ``os.path.join(savepath, hparam)``.

    Besides its arguments the function reads these module-level names:
    ``n_input``, ``n_output``, ``n_hidden_1`` .. ``n_hidden_6``, ``numFacs``,
    ``fc_layer``, ``LOGDIR``, ``savepath``, ``train_x``, ``train_y``,
    ``sw_train``, ``test_x``, ``test_y`` and ``sw_test``.

    Args:
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.
        scale_class_weight: Multiplier applied to the per-sample sum of the
            labels when computing the loss weights.
        use_two_fc: Build a 2-layer network.
        use_three_fc: Build a 3-layer network.
        use_four_fc: Build a 4-layer network.
        use_five_fc: Build a 5-layer network.
        use_six_fc: Build a 6-layer network.
        use_seven_fc: Build a 7-layer network. The ``use_*`` flags are tested
            in the order listed and the first truthy one wins; when none is
            set a single linear layer maps the input to the logits.
        hparam: String appended to the log and checkpoint directories to
            identify this run.

    Returns:
        None. Results are only visible through the written summaries and
        checkpoints.
    """
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))
    tf.reset_default_graph()
    sess = tf.Session("", config=config)

    # Setup placeholders, and reshape the data
    x = tf.placeholder(tf.float32, [None, n_input], name="x")
    y = tf.placeholder(tf.float32, [None, n_output], name="labels")
    # `sw` is fed on every run but is not consumed by any op in the graph
    # (the code that used it is commented out in the loss section below).
    sw = tf.placeholder(tf.float32, [None, n_output], name='intensity_weights')

    # Main function compares the number of FCs for performance.
    if use_two_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc1/relu", relu)
        logits = fc_layer(relu, n_hidden_1, n_output, "fc2")
    elif use_three_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc3/relu", relu)
        fc2 = fc_layer(relu, n_hidden_1, n_hidden_2, "fc2")
        relu_2 = tf.nn.relu(fc2)
        tf.summary.histogram("fc3/relu", relu_2)
        logits = fc_layer(relu_2, n_hidden_2, n_output, "fc3")
    elif use_four_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc4/relu", relu)
        fc2 = fc_layer(relu, n_hidden_1, n_hidden_2, "fc2")
        relu_2 = tf.nn.relu(fc2)
        tf.summary.histogram("fc4/relu", relu_2)
        fc3 = fc_layer(relu_2, n_hidden_2, n_hidden_3, "fc3")
        relu_3 = tf.nn.relu(fc3)
        tf.summary.histogram("fc4/relu", relu_3)
        logits = fc_layer(relu_3, n_hidden_3, n_output, "fc4")
    elif use_five_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc5/relu", relu)
        fc2 = fc_layer(relu, n_hidden_1, n_hidden_2, "fc2")
        relu_2 = tf.nn.relu(fc2)
        tf.summary.histogram("fc5/relu", relu_2)
        fc3 = fc_layer(relu_2, n_hidden_2, n_hidden_3, "fc3")
        relu_3 = tf.nn.relu(fc3)
        tf.summary.histogram("fc5/relu", relu_3)
        fc4 = fc_layer(relu_3, n_hidden_3, n_hidden_4, "fc4")
        relu_4 = tf.nn.relu(fc4)
        tf.summary.histogram("fc5/relu", relu_4)
        logits = fc_layer(relu_4, n_hidden_4, n_output, "fc5")
    elif use_six_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc6/relu", relu)
        fc2 = fc_layer(relu, n_hidden_1, n_hidden_2, "fc2")
        relu_2 = tf.nn.relu(fc2)
        tf.summary.histogram("fc6/relu", relu_2)
        fc3 = fc_layer(relu_2, n_hidden_2, n_hidden_3, "fc3")
        relu_3 = tf.nn.relu(fc3)
        tf.summary.histogram("fc6/relu", relu_3)
        fc4 = fc_layer(relu_3, n_hidden_3, n_hidden_4, "fc4")
        relu_4 = tf.nn.relu(fc4)
        tf.summary.histogram("fc6/relu", relu_4)
        fc5 = fc_layer(relu_4, n_hidden_4, n_hidden_5, "fc5")
        relu_5 = tf.nn.relu(fc5)
        tf.summary.histogram("fc6/relu", relu_5)
        logits = fc_layer(relu_5, n_hidden_5, n_output, "fc6")
    elif use_seven_fc:
        fc1 = fc_layer(x, n_input, n_hidden_1, "fc1")
        relu = tf.nn.relu(fc1)
        tf.summary.histogram("fc7/relu", relu)
        fc2 = fc_layer(relu, n_hidden_1, n_hidden_2, "fc2")
        relu_2 = tf.nn.relu(fc2)
        tf.summary.histogram("fc7/relu", relu_2)
        fc3 = fc_layer(relu_2, n_hidden_2, n_hidden_3, "fc3")
        relu_3 = tf.nn.relu(fc3)
        tf.summary.histogram("fc7/relu", relu_3)
        fc4 = fc_layer(relu_3, n_hidden_3, n_hidden_4, "fc4")
        relu_4 = tf.nn.relu(fc4)
        tf.summary.histogram("fc7/relu", relu_4)
        fc5 = fc_layer(relu_4, n_hidden_4, n_hidden_5, "fc5")
        relu_5 = tf.nn.relu(fc5)
        tf.summary.histogram("fc7/relu", relu_5)
        fc6 = fc_layer(relu_5, n_hidden_5, n_hidden_6, "fc6")
        relu_6 = tf.nn.relu(fc6)
        tf.summary.histogram("fc7/relu", relu_6)
        logits = fc_layer(relu_6, n_hidden_6, n_output, "fc7")

    else:
        logits = fc_layer(x, n_input, n_output, "fc")

    # Loss function
    with tf.name_scope("xent"):
        # The positive and negative samples in the data are unbalanced.
        # To push the algorithm to focus on fitting positives, I weighted the
        # positive values more than the negative.

        maxY = tf.reduce_sum(y, 1) * scale_class_weight
        class_weights = (maxY + 1) / 6

        # Some expressions are more intense than others in the CK+ database
        # and that is weighted in the loss function by sample weights, sw.
        # However, I got better results with just weighting all AUs
        # with equal intensity.

        #        mult_w = tf.multiply(y, sw)
        #        sum_w = tf.reduce_sum(mult_w,1)
        #
        #        class_weights = ( sum_w + 1) / 6

        print(class_weights.get_shape())
        # NOTE(review): the cross entropy is reduced to a scalar here, before
        # it is multiplied by `class_weights`. The weights therefore only
        # scale the batch loss by their mean instead of weighting individual
        # samples. Left unchanged because it alters training results; confirm
        # whether per-sample weighting was intended.
        xent = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                    labels=y,
                                                    name="xent"))
        xent = tf.reduce_mean(xent * class_weights)
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        zero = tf.constant(0, dtype=tf.float32)

        onesMat = tf.ones_like(logits)
        zerosMat = tf.zeros_like(logits)
        onesY = tf.ones_like(y, dtype=tf.float32)

        yFloat = tf.cast(y, dtype=tf.float32)
        yFlipped = onesY - yFloat
        # PREDICTION - If logits >= 0, logits = 1, else logits = 0.
        logitsBin = tf.cast(tf.where(logits >= zero, onesMat, zerosMat),
                            dtype=tf.float32,
                            name="op_to_restore")

        # Expose the prediction op and the input placeholder through a graph
        # collection named "coll".
        tf.add_to_collection("coll", logitsBin)
        tf.add_to_collection("coll", x)

        print('logitsBin', logitsBin.get_shape())
        print('y', y.get_shape())
        print('where_logitsBin', tf.where(logitsBin)[:, 1].get_shape())
        print('where_y', tf.where(y)[:, 1].get_shape())
        time_steps = tf.cast(tf.shape(y)[0], dtype='int32')
        print(time_steps.get_shape())

        nFacs = tf.count_nonzero(y, 1, dtype=tf.float32)
        onesFacs = tf.ones_like(nFacs)
        nFacs_Zeros = onesFacs * numFacs - nFacs

        # Replace zero counts with one.
        nFacs = tf.where(tf.equal(nFacs, zero), onesFacs, nFacs)
        nFacs_Zeros = tf.where(tf.equal(nFacs_Zeros, zero), onesFacs,
                               nFacs_Zeros)

        # Find TPR, TNR, FPR, FNR.
        matrix_positive = tf.cast(
            tf.equal(logitsBin, y)
            & tf.equal(yFloat, tf.constant(1, dtype=tf.float32)),
            dtype=tf.float32)
        correct_pos = tf.reduce_sum(matrix_positive) / tf.reduce_sum(yFloat)
        tf.summary.scalar("TruePosRate", correct_pos)

        matrix_negative = tf.cast(tf.equal(logitsBin, y)
                                  & tf.equal(yFloat, zero),
                                  dtype=tf.float32)
        correct_neg = tf.reduce_sum(matrix_negative) / tf.reduce_sum(yFlipped)
        tf.summary.scalar("TrueNegRate", correct_neg)

        matrix_falsePos = tf.cast(tf.not_equal(logitsBin, y)
                                  & tf.equal(y, zero),
                                  dtype=tf.float32)  # or yFlipped = 1
        falsePos = tf.reduce_sum(matrix_falsePos) / tf.reduce_sum(yFlipped)
        tf.summary.scalar("falsePosRate", falsePos)

        matrix_falseNeg = tf.cast(
            tf.not_equal(logitsBin, y)
            & tf.equal(yFloat, tf.constant(1, dtype=tf.float32)),
            dtype=tf.float32)
        falseNeg = tf.reduce_sum(matrix_falseNeg) / tf.reduce_sum(yFloat)
        tf.summary.scalar("falseNegRate", falseNeg)

        # Per-output counts (summed over the batch axis).
        tp_sum = tf.reduce_sum(matrix_positive, 0)
        tp_sum_append = tf.concat([tf.constant([0], dtype=tf.float32), tp_sum],
                                  0)
        tf_sum = tf.reduce_sum(matrix_negative, 0)
        fp_sum = tf.reduce_sum(matrix_falsePos, 0)
        fn_sum = tf.reduce_sum(matrix_falseNeg, 0)

        # Get Matrix of Confusion for multiclass binary classifier.
        confusion = tf.Variable(initial_value=tf.zeros(
            [n_output + 1, n_output + 1]),
                                name='confusion')
        confusion1 = tf.Variable(initial_value=tf.cast(tf.diag(
            np.repeat(1, n_output + 1)),
                                                       dtype=tf.float32),
                                 name='confusion1')
        confusion2 = tf.Variable(initial_value=tf.zeros(
            [n_output + 1, n_output + 1]),
                                 name='confusion2')
        confusion3 = tf.Variable(initial_value=tf.zeros(
            [n_output + 1, n_output + 1]),
                                 name='confusion3')
        confusion4 = tf.Variable(initial_value=tf.zeros(
            [n_output + 1, n_output + 1]),
                                 name='confusion4')

        # Each partial matrix carries one piece of the table: true positives
        # on the diagonal, the true-negative total at [0, 0], false positives
        # in row 0 and false negatives in column 0.
        confusion1 = confusion1[0, 0].assign(5)
        confusion1 = confusion1 * tp_sum_append
        confusion2 = confusion2[0, 0].assign(tf.reduce_sum(tf_sum))
        confusion3 = tf.assign(confusion3[0, 1:n_output + 1], fp_sum)
        confusion4 = confusion4[1:n_output + 1, 0].assign(fn_sum)

        confusion = confusion1 + confusion2 + confusion3 + confusion4

        txtConfusion = tf.as_string(confusion,
                                    precision=0,
                                    name='txtConfusion')

        tf.summary.text('txtConfusion', txtConfusion)

        correct_prediction = tf.cast(tf.equal(logitsBin, y),
                                     dtype=tf.float32,
                                     name="correct_prediction")

        accuracy = tf.reduce_mean(correct_prediction, name="accuracy")

        tf.summary.scalar("accuracy", accuracy)

    # Summary for tensorboard
    summ = tf.summary.merge_all()

    saver = tf.train.Saver()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    writer = tf.summary.FileWriter(LOGDIR + hparam + '/train')
    test_writer = tf.summary.FileWriter(LOGDIR + hparam + '/test')
    try:
        writer.add_graph(sess.graph)

        for i in range(3001):
            if i % 5 == 0:
                # Training-set summaries every 5 steps.
                [train_accuracy, s] = sess.run([accuracy, summ],
                                               feed_dict={
                                                   x: train_x,
                                                   y: train_y,
                                                   sw: sw_train
                                               })
                sess.run([confusion],
                         feed_dict={
                             x: test_x,
                             y: test_y,
                             sw: sw_test
                         })

                writer.add_summary(s, i)

            if i % 50 == 0:
                # Test-set summaries and a checkpoint every 50 steps.
                [acc, s] = sess.run([accuracy, summ],
                                    feed_dict={
                                        x: test_x,
                                        y: test_y,
                                        sw: sw_test
                                    })
                sess.run([confusion],
                         feed_dict={
                             x: test_x,
                             y: test_y,
                             sw: sw_test
                         })
                test_writer.add_summary(s, i)
                saver.save(sess, os.path.join(savepath, hparam, "model"), i)
            sess.run(train_step,
                     feed_dict={
                         x: train_x,
                         y: train_y,
                         sw: sw_train
                     })
    finally:
        # Flush buffered events and release the session even when training
        # is interrupted; otherwise every call leaks a session and two event
        # files.
        writer.close()
        test_writer.close()
        sess.close()
Ejemplo n.º 23
0
        print(f"Rank {hvd.rank()}:{hvd.local_rank()} reporting!")

    else:
        np.random.seed(args.seed)
        tf.random.set_random_seed(args.seed)
        random.seed(args.seed)

    if args.architecture in ('stylegan2'):
        assert args.starting_phase == args.ending_phase

    if 'OMP_NUM_THREADS' not in os.environ:
        print("Warning: OMP_NUM_THREADS not set. Setting it to 1.")
        os.environ['OMP_NUM_THREADS'] = str(1)

    gopts = tf.GraphOptions(place_pruned_graph=True)
    config = tf.ConfigProto(graph_options=gopts, allow_soft_placement=True)
    # config = tf.ConfigProto()

    if args.gpu:
        config.gpu_options.allow_growth = True
        # config.inter_op_parallelism_threads = 1
        #config.gpu_options.per_process_gpu_memory_fraction = 0.96
        if args.horovod:
            config.gpu_options.visible_device_list = str(hvd.local_rank())

    else:
        config = tf.ConfigProto(
            graph_options=gopts,
            intra_op_parallelism_threads=int(os.environ['OMP_NUM_THREADS']),
            inter_op_parallelism_threads=args.num_inter_ops,
Ejemplo n.º 24
0
    def _testDecoderFPropFloatHelper(self,
                                     func_inline=False,
                                     num_decoder_layers=1,
                                     target_seq_len=5,
                                     residual_start=0):
        """Runs one decoder forward pass on fixed inputs and returns the loss.

        A decoder is instantiated from `self._DecoderParams`, fed a seeded
        random encoder output plus hard-coded targets, and evaluated once in
        a fresh CPU-only graph.

        Args:
            func_inline: Value for the session's `do_function_inlining`
                optimizer option.
            num_decoder_layers: Assigned to the decoder params' `rnn_layers`.
            target_seq_len: Assigned to the decoder params' `target_seq_len`.
            residual_start: Assigned to the decoder params' `residual_start`.

        Returns:
            The evaluated value of the first element of `metrics['loss']`.
        """
        cluster = cluster_factory.ForTestingWorker(add_summary=True)
        optimizer_options = tf.OptimizerOptions(
            do_function_inlining=func_inline)
        session_config = tf.ConfigProto(
            graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

        with cluster:
            with self.session(graph=tf.Graph(),
                              use_gpu=False,
                              config=session_config) as sess:
                # NOTE: ops are created in a fixed order on purpose; do not
                # reorder the statements below relative to each other.
                tf.set_random_seed(8372749040)
                noise_params = py_utils.VariationalNoiseParams(
                    None, False, False)
                decoder_params = self._DecoderParams(noise_params)
                decoder_params.rnn_layers = num_decoder_layers
                decoder_params.residual_start = residual_start
                decoder_params.target_seq_len = target_seq_len
                decoder = decoder_params.Instantiate()

                # Encoder side: seeded random encodings and their paddings.
                encoded = tf.random_normal([5, 2, 8], seed=9283748)
                encoded_padding = tf.constant(
                    [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0],
                     [1.0, 1.0]],
                    dtype=tf.float32)
                encoder_outputs = py_utils.NestedMap(encoded=encoded,
                                                     padding=encoded_padding)

                # Target side: each constant is transposed right after it is
                # created.
                ids_rows = tf.constant(
                    [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                     [5, 6, 7, 8], [10, 5, 2, 5]],
                    dtype=tf.int32)
                ids = tf.transpose(ids_rows)
                labels_rows = tf.constant(
                    [[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                     [5, 7, 8, 10], [10, 5, 2, 4]],
                    dtype=tf.int32)
                labels = tf.transpose(labels_rows)
                paddings_rows = tf.constant(
                    [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                     [0, 1, 0, 0], [1, 1, 1, 1]],
                    dtype=tf.float32)
                paddings = tf.transpose(paddings_rows)
                transcripts = tf.constant(
                    ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
                weights = 1.0 - paddings
                targets = py_utils.NestedMap({
                    'ids': ids,
                    'labels': labels,
                    'weights': weights,
                    'paddings': paddings,
                    'transcripts': transcripts,
                })

                metrics = decoder.FPropDefaultTheta(encoder_outputs, targets)
                loss = metrics['loss'][0]
                correct_fraction = metrics[
                    'fraction_of_correct_next_step_preds'][0]
                merged_summaries = tf.summary.merge(
                    tf.get_collection(tf.GraphKeys.SUMMARIES))

                tf.global_variables_initializer().run()
                # The accuracy metric is evaluated with the loss but only the
                # loss value is kept.
                loss_value = sess.run([loss, correct_fraction])[0]

                # Evaluate the merged summaries as well; the result is unused.
                merged_summaries.eval()

                return loss_value
Ejemplo n.º 25
0
# Script fragment: `lms_obj` and `batch_size` are defined outside this view.
# Apply `lms_obj` to the current default graph.
lms_obj.run(graph=tf.get_default_graph())

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/MNIST_data/data/")


from tensorflow.core.protobuf import rewriter_config_pb2
# Graph rewriter settings: model pruning is disabled and memory optimization
# is set to NO_MEM_OPT. The commented-out keyword arguments are other rewriter
# switches that are left at their defaults.
rewrite_options = rewriter_config_pb2.RewriterConfig(disable_model_pruning=True,
            #constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
            #dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            #layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF,
            #arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            #min_graph_nodes=-1, 
            memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)#SCHEDULING_HEURISTICS)

# Session configuration: soft device placement, device-placement logging and
# on-demand GPU memory growth, using the rewriter options above.
graph_options = tf.GraphOptions(rewrite_options=rewrite_options)#, infer_shapes=True)
config = tf.ConfigProto(graph_options=graph_options, allow_soft_placement=True, log_device_placement=True)
config.gpu_options.allow_growth=True

# Disabled tracing / graph-dump helpers kept for reference.
#run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#run_metadata = tf.RunMetadata()

#graph = tf.get_default_graph()
#writer = tf.summary.FileWriter("./rewriter_graph1")
#writer.add_graph(graph=graph)

import numpy as np
# All-ones inputs and labels sized by `batch_size`; each input row has
# 200 * 200 float32 values.
picture = np.ones([batch_size, 200 * 200], dtype=np.float32)
picture_label = np.ones([batch_size], dtype=np.float32)

with tf.Session(config=config) as sess:
Ejemplo n.º 26
0
    def _testDecoderFPropGradientCheckerHelper(self, func_inline=False):
        """Checks the decoder loss gradients against numeric gradients.

        A float64 decoder is built from `self._DecoderParams` in a fresh
        CPU-only graph. The loss is compared twice against a golden value,
        then the symbolic gradient of every trainable variable that has one
        is compared with `test_utils.ComputeNumericGradient`.

        Args:
            func_inline: Value for the session's `do_function_inlining`
                optimizer option.
        """
        config = tf.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                do_function_inlining=func_inline)))
        with self.session(graph=tf.Graph(), use_gpu=False,
                          config=config) as sess:
            tf.set_random_seed(8372749040)
            np.random.seed(274854)
            vn_config = py_utils.VariationalNoiseParams(None, False, False)
            p = self._DecoderParams(vn_config)
            p.dtype = tf.float64

            dec = p.Instantiate()
            src_seq_len = 5
            src_enc = tf.constant(np.random.uniform(size=(src_seq_len, 2, 8)),
                                  tf.float64)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float64)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)
            target_ids = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                             [5, 6, 7, 8], [10, 5, 2, 5]],
                            dtype=tf.int32))
            target_labels = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                             [5, 7, 8, 10], [10, 5, 2, 4]],
                            dtype=tf.int32))
            target_paddings = tf.transpose(
                tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                             [0, 1, 0, 0], [1, 1, 1, 1]],
                            dtype=tf.float64))
            target_transcripts = tf.constant(
                ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
            target_weights = 1.0 - target_paddings

            targets = py_utils.NestedMap({
                'ids': target_ids,
                'labels': target_labels,
                'weights': target_weights,
                'paddings': target_paddings,
                'transcripts': target_transcripts,
            })
            metrics = dec.FPropDefaultTheta(encoder_outputs, targets)
            loss = metrics['loss'][0]
            all_vars = tf.trainable_variables()
            grads = tf.gradients(loss, all_vars)

            def DenseGrad(var, grad):
                """Returns `grad` as a dense tensor, or None if unsupported."""
                if isinstance(grad, tf.Tensor):
                    return grad
                elif isinstance(grad, tf.IndexedSlices):
                    # Sum the slices into a tensor with one row per row of
                    # `var`.
                    return tf.unsorted_segment_sum(grad.values, grad.indices,
                                                   tf.shape(var)[0])
                # Anything else (e.g. a missing gradient) has no dense form.
                return None

            dense_grads = [DenseGrad(x, y) for (x, y) in zip(all_vars, grads)]

            tf.global_variables_initializer().run()

            test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())
            # Second run to make sure the function is deterministic.
            test_utils.CompareToGoldenSingleFloat(self, 3.458078, loss.eval())

            # Keep each variable paired with its own gradient. Filtering only
            # the gradient list would shift the pairing after the first
            # variable without a gradient, so gradients of different
            # variables would be compared.
            checked = [(v, g) for (v, g) in zip(all_vars, dense_grads)
                       if g is not None]

            symbolic_grads = [g.eval() for (_, g) in checked]
            numerical_grads = []
            for v, _ in checked:
                numerical_grads.append(
                    test_utils.ComputeNumericGradient(sess, loss, v))

            for x, y in zip(symbolic_grads, numerical_grads):
                self.assertAllClose(x, y)
def freeze_graph_with_def_protos(
    input_graph_def,
    input_saver_def,
    input_checkpoint,
    output_node_names,
    restore_op_name,
    filename_tensor_name,
    clear_devices,
    initializer_nodes,
    optimize_graph=True,
    variable_names_blacklist=''):
  """Returns a copy of the graph with checkpointed variables made constant.

  Args:
    input_graph_def: GraphDef to freeze. Its node devices are cleared in place
      when `clear_devices` is set.
    input_saver_def: Optional SaverDef used to restore the checkpoint. When
      falsy, a Saver is built from the checkpoint variables that also exist
      in the graph.
    input_checkpoint: Checkpoint path (or prefix) to restore from.
    output_node_names: Comma-separated names of the output nodes.
    restore_op_name: Unused.
    filename_tensor_name: Unused.
    clear_devices: Whether to blank out the device of every node.
    initializer_nodes: Passed to `sess.run` after restoring, only on the path
      without `input_saver_def`.
    optimize_graph: Whether to enable the graph rewriter for the session.
    variable_names_blacklist: Comma-separated variable names forwarded to
      `convert_variables_to_constants`; empty means no blacklist.

  Returns:
    The GraphDef produced by `graph_util.convert_variables_to_constants`.

  Raises:
    ValueError: If the checkpoint does not exist or no output node names
      were given.
  """
  # Still accepted for signature compatibility; the loading code ignores them.
  del restore_op_name, filename_tensor_name

  # With the Saver V2 format 'input_checkpoint' may be a prefix, not a file.
  checkpoint_found = saver_lib.checkpoint_exists(input_checkpoint)
  if not checkpoint_found:
    raise ValueError(
        'Input checkpoint "' + input_checkpoint + '" does not exist!')

  if not output_node_names:
    raise ValueError(
        'You must supply the name of a node to --output_node_names.')

  # Dropping explicit device assignments makes the graph more portable.
  if clear_devices:
    for graph_node in input_graph_def.node:
      graph_node.device = ''

  with tf.Graph().as_default():
    tf.import_graph_def(input_graph_def, name='')

    if not optimize_graph:
      logging.info('Graph Rewriter optimizations disabled')
      graph_options = tf.GraphOptions()
    else:
      logging.info('Graph Rewriter optimizations enabled')
      rewriter = rewriter_config_pb2.RewriterConfig(
          optimize_tensor_layout=True)
      for optimizer_name in ('pruning', 'constfold', 'layout'):
        rewriter.optimizers.append(optimizer_name)
      graph_options = tf.GraphOptions(
          rewrite_options=rewriter, infer_shapes=True)

    session_config = tf.ConfigProto(graph_options=graph_options)
    with session.Session(config=session_config) as sess:
      if input_saver_def:
        restorer = saver_lib.Saver(saver_def=input_saver_def)
        restorer.restore(sess, input_checkpoint)
      else:
        restorable = {}
        reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint)
        for var_name in reader.get_variable_to_shape_map():
          try:
            graph_tensor = sess.graph.get_tensor_by_name(var_name + ':0')
          except KeyError:
            # Checkpoint entries without a matching graph tensor (such as
            # 'global_step' or similar housekeeping values) are skipped.
            pass
          else:
            restorable[var_name] = graph_tensor
        restorer = saver_lib.Saver(var_list=restorable)
        restorer.restore(sess, input_checkpoint)
        if initializer_nodes:
          sess.run(initializer_nodes)

      if variable_names_blacklist:
        variable_names_blacklist = variable_names_blacklist.split(',')
      else:
        variable_names_blacklist = None
      output_names = output_node_names.split(',')
      output_graph_def = graph_util.convert_variables_to_constants(
          sess,
          input_graph_def,
          output_names,
          variable_names_blacklist=variable_names_blacklist)

  return output_graph_def
Ejemplo n.º 28
0
    a1 = tf.ones((n, n))
    a2 = tf.ones((n, n))
# Script fragment: `a1`, `a2`, `n` and `timeline` come from code outside this
# view. Two matmuls of the same inputs are placed on separate CPU devices and
# their results are multiplied on a third.
with tf.device("cpu:1"):
    a3 = tf.matmul(a1, a2)
with tf.device("cpu:2"):
    a4 = tf.matmul(a1, a2)
with tf.device("cpu:3"):
    a5 = tf.matmul(a3, a4)

# Turn off graph optimizations
no_opt = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0,
                             do_common_subexpression_elimination=False,
                             do_function_inlining=False,
                             do_constant_folding=False)
# Session config: 8 CPU devices, no soft placement (ops must run on the device
# they were assigned), and device placement is logged.
config = tf.ConfigProto(
    graph_options=tf.GraphOptions(optimizer_options=no_opt),
    log_device_placement=True,
    allow_soft_placement=False,
    device_count={"CPU": 8},
    inter_op_parallelism_threads=3,
    intra_op_parallelism_threads=1)
sess = tf.Session(config=config)

# Request a full trace and the partition graphs for the run below.
run_metadata = tf.RunMetadata()
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                            output_partition_graphs=True)

# Run session.
sess.run(a5.op, options=run_options, run_metadata=run_metadata)

# Wrap the step stats collected during the run in a Timeline object.
trace = timeline.Timeline(step_stats=run_metadata.step_stats)
Ejemplo n.º 29
0
def main(unused_argv):
    """Builds the MnasNet estimator and runs the job selected by FLAGS.mode.

    Depending on `FLAGS.mode` this either evaluates every new checkpoint in
    `FLAGS.model_dir` ('eval'), trains up to `FLAGS.train_steps` ('train'),
    or alternates training and evaluation every `FLAGS.steps_per_eval` steps
    ('train_and_eval'). When `FLAGS.export_dir` is set the model is exported
    at the end of the 'eval' and 'train_and_eval' paths.

    Args:
        unused_argv: Ignored; all configuration is read from `FLAGS`.

    Raises:
        ValueError: If both `FLAGS.use_bfloat16` and `FLAGS.use_keras` are
            set.
    """
    # Mnas optimize - set the proper image data format
    tf.keras.backend.set_image_data_format(FLAGS.data_format)
    # Mnas optimize - optimization flags
    # gpu_thread_count = 2
    # os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    # os.environ['TF_GPU_THREAD_COUNT'] = str(gpu_thread_count)
    # os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    # os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    # enable mixed precision? -> Not much benefits seen yet
    # os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"

    # Horovod: initialize Horovod.
    if FLAGS.use_horovod:
        hvd.init()
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu if (FLAGS.tpu or FLAGS.use_tpu) else '',
        zone=FLAGS.tpu_zone,
        project=FLAGS.gcp_project)

    # Checkpoint cadence: disabled when async checkpointing is used, and with
    # Horovod only rank 0 writes checkpoints.
    if FLAGS.use_async_checkpointing:
        save_checkpoints_steps = None
    else:
        if not FLAGS.use_horovod:
            save_checkpoints_steps = max(100, FLAGS.iterations_per_loop)
        else:
            save_checkpoints_steps = max(
                100, FLAGS.iterations_per_loop) if hvd.rank() == 0 else None
    config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        log_step_count_steps=FLAGS.log_step_count_steps,
        session_config=tf.ConfigProto(
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True))),
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            per_host_input_for_training=tf.contrib.tpu.InputPipelineConfig
            .PER_HOST_V2))  # pylint: disable=line-too-long

    if FLAGS.use_xla:
        config.session_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    if FLAGS.use_horovod:
        config.session_config.gpu_options.allow_growth = True
        config.session_config.gpu_options.visible_device_list = str(
            hvd.local_rank())

    # Validates Flags.
    if FLAGS.use_bfloat16 and FLAGS.use_keras:
        raise ValueError(
            'Keras layers do not have full support to bfloat16 activation training.'
            ' You have set use_bfloat as %s and use_keras as %s' %
            (FLAGS.use_bfloat16, FLAGS.use_keras))

    # Initializes model parameters.
    steps_per_epoch = FLAGS.num_train_images / FLAGS.train_batch_size
    steps_per_epoch = steps_per_epoch // hvd.size(
    ) if FLAGS.use_horovod else steps_per_epoch
    params = dict(steps_per_epoch=steps_per_epoch,
                  use_bfloat16=FLAGS.use_bfloat16,
                  quantized_training=FLAGS.quantized_training)
    if FLAGS.use_horovod:
        params['hvd'] = True
        params['hvd_curr_host'] = hvd.rank()
        params['hvd_num_hosts'] = hvd.size()
    mnasnet_est = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=mnasnet_model_fn,
        config=config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        export_to_tpu=FLAGS.export_to_tpu,
        params=params)

    # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states from
    # rank 0 to all other processes. This is necessary to ensure consistent
    # initialization of all workers when training is started with random weights or
    # restored from a checkpoint.
    if FLAGS.use_horovod:
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    if FLAGS.bigtable_instance:
        tf.logging.info('Using Bigtable dataset, table %s',
                        FLAGS.bigtable_table)
        select_train, select_eval = _select_tables_from_flags()
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetBigtableInput(
                is_training=is_training,
                use_bfloat16=False,
                transpose_input=FLAGS.transpose_input,
                selection=selection)
            for (is_training,
                 selection) in [(True, select_train), (False, select_eval)]
        ]
    else:
        if FLAGS.data_dir == FAKE_DATA_DIR:
            tf.logging.info('Using fake dataset.')
        else:
            tf.logging.info('Using dataset: %s', FLAGS.data_dir)
        imagenet_train, imagenet_eval = [
            imagenet_input.ImageNetInput(
                is_training=is_training,
                data_dir=FLAGS.data_dir,
                transpose_input=FLAGS.transpose_input,
                cache=FLAGS.use_cache and is_training,
                image_size=FLAGS.input_image_size,
                num_parallel_calls=FLAGS.num_parallel_calls,
                use_bfloat16=FLAGS.use_bfloat16)
            for is_training in [True, False]
        ]

    if FLAGS.mode == 'eval':
        eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
        # Run evaluation when there's a new checkpoint
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time(
                )  # This time will include compilation time
                eval_results = mnasnet_est.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(ckpt).split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break

            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt)

        if FLAGS.export_dir:
            export(mnasnet_est, FLAGS.export_dir, FLAGS.post_quantize)
    else:  # FLAGS.mode == 'train' or FLAGS.mode == 'train_and_eval'
        current_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
            FLAGS.model_dir)

        tf.logging.info(
            'Training for %d steps (%.2f epochs in total). Current'
            ' step %d.', FLAGS.train_steps,
            FLAGS.train_steps / params['steps_per_epoch'], current_step)

        start_timestamp = time.time(
        )  # This time will include compilation time

        if FLAGS.mode == 'train':
            hooks = []
            if FLAGS.use_horovod:
                # Without the broadcast every Horovod worker would start from
                # its own initial weights; the 'train_and_eval' path below
                # already installs this hook.
                hooks.append(bcast_hook)
            if FLAGS.use_async_checkpointing:
                hooks.append(
                    async_checkpoint.AsyncCheckpointSaverHook(
                        checkpoint_dir=FLAGS.model_dir,
                        save_steps=max(100, FLAGS.iterations_per_loop)))
            mnasnet_est.train(input_fn=imagenet_train.input_fn,
                              max_steps=FLAGS.train_steps,
                              hooks=hooks)

        else:
            assert FLAGS.mode == 'train_and_eval'
            curr_rank = 0
            if FLAGS.use_horovod:
                curr_rank = hvd.rank()
            while current_step < FLAGS.train_steps:
                # Train for up to steps_per_eval number of steps.
                # At the end of training, a checkpoint will be written to --model_dir.
                next_checkpoint = min(current_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                if FLAGS.use_horovod:
                    # try dali pipeline
                    mnasnet_est.train(input_fn=imagenet_train.train_data_fn,
                                      max_steps=next_checkpoint,
                                      hooks=[bcast_hook])
                    # this uses the old tf data pipeline
                    # mnasnet_est.train(
                    #     input_fn=imagenet_train.input_fn, max_steps=next_checkpoint, hooks=[bcast_hook])
                else:
                    mnasnet_est.train(input_fn=imagenet_train.input_fn,
                                      max_steps=next_checkpoint)
                current_step = next_checkpoint

                tf.logging.info(
                    'Finished training up to step %d. Elapsed seconds %d. Hvd rank %d',
                    next_checkpoint, int(time.time() - start_timestamp),
                    curr_rank)

                # Evaluate the model on the most recent model in --model_dir.
                # Since evaluation happens in batches of --eval_batch_size, some images
                # may be excluded modulo the batch size. As long as the batch size is
                # consistent, the evaluated images are also consistent.
                tf.logging.info('Starting to evaluate.')
                # With --eval_on_single_gpu only rank 0 evaluates; otherwise
                # every worker does.
                if not FLAGS.eval_on_single_gpu or curr_rank == 0:
                    eval_results = mnasnet_est.evaluate(
                        input_fn=imagenet_eval.train_data_fn,  # input_fn
                        steps=FLAGS.num_eval_images // FLAGS.eval_batch_size)
                    tf.logging.info('Eval results at step %d: %s. Hvd rank %d',
                                    next_checkpoint, eval_results, curr_rank)

            elapsed_time = int(time.time() - start_timestamp)
            tf.logging.info(
                'Finished training up to step %d. Elapsed seconds %d.',
                FLAGS.train_steps, elapsed_time)
            if FLAGS.export_dir:
                export(mnasnet_est, FLAGS.export_dir, FLAGS.post_quantize)
Ejemplo n.º 30
0
def test_avgpool2d():
    ''' Run tests on the Wave custom avgpool2d operator.

    For each of 100 iterations a fresh graph is built with a randomly
    initialized float32 input of shape [1, 64, 64, 2] (NHWC). The Wave
    kernel ``wave_avg_pool_dfx`` and the stock ``nn_ops.avg_pool`` are then
    evaluated on that input with a 2x2 window and stride 2, once with
    'SAME' padding and once with 'VALID' padding, and the two results are
    checked with ``compare_tensor``.

    Returns:
        True when every iteration and padding mode compares successfully.

    Raises:
        AssertionError: if ``compare_tensor`` returns a falsy value for any
            iteration/padding combination. Both tensors are printed first.
    '''
    tf.reset_default_graph()
    # Turn off graph-rewriting optimizations
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))

    iterations = 100

    # All shapes below are laid out N, H, W, C (data_format='NHWC').
    # They do not change between iterations, so build them once.
    in_shape = [1, 64, 64, 2]
    ksize = [1, 2, 2, 1]
    strides = [1, 2, 2, 1]

    for i in range(iterations):
        # Start from an empty graph so the variable "a" can be re-created.
        tf.reset_default_graph()

        max_in = tf.get_variable(
            "a", in_shape, dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        t_init = tf.global_variables_initializer()

        # Each padding mode runs in its own session, which re-runs the
        # initializer, exactly as the original SAME / VALID blocks did.
        for padding in ('SAME', 'VALID'):
            with tf.Session('', config=config) as sess:
                t_init.run()

                # Wave kernel under test.
                z_op = waveflow.wavecomp_ops_module.wave_avg_pool_dfx(
                    max_in,
                    ksize=ksize,
                    strides=strides,
                    padding=padding,
                    data_format='NHWC')

                # Base tensorflow. Only supports NHWC.
                z2_op = nn_ops.avg_pool(
                    max_in,
                    ksize=ksize,
                    strides=strides,
                    padding=padding,
                    data_format='NHWC')

                # Evaluate both ops in a single run so they see the same
                # variable values.
                z, z2 = sess.run([z_op, z2_op])

                assert_str = "Failure on i: %d, mode: %s" % (i, padding)
                if not compare_tensor(z, z2, assert_str):
                    print("z: shape: %s, %s" % (z.shape, z))
                    print("z (np): shape: %s, %s" % (z2.shape, z2))
                    print("\n\n")
                    # Raise explicitly: a bare `assert False` is removed
                    # under `python -O` and would let the test pass.
                    raise AssertionError(assert_str)

    return True