Example #1
def train(model, data, gen, params):
    anim_frames = []

    with tf.Session() as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        for step in range(params.num_steps + 1):
            # update discriminator
            x = data.sample(params.batch_size)
            z = gen.sample(params.batch_size)
            loss_d, _ = session.run([model.loss_d, model.opt_d], {
                model.x: np.reshape(x, (params.batch_size, 1)),
                model.z: np.reshape(z, (params.batch_size, 1))
            })

            # update generator
            z = gen.sample(params.batch_size)
            loss_g, _ = session.run([model.loss_g, model.opt_g], {
                model.z: np.reshape(z, (params.batch_size, 1))
            })

            if step % params.log_every == 0:
                print('{}: {:.4f}\t{:.4f}'.format(step, loss_d, loss_g))

            if params.anim_path and (step % params.anim_every == 0):
                anim_frames.append(
                    samples(model, session, data, gen.range, params.batch_size)
                )

        if params.anim_path:
            save_animation(anim_frames, params.anim_path, gen.range)
        else:
            samps = samples(model, session, data, gen.range, params.batch_size)
            plot_distributions(samps, gen.range)
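Note on the pattern above: both initializers run once, up front, before the alternating discriminator/generator updates. A minimal, self-contained sketch of that initialize-then-train session pattern (hypothetical one-variable model, not the GAN above):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 1])
w = tf.get_variable('w', shape=[1])  # hypothetical stand-in for a real model
loss = tf.reduce_mean(tf.square(x * w - 1.0))
opt = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as session:
    # Both initializers must run before any session.run that reads variables.
    tf.local_variables_initializer().run()
    tf.global_variables_initializer().run()
    for step in range(10):
        loss_val, _ = session.run([loss, opt], {x: np.ones((4, 1))})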
Example #2
def test(model, config, prompts):

    sr = 24000 if 'blizzard' in config.data_path else 16000
    meta = data_input.load_meta(config.data_path)
    config.r = audio.r
    ivocab = meta['vocab']
    config.vocab_size = len(ivocab)

    with tf.device('/cpu:0'):
        batch_inputs = data_input.load_prompts(prompts, ivocab)
        config.num_prompts = len(prompts)

    with tf.Session() as sess:

        stft_mean = tf.get_variable('stft_mean', shape=(1025*audio.r,), dtype=tf.float32)
        stft_std = tf.get_variable('stft_std', shape=(1025*audio.r,), dtype=tf.float32)

        # initialize model
        model = model(config, batch_inputs, train=False)

        train_writer = tf.summary.FileWriter('log/' + config.save_path + '/test', sess.graph)

        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        saver = tf.train.Saver()

        print('restoring weights')
        latest_ckpt = tf.train.latest_checkpoint(
            'weights/' + config.save_path[:config.save_path.rfind('/')]
        )
        saver.restore(sess, latest_ckpt)

        stft_mean, stft_std = sess.run([stft_mean, stft_std])

        try:
            while True:
                out = sess.run([
                    model.output,
                    model.alignments,
                    batch_inputs
                ])
                outputs, alignments, inputs = out

                print('saving samples')
                for out, words, align in zip(outputs, inputs['text'], alignments):
                    # store a sample to listen to
                    text = ''.join([ivocab[w] for w in words])
                    attention_plot = data_input.generate_attention_plot(align)
                    sample = audio.invert_spectrogram(out*stft_std + stft_mean)
                    merged = sess.run(tf.summary.merge(
                         [tf.summary.audio(text, sample[None, :], sr),
                          tf.summary.image(text, attention_plot)]
                    ))
                    train_writer.add_summary(merged, 0)
        except tf.errors.OutOfRangeError:
            coord.request_stop()
            coord.join(threads)
Example #3
 def test_empty_labels_and_scores_gives_nan_auc(self):
   with self.test_session():
     labels = tf.constant([], shape=[0], dtype=tf.bool)
     scores = tf.constant([], shape=[0], dtype=tf.float32)
     score_range = [0, 1.]
     auc, update_op = tf.contrib.metrics.auc_using_histogram(labels, scores,
                                                             score_range)
     tf.local_variables_initializer().run()
     update_op.run()
     self.assertTrue(np.isnan(auc.eval()))
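The test works because tf.contrib.metrics.auc_using_histogram keeps its histogram state in local variables, which is why tf.local_variables_initializer() must run before update_op. A minimal sketch of the same streaming-metric pattern using the core tf.metrics API (hypothetical data):

import tensorflow as tf

labels = tf.constant([1, 0, 1, 1])
predictions = tf.constant([1, 0, 0, 1])
accuracy, update_op = tf.metrics.accuracy(labels, predictions)

with tf.Session() as sess:
    # The running total/count live in local variables; re-initializing
    # them resets the metric to zero.
    sess.run(tf.local_variables_initializer())
    sess.run(update_op)
    print(sess.run(accuracy))  # 0.75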
Example #4
  def _check_auc(self,
                 nbins=100,
                 desired_auc=0.75,
                 score_range=None,
                 num_records=50,
                 frac_true=0.5,
                 atol=0.05,
                 num_updates=10):
    """Check auc accuracy against synthetic data.

    Args:
      nbins:  nbins arg from contrib.metrics.auc_using_histogram.
      desired_auc:  Number in [0, 1].  The desired auc for synthetic data.
      score_range:  2-tuple, (low, high), giving the range of the resultant
        scores.  Defaults to [0, 1.].
      num_records:  Positive integer.  The number of records to return.
      frac_true:  Number in (0, 1).  Expected fraction of resultant labels that
        will be True.  This is just in expectation...more or less may actually
        be True.
      atol:  Absolute tolerance for final AUC estimate.
      num_updates:  Update internal histograms this many times, each with a new
        batch of synthetic data, before computing final AUC.

    Raises:
      AssertionError: If resultant AUC is not within atol of theoretical AUC
        from synthetic data.
    """
    score_range = score_range or [0, 1.]
    with self.test_session():
      labels = tf.placeholder(tf.bool, shape=[num_records])
      scores = tf.placeholder(tf.float32, shape=[num_records])
      auc, update_op = tf.contrib.metrics.auc_using_histogram(labels,
                                                              scores,
                                                              score_range,
                                                              nbins=nbins)
      tf.local_variables_initializer().run()
      # Updates, then extract auc.
      for _ in range(num_updates):
        labels_a, scores_a = synthetic_data(desired_auc, score_range,
                                            num_records, self.rng, frac_true)
        update_op.run(feed_dict={labels: labels_a, scores: scores_a})
      labels_a, scores_a = synthetic_data(desired_auc, score_range, num_records,
                                          self.rng, frac_true)
      # Fetch current auc, and verify that fetching again doesn't change it.
      auc_eval = auc.eval()
      self.assertAlmostEqual(auc_eval, auc.eval(), places=5)

    msg = ('nbins: %s, desired_auc: %s, score_range: %s, '
           'num_records: %s, frac_true: %s, num_updates: %s') % (nbins,
                                                                 desired_auc,
                                                                 score_range,
                                                                 num_records,
                                                                 frac_true,
                                                                 num_updates)
    np.testing.assert_allclose(desired_auc, auc_eval, atol=atol, err_msg=msg)
Example #5
    def train(self, DGTrain, DGTest, saver=True):

        epoch = DGTrain.length

        self.LearningRateSchedule(self.LEARNING_RATE, self.K, epoch)

        trainable_var = tf.trainable_variables()
        
        self.regularize_model()
        self.optimization(trainable_var)
        self.ExponentialMovingAverage(trainable_var, self.DECAY_EMA)

        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        self.summary_test_writer = tf.summary.FileWriter(self.LOG + '/test',
                                            graph=self.sess.graph)

        self.summary_writer = tf.summary.FileWriter(self.LOG + '/train', graph=self.sess.graph)
        merged_summary = tf.summary.merge_all()
        steps = self.STEPS

        
        # for i in range(Xval.shape[0]):
        #     imsave("/tmp/image_{}.png".format(i), Xval[i])
        #     imsave("/tmp/label_{}.png".format(i), Yval[i,:,:,0])



        for step in range(steps):
            batch_data, batch_labels = DGTrain.Batch(0, self.BATCH_SIZE)
            feed_dict = {self.input_node: batch_data,
                         self.train_labels_node: batch_labels}

            # self.optimizer is replaced by self.training_op for the exponential moving decay
            _, l, lr, predictions, s = self.sess.run(
                        [self.training_op, self.loss, self.learning_rate,
                         self.train_prediction, merged_summary],
                        feed_dict=feed_dict)

            if step % self.N_PRINT == 0:
                i = datetime.now()
                print(i.strftime('%Y/%m/%d %H:%M:%S: \n '))
                self.summary_writer.add_summary(s, step)                
                error, acc, acc1, recall, prec, f1 = self.error_rate(predictions, batch_labels, step)
                print('  Step %d of %d' % (step, steps))
                print('  Learning rate: %.5f \n' % lr)
                print('  Mini-batch loss: %.5f \n       Accuracy: %.1f%% \n       acc1: %.1f%% \n       recall: %.1f%% \n       prec: %.1f%% \n       f1: %.1f%% \n' %
                      (l, acc, acc1, recall, prec, f1))
                self.Validation(DGTest, step)
Example #6
def main(model_config, train_config, track_config):
  # Create training directory
  train_dir = train_config['train_dir']
  if not tf.gfile.IsDirectory(train_dir):
    tf.logging.info('Creating training directory: %s', train_dir)
    tf.gfile.MakeDirs(train_dir)

  # Build the Tensorflow graph
  g = tf.Graph()
  with g.as_default():
    # Set fixed seed
    np.random.seed(train_config['seed'])
    tf.set_random_seed(train_config['seed'])

    # Build the model
    model = siamese_model.SiameseModel(model_config, train_config, mode='inference')
    model.build()

    # Save configurations for future reference
    save_cfgs(train_dir, model_config, train_config, track_config)

    saver = tf.train.Saver(tf.global_variables(),
                           max_to_keep=train_config['max_checkpoints_to_keep'])

    # Dynamically allocate GPU memory
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    sess = tf.Session(config=sess_config)
    model_path = tf.train.latest_checkpoint(train_config['train_dir'])

    if not model_path:
      # Initialize all variables
      sess.run(tf.global_variables_initializer())
      sess.run(tf.local_variables_initializer())
      start_step = 0

      # Load pretrained embedding model if needed
      if model_config['embed_config']['embedding_checkpoint_file']:
        model.init_fn(sess)

    else:
      logging.info('Restore from last checkpoint: {}'.format(model_path))
      sess.run(tf.local_variables_initializer())
      saver.restore(sess, model_path)
      start_step = tf.train.global_step(sess, model.global_step.name) + 1

    checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=start_step)
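The restore-or-initialize branch above is a common idiom: prefer the latest checkpoint when one exists, otherwise run the initializers. A minimal sketch of it in isolation (hypothetical variable and checkpoint directory):

import tensorflow as tf

v = tf.get_variable('v', shape=[1])  # hypothetical variable so Saver has something to track
saver = tf.train.Saver()

with tf.Session() as sess:
    model_path = tf.train.latest_checkpoint('/tmp/train_dir')  # None when no checkpoint exists
    if model_path:
        saver.restore(sess, model_path)
    else:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())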
Example #7
	def predict(self):
		import cv2
		import glob
		import numpy as np
		# TODO: this should not be written this way; read the image files directly for prediction instead of from the tfrecord, because the order changes and outputs cannot be matched back to inputs
		predict_file_path = glob.glob(os.path.join(ORIGIN_PREDICT_DIRECTORY, '*.tif'))
		print(len(predict_file_path))
		ckpt_path = CHECK_POINT_PATH
		all_parameters_saver = tf.train.Saver()
		with tf.Session() as sess:  # start a session
			sess.run(tf.global_variables_initializer())
			sess.run(tf.local_variables_initializer())
			# summary_writer = tf.summary.FileWriter(FLAGS.tb_dir, sess.graph)
			# tf.summary.FileWriter(FLAGS.model_dir, sess.graph)
			all_parameters_saver.restore(sess=sess, save_path=ckpt_path)
			for index, image_path in enumerate(predict_file_path):
				# image = cv2.imread(image_path, flags=0)
				image = np.reshape(a=cv2.imread(image_path, flags=0), newshape=(1, INPUT_IMG_WIDE, INPUT_IMG_HEIGHT, INPUT_IMG_CHANNEL))
				predict_image = sess.run(
					tf.argmax(input=self.prediction, axis=3),
					feed_dict={
						self.input_image: image,
						self.keep_prob: 1.0, self.lamb: 0.004
					}
				)
				cv2.imwrite(os.path.join(PREDICT_SAVED_DIRECTORY, '%d.jpg' % index), predict_image[0] * 255)
		print('Done prediction')
Example #8
def evaluate():
    """Eval ocr for a number of steps."""
    with tf.Graph().as_default() as g:
        images, labels, seq_lengths = ocr.inputs()
        logits, timesteps = ocr.inference(images, FLAGS.eval_batch_size, train=True)
        ler = ocr.create_label_error_rate(logits, labels, timesteps)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        config = tf.ConfigProto(
            device_count={'GPU': 0}
        )
        sess = tf.Session(config=config)
        sess.run(init_op)

        saver = tf.train.Saver()

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, ler, summary_op)
            if FLAGS.run_once:
                break
            # print("Waiting for next evaluation for " + str(FLAGS.eval_interval_secs) + " sec")
            time.sleep(FLAGS.eval_interval_secs)
Example #9
def main(argv):
  del argv  # Unused.
  # Sanity check on the GCS bucket URL.
  if not FLAGS.gcs_bucket_url or not FLAGS.gcs_bucket_url.startswith("gs://"):
    print("ERROR: Invalid GCS bucket URL: \"%s\"" % FLAGS.gcs_bucket_url)
    sys.exit(1)

  # Verify that writing to the records file in GCS works.
  print("\n=== Testing writing and reading of GCS record file... ===")
  example_data = create_examples(FLAGS.num_examples, 5)
  with tf.python_io.TFRecordWriter(FLAGS.gcs_bucket_url) as hf:
    for e in example_data:
      hf.write(e.SerializeToString())

    print("Data written to: %s" % FLAGS.gcs_bucket_url)

  # Verify that reading from the tfrecord file works and that
  # tf_record_iterator works.
  record_iter = tf.python_io.tf_record_iterator(FLAGS.gcs_bucket_url)
  read_count = 0
  for _ in record_iter:
    read_count += 1
  print("Read %d records using tf_record_iterator" % read_count)

  if read_count != FLAGS.num_examples:
    print("FAIL: The number of records read from tf_record_iterator (%d) "
          "differs from the expected number (%d)" % (read_count,
                                                     FLAGS.num_examples))
    sys.exit(1)

  # Verify that running the read op in a session works.
  print("\n=== Testing TFRecordReader.read op in a session... ===")
  with tf.Graph().as_default() as _:
    filename_queue = tf.train.string_input_producer([FLAGS.gcs_bucket_url],
                                                    num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(tf.local_variables_initializer())
      tf.train.start_queue_runners()
      index = 0
      for _ in range(FLAGS.num_examples):
        print("Read record: %d" % index)
        sess.run(serialized_example)
        index += 1

      # Reading one more record should trigger an exception.
      try:
        sess.run(serialized_example)
        print("FAIL: Failed to catch the expected OutOfRangeError while "
              "reading one more record than is available")
        sys.exit(1)
      except tf.errors.OutOfRangeError:
        print("Successfully caught the expected OutOfRangeError while "
              "reading one more record than is available")

  create_dir_test()
  create_object_test()
Example #10
  def testEvaluationLoopTimeout(self):
    _, update_op = slim.metrics.streaming_accuracy(
        self._predictions, self._labels)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Create checkpoint and log directories.
    chkpt_dir = os.path.join(self.get_temp_dir(), 'tmp_logs/')
    gfile.MakeDirs(chkpt_dir)
    logdir = os.path.join(self.get_temp_dir(), 'tmp_logs2/')
    gfile.MakeDirs(logdir)

    # Save initialized variables to checkpoint directory.
    saver = tf.train.Saver()
    with self.test_session() as sess:
      init_op.run()
      saver.save(sess, os.path.join(chkpt_dir, 'chkpt'))

    # Run the evaluation loop with a timeout.
    with self.test_session() as sess:
      start = time.time()
      slim.evaluation.evaluation_loop(
          '', chkpt_dir, logdir, eval_op=update_op,
          eval_interval_secs=2.0, timeout=6.0)
      end = time.time()

      # Check we've waited for the timeout.
      self.assertGreater(end - start, 6.0)

      # Then the timeout kicked in and stopped the loop.
      self.assertLess(end - start, 8.0)
Example #11
    def test_smoke(self):
        """Smoke test for a full pipeline."""
        _, tname = tempfile.mkstemp()
        num = 100
        num_epochs = 2
        self._write_examples(tname, [self._random_io_data() for _ in range(num)])
        tensors = data.read_from_files([tname], shuffle=True, num_epochs=num_epochs)
        batches = lin.shuffle_batch(tensors=tensors, batch_size=5)

        count = 0
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                while True:
                    actual = sess.run(batches)
                    count += len(actual[0])
            except tf.errors.OutOfRangeError as ex:
                coord.request_stop(ex=ex)
            finally:
                coord.request_stop()
                coord.join(threads)
        self.assertEqual(num * num_epochs, count)
        os.remove(tname)
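The try/except/finally shape above is the standard shutdown protocol for queue-based input pipelines: num_epochs creates a local epoch counter (hence the local-variable initialization), and OutOfRangeError signals that the input was exhausted cleanly. A minimal, self-contained sketch of the same pattern (hypothetical two-string queue):

import tensorflow as tf

queue = tf.train.string_input_producer(['a', 'b'], num_epochs=1)
item = queue.dequeue()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())  # initializes the epoch counter
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while True:
            print(sess.run(item))
    except tf.errors.OutOfRangeError:
        pass  # end of input reached
    finally:
        coord.request_stop()
        coord.join(threads)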
Example #12
  def get_hit_rate_and_ndcg(self, predicted_scores_by_user, items_by_user,
                            top_k=rconst.TOP_K, match_mlperf=False):
    rconst.TOP_K = top_k
    rconst.NUM_EVAL_NEGATIVES = predicted_scores_by_user.shape[1] - 1
    batch_size = items_by_user.shape[0]

    users = np.repeat(np.arange(batch_size)[:, np.newaxis],
                      rconst.NUM_EVAL_NEGATIVES + 1, axis=1)
    users, items, duplicate_mask = \
      data_pipeline.BaseDataConstructor._assemble_eval_batch(
          users, items_by_user[:, -1:], items_by_user[:, :-1], batch_size)

    g = tf.Graph()
    with g.as_default():
      logits = tf.convert_to_tensor(
          predicted_scores_by_user.reshape((-1, 1)), tf.float32)
      softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
                                  logits], axis=1)
      duplicate_mask = tf.convert_to_tensor(duplicate_mask, tf.float32)

      metric_ops = neumf_model.compute_eval_loss_and_metrics(
          logits=logits, softmax_logits=softmax_logits,
          duplicate_mask=duplicate_mask, num_training_neg=NUM_TRAIN_NEG,
          match_mlperf=match_mlperf).eval_metric_ops

      hr = metric_ops[rconst.HR_KEY]
      ndcg = metric_ops[rconst.NDCG_KEY]

      init = [tf.global_variables_initializer(),
              tf.local_variables_initializer()]

    with self.test_session(graph=g) as sess:
      sess.run(init)
      return sess.run([hr[1], ndcg[1]])
Example #13
def compute_one_decoding_video_metrics(iterator, feed_dict, num_videos):
  """Computes the average of all the metric for one decoding.

  Args:
    iterator: dataset iterator.
    feed_dict: feed dict to initialize iterator.
    num_videos: number of videos.

  Returns:
    Dictionary which contains the average of each metric per frame.
  """
  output, target = iterator.get_next()

  metrics_dict = compute_metrics(output, target)
  metrics_names, metrics = zip(*six.iteritems(metrics_dict))
  means, update_ops = tf.metrics.mean_tensor(metrics)

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    initializer = iterator._initializer  # pylint: disable=protected-access
    if initializer is not None:
      sess.run(initializer, feed_dict=feed_dict)

    # Compute mean over dataset
    for i in range(num_videos):
      print("Computing video: %d" % i)
      sess.run(update_ops)
    averaged_metrics = sess.run(means)

    results = dict(zip(metrics_names, averaged_metrics))
    return results
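tf.metrics.mean_tensor keeps an element-wise running mean in local variables, which is why only tf.local_variables_initializer() needs to run before the update ops. A minimal sketch (hypothetical values):

import tensorflow as tf

values = tf.placeholder(tf.float32, shape=[2])
mean, update_op = tf.metrics.mean_tensor(values)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(update_op, feed_dict={values: [1.0, 2.0]})
    sess.run(update_op, feed_dict={values: [3.0, 4.0]})
    print(sess.run(mean))  # [2.0, 3.0]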
Example #14
    def test(self, p1, p2, steps):
        loss, roc = 0., 0.
        acc, F1, recall = 0., 0., 0.
        precision, jac, AJI = 0., 0., 0.
        init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
        self.sess.run(init_op)
        self.Saver()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        for step in range(steps):  
            feed_dict = {self.is_training: False} 
            l, prob, batch_labels = self.sess.run(
                [self.loss, self.train_prediction, self.train_labels_node],
                feed_dict=feed_dict)
            loss += l
            out = ComputeMetrics(prob[0,:,:,1], batch_labels[0,:,:,0], p1, p2)
            acc += out[0]
            roc += out[1]
            jac += out[2]
            recall += out[3]
            precision += out[4]
            F1 += out[5]
            AJI += out[6]
        coord.request_stop()
        coord.join(threads)
        loss, acc, F1 = np.array([loss, acc, F1]) / steps
        recall, precision, roc = np.array([recall, precision, roc]) / steps
        jac, AJI = np.array([jac, AJI]) / steps
        return loss, acc, F1, recall, precision, roc, jac, AJI
Example #15
	def predict(self):
		print('Running inference...')
		self.sess.run(tf.group(tf.global_variables_initializer(),tf.local_variables_initializer()))
		self.load_weights('/Users/shashank/TensorFlow/SPN/weights/')
		coord = tf.train.Coordinator()
		threads = tf.train.start_queue_runners(sess=self.sess,coord=coord)

		result = []
		truth = []
		count =0
		try:
			while not coord.should_stop():
				print(count)
				batch_imgs, batch_labels, batch_landmarks, batch_visibility, batch_pose, batch_gender = self.sess.run([self.images,self.labels,self.land, self.vis, self.po, self.gen])
				batch_imgs = (batch_imgs - 127.5) / 128.0
				
				net_preds = self.sess.run(self.net_output, feed_dict={self.X: batch_imgs})
				result.append(np.concatenate(net_preds, axis=1))
				truth.append(np.concatenate([batch_labels[:, np.newaxis], batch_landmarks, batch_visibility, batch_pose, batch_gender], axis=1))
				count += 1

		except tf.errors.OutOfRangeError:
			print('Done predicting -- epoch limit reached')
		finally:
			coord.request_stop()

		coord.join(threads)	
		np.save('test_results', np.concatenate(result, axis = 0))
		np.save('truth', np.concatenate(truth, axis = 0))
Example #16
    def initializeOrRestore(self):

        self.ckptDir = os.path.join(self.checkpoint_dir, self.dataset.name)
        self.ckptPrefix = os.path.join(self.ckptDir, self.name, self.name)
        vgg_ckpt_file = os.path.join(self.ckptDir, 'vgg_16', 'vgg_16.ckpt')
        mt_ckpt_file = layers.latest_checkpoint(os.path.join(self.ckptDir, 'mt'))
        # ckpt_file = layers.latest_checkpoint(os.path.join(self.ckptDir, 'vgg_16', 'vgg_16.ckpt'))
        globalVars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

        if vgg_ckpt_file is not None and tf.train.checkpoint_exists(vgg_ckpt_file):
            varsInCkpt, varsNotInCkpt = layers.scan_checkpoint_for_vars(vgg_ckpt_file, globalVars)
            if len(varsInCkpt) != 0:
                restorationSaver = tf.train.Saver(varsInCkpt)
                self.sess.run(tf.report_uninitialized_variables(var_list=varsInCkpt))
                restorationSaver.restore(self.sess, vgg_ckpt_file)
        else:
            varsNotInCkpt = globalVars

        if mt_ckpt_file is not None and tf.train.checkpoint_exists(mt_ckpt_file):
            varsInCkpt, varsNotInCkpt = layers.scan_checkpoint_for_vars(mt_ckpt_file, varsNotInCkpt)
            varsInCkpt, varsNotInCkpt = layers.replaceVarInListsByName(varsInCkpt, varsNotInCkpt, 'fc6')
            if len(varsInCkpt) != 0:
                restorationSaver = tf.train.Saver(varsInCkpt)
                self.sess.run(tf.report_uninitialized_variables(var_list=varsInCkpt))
                restorationSaver.restore(self.sess, mt_ckpt_file)
        else:
            varsNotInCkpt = globalVars

        self.saver = tf.train.Saver()
        self.sess.run(tf.group(tf.variables_initializer(varsNotInCkpt), tf.local_variables_initializer()))
Example #17
def parallel_acc_by_tags(model, sess, max_parallel_calcs, data_folder, read_func, from_file=None, data_set="test",
                         feature="images", orientations=None):
    total_images = 0
    if orientations is None:
        orientations = [0, 90, 180, 270]
    images, labels, tags = input_pipeline(
        data_folder, max_parallel_calcs, data_set=data_set, feature=feature,
        num_images=None, binary_file=False, orientations=orientations,
        from_file=from_file, num_epochs=1)

    incorrect_images_list = tf.Variable([], dtype=tf.string, trainable=False, name="Incorrect_images")
    adder_image_names = tf.placeholder(dtype=tf.string, shape=[None], name="Adder_images")
    new_incorrect_images_list = tf.concat([incorrect_images_list, adder_image_names], axis=0)
    add_incorrect_images = tf.assign(incorrect_images_list, new_incorrect_images_list, use_locking=True,
                                     validate_shape=False)

    incorrect_labels_list = tf.Variable([], dtype=tf.int32, trainable=False, name="Incorrect_image_labels")
    adder_image_labels = tf.placeholder(dtype=tf.int32, shape=[None], name="Adder_image_labels")
    new_incorrect_labels_list = tf.concat([incorrect_labels_list, adder_image_labels], axis=0)
    add_incorrect_labels = tf.assign(incorrect_labels_list, new_incorrect_labels_list, use_locking=True,
                                     validate_shape=False)

    init_ops = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
    sess.run(init_ops)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    steps = 0
    try:
        print("Checking Accuracy")
        while not coord.should_stop():
            steps += 1
            raw_imgs_list, labels_list, tags_list = sess.run([images, labels, tags])
            imgs_list = read_func(raw_imgs_list)
            preds = sess.run(model.correct_predictions, feed_dict={model.inputs: imgs_list, model.testy: labels_list,
                                                                   model.keep_probs: 1})
            total_images += len(preds)
            incorrect_indices = np.where(preds == 0)

            # Uses locking so we do not lose any incorrect classifications
            sess.run(add_incorrect_images, feed_dict={adder_image_names: tags_list[incorrect_indices]})
            sess.run(add_incorrect_labels, feed_dict={adder_image_labels: labels_list[incorrect_indices]})

            if steps % 100 == 0:
                print("Calculated " + str(steps*max_parallel_calcs) + " files")
    except tf.errors.OutOfRangeError:
        print('Done checking accuracy -- epoch limit reached')
    finally:
        # When done, ask the threads to stop.
        coord.request_stop()
    coord.join(threads)
    inc_name = sess.run(incorrect_images_list)
    inc_label = sess.run(incorrect_labels_list)
    print("Correct classifications: " + str(total_images - len(inc_name)))
    print("Total images: " + str(total_images))
    print("Accuracy: " + str((total_images - len(inc_name))/total_images))
    with open(os.path.join(data_folder, "incorrect.txt"), 'w') as f:
        for i in range(len(inc_name)):
            f.write(os.path.join(data_folder, inc_name[i].decode('utf-8')) + ', ' + str(inc_label[i]*90) + '\n')
    sess.close()
Example #18
 def test_input_pipeline(self):
     Xs, Ys = dsu.tiny_imagenet_load()
     n_batches = 0
     batch_size = 10
     with tf.Graph().as_default(), tf.Session() as sess:
         batch_generator = dsu.create_input_pipeline(
             Xs[:100],
             batch_size=batch_size,
             n_epochs=1,
             shape=(64, 64, 3),
             crop_shape=(64, 64, 3))
         init_op = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
         sess.run(init_op)
         coord = tf.train.Coordinator()
         tf.get_default_graph().finalize()
         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
         try:
             while not coord.should_stop():
                 batch = sess.run(batch_generator)
                 assert (batch.shape == (batch_size, 64, 64, 3))
                 n_batches += 1
         except tf.errors.OutOfRangeError:
             pass
         finally:
             coord.request_stop()
         coord.join(threads)
     assert (n_batches == 10)
Example #19
def main():
    filelist = tf.train.match_filenames_once(["data/mini/part-0", "data/mini/part-1"])
    filename_queue = tf.train.string_input_producer(
        filelist, shared_name='input_file_name_queue', num_epochs=5)
    #############################
    new_filename_queue = tf.FIFOQueue(1, tf.string)
    qr = CheckpointQueueRunner(filename_queue, new_filename_queue, 3)
    tf.train.add_queue_runner(qr)
    #############################
    reader = user_ops.SmStandardKvReader("[dat]", "[common]")
    file_name, record = reader.read(new_filename_queue)

    batch_record = tf.train.batch([record], batch_size=2, num_threads=5,
                        capacity=5000, allow_smaller_final_batch=True)

    init_op = [tf.local_variables_initializer(), tf.global_variables_initializer()]
    with tf.Session() as sess:
        # otherwise FIFOQueue will be closed before read
        sess.run(init_op)
        print("*" * 40)
        print(sess.run(filelist))
        print("*" * 40)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        input_schema = "data/mini/conf/input_schema.json"
        parse_schema = "data/mini/conf/parse_schema.json"
        standard_kv_parser = lib_parser.StandardKvParser(batch_record, input_schema, parse_schema)
        tensor_dict = standard_kv_parser.get_tensor_dict()

        for i in range(3):
            print(sess.run(tensor_dict))
        coord.request_stop()
        coord.join(threads)
Example #20
def main(output_dir, summaries_every, num_steps):
  graph = tf.Graph()


  with graph.as_default():
    features = tf.placeholder(tf.float32, shape=[4, 2])
    labels = tf.placeholder(tf.int32, shape=[4])

    train_op, loss, gs, update_acc = make_graph(features, labels)
    init = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()
    summary_op = tf.summary.merge_all()


  writer = tf.summary.FileWriter(output_dir, graph=graph, flush_secs=1)

  with tf.Session(graph=graph) as sess:
    init.run()
    init_local.run()
    step = 0
    xy = np.array([
        [True, False],
        [True, True],
        [False, False],
        [False, True]
    ], dtype=np.float32)
    y_ = np.array([True, False, False, True], dtype=np.int32)
    while step < num_steps:

      _, _, step, loss_value, summaries = sess.run(
          [train_op, update_acc, gs, loss, summary_op],
          feed_dict={features: xy, labels: y_}
      )
      if step % summaries_every == 0:
        writer.add_summary(summaries, global_step=step)
Example #21
  def execute_cpu(self, graph_fn, inputs):
    """Constructs the graph, executes it on CPU and returns the result.

    Args:
      graph_fn: a callable that constructs the tensorflow graph to test. The
        arguments of this function should correspond to `inputs`.
      inputs: a list of numpy arrays to feed input to the computation graph.

    Returns:
      A list of numpy arrays or a scalar returned from executing the tensorflow
      graph.
    """
    with self.test_session(graph=tf.Graph()) as sess:
      placeholders = [tf.placeholder_with_default(v, v.shape) for v in inputs]
      results = graph_fn(*placeholders)
      sess.run([tf.global_variables_initializer(), tf.tables_initializer(),
                tf.local_variables_initializer()])
      materialized_results = sess.run(results, feed_dict=dict(zip(placeholders,
                                                                  inputs)))

      if (hasattr(materialized_results, '__len__') and
          len(materialized_results) == 1 and
          (isinstance(materialized_results, list) or
           isinstance(materialized_results, tuple))):
        materialized_results = materialized_results[0]
    return materialized_results
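execute_cpu relies on tf.placeholder_with_default, which turns each numpy input into a placeholder that already carries a default value, so the graph runs with or without an explicit feed. A minimal sketch of that mechanism alone (hypothetical array):

import numpy as np
import tensorflow as tf

v = np.array([1.0, 2.0], dtype=np.float32)
p = tf.placeholder_with_default(v, v.shape)
doubled = p * 2.0

with tf.Session() as sess:
    print(sess.run(doubled))                    # uses the default value
    print(sess.run(doubled, feed_dict={p: v}))  # or an explicit feed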
Example #22
  def testSummariesAreFlushedToDiskWithoutGlobalStep(self):
    output_dir = os.path.join(self.get_temp_dir(), 'flush_test_no_global_step')
    if tf.gfile.Exists(output_dir):  # For running on jenkins.
      tf.gfile.DeleteRecursively(output_dir)

    names_to_metrics, names_to_updates = self._create_names_to_metrics(
        self._predictions, self._labels)

    for k in names_to_metrics:
      v = names_to_metrics[k]
      tf.summary.scalar(k, v)

    summary_writer = tf.summary.FileWriter(output_dir)

    initial_op = tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer())
    eval_op = tf.group(*names_to_updates.values())

    with self.test_session() as sess:
      slim.evaluation.evaluation(
          sess,
          initial_op=initial_op,
          eval_op=eval_op,
          summary_op=tf.summary.merge_all(),
          summary_writer=summary_writer)

      names_to_values = {name: names_to_metrics[name].eval()
                         for name in names_to_metrics}
    self._verify_summaries(output_dir, names_to_values)
Example #23
  def test_batch_text_lines(self):
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("A\nB\nC\nD\nE\n")

    batch_size = 3
    queue_capacity = 10
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      inputs = tf.contrib.learn.io.read_batch_examples(
          [filename], batch_size, reader=tf.TextLineReader,
          randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
          read_batch_size=10, name=name)
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run(inputs), [b"A", b"B", b"C"])
      self.assertAllEqual(session.run(inputs), [b"D", b"E"])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)
Example #24
def run():
    with tf.Session() as sess:
        print("start")
        feature = {'image': tf.FixedLenFeature([], tf.string),
                   'label': tf.FixedLenFeature([], tf.int64)}
        # Create a list of filenames and pass it to a queue
        print(data_path)
        filename_queue = tf.train.string_input_producer(data_path, num_epochs=1)
        # Define a reader and read the next record
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        # Decode the record read by the reader
        features = tf.parse_single_example(serialized_example, features=feature)
        # Convert the image data from string back to the numbers
        image = tf.decode_raw(features['image'], tf.uint8)
        # image = tf.cast(image, tf.int32)

        # Cast label data into int32
        label = tf.cast(features['label'], tf.int32)
        # Reshape image data into the original shape
        init_op = [tf.global_variables_initializer(), tf.local_variables_initializer()]
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        train_list = []
        for i in range(1000):
            example, l = sess.run([image, label])
            train_list.append((example,l))
            # print (example, l)
        coord.request_stop()
        coord.join(threads)
        return train_list
# run()
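The feature spec passed to tf.parse_single_example must mirror how the records were written. A minimal, self-contained round-trip sketch (hypothetical file path, label-only record):

import tensorflow as tf

path = '/tmp/sketch.tfrecord'  # hypothetical path
with tf.python_io.TFRecordWriter(path) as writer:
    ex = tf.train.Example(features=tf.train.Features(feature={
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[3]))}))
    writer.write(ex.SerializeToString())

serialized = next(tf.python_io.tf_record_iterator(path))
features = tf.parse_single_example(
    serialized, features={'label': tf.FixedLenFeature([], tf.int64)})

with tf.Session() as sess:
    print(sess.run(features['label']))  # 3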
Example #25
  def test_keyed_read_text_lines(self):
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file("ABC\nDEF\nGHK\n")

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
      keys, inputs = tf.contrib.learn.io.read_keyed_batch_examples(
          filename, batch_size,
          reader=tf.TextLineReader, randomize_input=False,
          num_epochs=1, queue_capacity=queue_capacity, name=name)
      self.assertAllEqual((None,), keys.get_shape().as_list())
      self.assertAllEqual((None,), inputs.get_shape().as_list())
      session.run(tf.local_variables_initializer())

      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(session, coord=coord)

      self.assertAllEqual(session.run([keys, inputs]),
                          [[filename.encode("utf-8") + b":1"], [b"ABC"]])
      self.assertAllEqual(session.run([keys, inputs]),
                          [[filename.encode("utf-8") + b":2"], [b"DEF"]])
      self.assertAllEqual(session.run([keys, inputs]),
                          [[filename.encode("utf-8") + b":3"], [b"GHK"]])
      with self.assertRaises(errors.OutOfRangeError):
        session.run(inputs)

      coord.request_stop()
      coord.join(threads)
Example #26
def blend_images(data_folder1, data_folder2, out_folder, alpha=.5):
    filename_queue = tf.placeholder(dtype=tf.string)
    label = tf.placeholder(dtype=tf.int32)
    tensor_image = tf.read_file(filename_queue)

    image = tf.image.decode_jpeg(tensor_image, channels=3)

    multiplier = tf.divide(tf.constant(224, tf.float32),
                           tf.cast(tf.maximum(tf.shape(image)[0], tf.shape(image)[1]), tf.float32))
    x = tf.cast(tf.round(tf.multiply(tf.cast(tf.shape(image)[0], tf.float32), multiplier)), tf.int32)
    y = tf.cast(tf.round(tf.multiply(tf.cast(tf.shape(image)[1], tf.float32), multiplier)), tf.int32)
    image = tf.image.resize_images(image, [x, y])

    image = tf.image.rot90(image, k=label)

    image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)
    sess = tf.Session()
    sess.run(tf.local_variables_initializer())
    for root, folders, files in os.walk(data_folder1):
        for each in files:
            if each.find('.jpg') >= 0:
                img1 = Image.open(os.path.join(root, each))
                img2_path = os.path.join(root.replace(data_folder1, data_folder2), each.split("-")[-1])
                rotation = int(each.split("-")[1])
                img2 = sess.run(image, feed_dict={filename_queue: img2_path, label: rotation})
                imsave(os.path.join(os.getcwd(), "temp", "temp.jpg"), img2)
                img2 = Image.open(os.path.join(os.getcwd(), "temp", "temp.jpg"))
                out_image = Image.blend(img1, img2, alpha)
                outfile = os.path.join(root.replace(data_folder1, out_folder), each)
                if not os.path.exists(os.path.split(outfile)[0]):
                    os.makedirs(os.path.split(outfile)[0])
                out_image.save(outfile)
            else:
                print(each)
    sess.close()
Example #27
    def testRoundtrip(self, rate=0.25, count=5, n=500):
        """Tests `resample(x, weights)` and resample(resample(x, rate), 1/rate)`."""

        foo = self.get_values(count)
        bar = self.get_values(count)
        weights = self.get_weights(count)

        resampled_in, rates = tf.contrib.training.weighted_resample([foo, bar], tf.constant(weights), rate, seed=123)

        resampled_back_out = tf.contrib.training.resample_at_rate(resampled_in, 1.0 / rates, seed=456)

        init = tf.local_variables_initializer()
        with self.test_session() as s:
            s.run(init)  # initialize

            # outputs
            counts_resampled = collections.Counter()
            counts_reresampled = collections.Counter()
            for _ in range(n):
                resampled_vs, reresampled_vs = s.run([resampled_in, resampled_back_out])

                self.assertAllEqual(resampled_vs[0], resampled_vs[1])
                self.assertAllEqual(reresampled_vs[0], reresampled_vs[1])

                for v in resampled_vs[0]:
                    counts_resampled[v] += 1
                for v in reresampled_vs[0]:
                    counts_reresampled[v] += 1

            # assert that resampling worked as expected
            self.assert_expected(weights, rate, counts_resampled, n)

            # and that re-resampling gives the approx identity.
            self.assert_expected([1.0 for _ in weights], 1.0, counts_reresampled, n, abs_delta=0.1 * n * count)
Example #28
	def test(self):
		self.test_setup()

		self.sess.run(tf.global_variables_initializer())
		self.sess.run(tf.local_variables_initializer())

		# load checkpoint
		checkpointfile = self.conf.modeldir+ '/model.ckpt-' + str(self.conf.valid_step)
		self.load(self.loader, checkpointfile)

		# Start queue threads.
		threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

		# Test!
		confusion_matrix = np.zeros((self.conf.num_classes, self.conf.num_classes), dtype=np.int64)
		for step in range(self.conf.valid_num_steps):
			preds, _, _, c_matrix = self.sess.run([self.pred, self.accu_update_op, self.mIou_update_op, self.confusion_matrix])
			confusion_matrix += c_matrix
			if step % 100 == 0:
				print('step {:d}'.format(step))
		print('Pixel Accuracy: {:.3f}'.format(self.accu.eval(session=self.sess)))
		print('Mean IoU: {:.3f}'.format(self.mIoU.eval(session=self.sess)))
		self.compute_IoU_per_class(confusion_matrix)

		# finish
		self.coord.request_stop()
		self.coord.join(threads)
Example #29
    def __init__(self, WindowSize=5, FeaturesDimension=41):

        # features normalization values
        self.mean_vect = np.load(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/TrainingSetMean.npy'))
        self.stdev_vect = np.load(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data/TrainingSetStDev.npy'))

        # TF graph initialization
        self.config = Configuration(WindowSize, FeaturesDimension)

        self.graph = tf.Graph()

        with self.graph.as_default():
            self.feat = tf.placeholder(dtype=tf.float32, shape=[1, self.config.audio_feat_dimension])
            with tf.variable_scope('model'):
                model = VAD_DNN.Model(self.feat, self.config)

            logits_prob = model.softmax

            # the probability of speech is given by the first dimension in the softmax
            # so we slice the output accordingly
            self.speech_prob = tf.slice(logits_prob, [0, 0], [-1, 1])

            init_op = tf.local_variables_initializer()
            saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True


        self.session = tf.Session(graph=self.graph, config=config)

        self.session.run(init_op)

        saver.restore(self.session,
                      os.path.join(os.path.dirname(os.path.abspath(__file__)), "data/datamean_nodeltas_model_epoch13.ckpt"))
Example #30
def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None):
  """Initialize or restore variables from a checkpoint if available.

  Args:
    sess: Session to initialize variables in.
    saver: Saver to restore variables.
    logdir: Directory to search for checkpoints.
    checkpoint: Specify what checkpoint name to use; defaults to most recent.
    resume: Whether to expect recovering a checkpoint or starting a new run.

  Raises:
    ValueError: If resume expected but no log directory specified.
    RuntimeError: If no resume expected but a checkpoint was found.
  """
  sess.run(tf.group(
      tf.local_variables_initializer(),
      tf.global_variables_initializer()))
  if resume and not (logdir or checkpoint):
    raise ValueError('Need to specify logdir to resume a checkpoint.')
  if logdir:
    state = tf.train.get_checkpoint_state(logdir)
    if checkpoint:
      checkpoint = os.path.join(logdir, checkpoint)
    if not checkpoint and state and state.model_checkpoint_path:
      checkpoint = state.model_checkpoint_path
    if checkpoint and resume is False:
      message = 'Found unexpected checkpoint when starting a new run.'
      raise RuntimeError(message)
    if checkpoint:
      saver.restore(sess, checkpoint)
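A hedged usage sketch for initialize_variables above, assuming a graph with at least one variable (so tf.train.Saver can be built) and a hypothetical logdir:

import tensorflow as tf

v = tf.get_variable('v', shape=[1])  # hypothetical variable
saver = tf.train.Saver()

with tf.Session() as sess:
    # resume=None: restore a checkpoint if one exists, otherwise start fresh.
    initialize_variables(sess, saver, logdir='/tmp/run', checkpoint=None, resume=None)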
Example #31
    def __call__(self, inputs, training):
        """Add operations to classify a batch of input images.

    Args:
      inputs: A Tensor representing a batch of input images.
      training: A boolean. Set to True to add operations required only when
        training the classifier.

    Returns:
      A logits Tensor with shape [<batch_size>, self.num_classes].
    """
        print("Resnet Version={}".format(self.resnet_version))
        print("data Format={}".format(self.data_format))
        print()
        with self._model_variable_scope():
            # with tf.variable_scope('resnet_model'):
            if self.data_format == 'channels_first':
                # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
                # This provides a large performance boost on GPU. See
                # https://www.tensorflow.org/performance/performance_guide#data_formats
                inputs = tf.transpose(inputs, [0, 3, 1, 2])

            inputs = conv2d_fixed_padding(inputs=inputs,
                                          filters=self.num_filters,
                                          kernel_size=self.kernel_size,
                                          strides=self.conv_stride,
                                          data_format=self.data_format)
            # print(inputs)

            inputs = tf.identity(inputs, 'initial_conv')

            # We do not include batch normalization or activation functions in V2
            # for the initial conv1 because the first ResNet unit will perform these
            # for both the shortcut and non-shortcut paths as part of the first
            # block's projection. Cf. Appendix of [2].
            if self.resnet_version == 1:
                inputs = batch_norm(inputs, training, self.data_format)
                inputs = tf.nn.relu(inputs)

            if self.first_pool_size:
                inputs = tf.layers.max_pooling2d(
                    inputs=inputs,
                    pool_size=self.first_pool_size,
                    strides=self.first_pool_stride,
                    padding='SAME',
                    data_format=self.data_format)
                inputs = tf.identity(inputs, 'initial_max_pool')

            for i, num_blocks in enumerate(self.block_sizes):
                num_filters = self.num_filters * (2**i)
                inputs = block_layer(inputs=inputs,
                                     filters=num_filters,
                                     bottleneck=self.bottleneck,
                                     block_fn=self.block_fn,
                                     blocks=num_blocks,
                                     strides=self.block_strides[i],
                                     training=training,
                                     name='block_layer{}'.format(i + 1),
                                     data_format=self.data_format)
                # print(inputs)

            # Only apply the BN and ReLU for model that does pre_activation in each
            # building/bottleneck block, eg resnet V2.
            if self.pre_activation:
                inputs = batch_norm(inputs, training, self.data_format)
                inputs = tf.nn.relu(inputs)
                # print(inputs)

            # The current top layer has shape
            # `batch_size x pool_size x pool_size x final_size`.
            # ResNet does an Average Pooling layer over pool_size,
            # but that is the same as doing a reduce_mean. We do a reduce_mean
            # here because it performs better than AveragePooling2D.
            axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
            inputs = tf.reduce_mean(inputs, axes, keepdims=True)
            inputs = tf.identity(inputs, 'final_reduce_mean')

            inputs = tf.squeeze(inputs, axes)
            inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)
            inputs = tf.identity(inputs, 'final_dense')
            # print(inputs)
            print("End of __call")
            print("Number of classes {}".format(self.num_classes))
            print("Out shape {}".format(inputs.shape))
            print()
            print("len of trainble variables")
            tvar = [v for v in tf.trainable_variables()]
            print(len(tvar))

            tf_init_g = tf.global_variables_initializer()
            tf_init_l = tf.local_variables_initializer()
            self.sess.run(tf_init_g)
            self.sess.run(tf_init_l)
            inputs = self.sess.run(inputs)

            return inputs
Example #32
def main(unused_argv):
    with tf.Graph().as_default():
        beta = 1e-5
        if platform.system() == 'Windows':
            print('Running on Windows')
            base_dir = os.path.join('E:\\', 'Program', 'Bite')
        elif platform.system() == 'Linux':
            print('Running on Linux')
            base_dir = os.path.join('/media', 'md0', 'xt1800i', 'Bite')
        else:
            print('Running on unsupported system')
            return

        tfrecord = os.path.join(base_dir, 'datasets', 'tfrecord', f'{FLAGS.training_file}.tfrecord')
        ckpt_dir = os.path.join(base_dir, 'ckpt')

        training_set = tfdata_generator(filename=tfrecord, batch_size=FLAGS.batch_size,
                                        aug=True).make_one_shot_iterator()
        validation_set = tfdata_generator(filename=tfrecord, batch_size=FLAGS.batch_size).make_one_shot_iterator()

        x_train = tf.placeholder(dtype=tf.float32, shape=[None, 299, 299, 3])
        y_label = tf.placeholder(dtype=tf.int32, shape=[None, 7])
        outputs = model_function(x_train)
        with tf.name_scope('loss'):
            cross_entropy = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=outputs, labels=tf.argmax(y_label, 1)))
            regularize = tf.add_n(tf.get_collection("losses"))
            # for var in tf.trainable_variables():
            #     print(var.name)
            # loss = tf.reduce_mean(cross_entropy + beta * regularize)
            loss = tf.add(cross_entropy, tf.multiply(beta, regularize))
        tf.summary.scalar('loss', loss)
        with tf.name_scope('accuracy'):
            correct_pred = tf.equal(tf.argmax(outputs, 1), tf.argmax(y_label, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar('accuracy', accuracy)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=0.045, global_step=global_step,
                                                   staircase=True, decay_steps=int(FLAGS.num_image / FLAGS.batch_size),
                                                   decay_rate=0.96)
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss=loss, global_step=global_step)

        variables = tf.trainable_variables()
        # gradients = tf.gradients(loss,variables)
        # print(gradients)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter(os.path.join(base_dir, 'logs'), sess.graph)
            if FLAGS.ckpt is not None:
                print("restore ckpt . . .")
                saver.restore(sess, os.path.join(ckpt_dir, FLAGS.ckpt))
            else:
                print("new trainer . . .")
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())
            train_next_element = training_set.get_next()
            val_next_element = validation_set.get_next()
            import numpy as np
            train_acc = 0
            while True:
                start = time.time()
                batch_image, batch_label = sess.run(train_next_element)
                _, i, a, l = sess.run([train_op, global_step, accuracy, loss],
                                      feed_dict={x_train: batch_image, y_label: batch_label})

                train_acc += a
                print('--------------------------------------')
                # print(L2)
                # print(v[0][0][0][0][0:3])
                # print(g[0][0][0][0][0:3])
                if i % 10 == 0:
                    print(f'time = {time.time() - start}, iterator = {i}, Loss = {l}, Acc = {train_acc/10}')
                    train_acc = 0
                if i % 50 == 0:
                    rs = sess.run(merge,
                                  feed_dict={x_train: batch_image, y_label: batch_label})
                    writer.add_summary(rs, i)
                if i % 100 == 0:
                    val_batch_image, val_batch_label = sess.run(val_next_element)
                    val_loss, val_acc = sess.run(
                        [loss, accuracy],
                        feed_dict={x_train: val_batch_image, y_label: val_batch_label})
                    print(f'iterator= {i}, val_Loss = {val_loss}, val_Acc ={val_acc}')
                if i % 500 == 0:
                    saver.save(sess, os.path.join(ckpt_dir, f'model-{i}.ckpt'))
Example #33
    def initialize(self, config, num_classes=None):
        '''
            Initialize the graph from scratch according to config.
        '''
        with self.graph.as_default():
            with self.sess.as_default():
                # Set up placeholders
                h, w = config.image_size
                channels = config.channels
                self.images_A = tf.placeholder(tf.float32,
                                               shape=[None, h, w, channels],
                                               name='images_A')
                self.images_B = tf.placeholder(tf.float32,
                                               shape=[None, h, w, channels],
                                               name='images_B')
                self.labels_A = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='labels_A')
                self.labels_B = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='labels_B')
                self.scales_A = tf.placeholder(tf.float32,
                                               shape=[None],
                                               name='scales_A')
                self.scales_B = tf.placeholder(tf.float32,
                                               shape=[None],
                                               name='scales_B')

                self.learning_rate = tf.placeholder(tf.float32,
                                                    name='learning_rate')
                self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
                self.phase_train = tf.placeholder(tf.bool, name='phase_train')
                self.global_step = tf.Variable(0,
                                               trainable=False,
                                               dtype=tf.int32,
                                               name='global_step')

                self.setup_network_model(config, num_classes)

                # Build generator
                encode_A, styles_A = self.encoder(self.images_A)
                encode_B, styles_B = self.encoder(self.images_B)

                deform_BA, render_BA, ldmark_pred, ldmark_diff = self.decoder(
                    encode_B, self.scales_B, None)
                render_AA = self.decoder(encode_A,
                                         self.scales_A,
                                         styles_A,
                                         texture_only=True)
                render_BB = self.decoder(encode_B,
                                         self.scales_B,
                                         styles_B,
                                         texture_only=True)

                self.styles_A = tf.identity(styles_A, name='styles_A')
                self.styles_B = tf.identity(styles_B, name='styles_B')
                self.deform_BA = tf.identity(deform_BA, name='deform_BA')
                self.ldmark_pred = tf.identity(ldmark_pred, name='ldmark_pred')
                self.ldmark_diff = tf.identity(ldmark_diff, name='ldmark_diff')

                # Build discriminator for real images
                patch_logits_A, logits_A = self.discriminator(self.images_A)
                patch_logits_B, logits_B = self.discriminator(self.images_B)
                patch_logits_BA, logits_BA = self.discriminator(deform_BA)

                # Show images in TensorBoard
                image_grid_A = tf.stack([self.images_A, render_AA], axis=1)[:1]
                image_grid_B = tf.stack([self.images_B, render_BB], axis=1)[:1]
                image_grid_BA = tf.stack([self.images_B, deform_BA],
                                         axis=1)[:1]
                image_grid = tf.concat(
                    [image_grid_A, image_grid_B, image_grid_BA], axis=0)
                image_grid = tf.reshape(image_grid,
                                        [-1] + list(self.images_A.shape[1:]))
                image_grid = self.image_grid(image_grid, (3, 2))
                tf.summary.image('image_grid', image_grid)

                # Build all losses
                self.watch_list = {}
                loss_list_G = []
                loss_list_D = []

                # Adversarial loss for deform_BA
                loss_D, loss_G = self.cls_adv_loss(logits_A, logits_B,
                                                   logits_BA, self.labels_A,
                                                   self.labels_B,
                                                   self.labels_B, num_classes)
                loss_D, loss_G = config.coef_adv * loss_D, config.coef_adv * loss_G

                self.watch_list['LDg'] = loss_D
                self.watch_list['LGg'] = loss_G
                loss_list_D.append(loss_D)
                loss_list_G.append(loss_G)

                # Patch adversarial loss for deform_BA
                loss_D, loss_G = self.patch_adv_loss(patch_logits_A,
                                                     patch_logits_B,
                                                     patch_logits_BA)
                loss_D, loss_G = config.coef_patch_adv * loss_D, config.coef_patch_adv * loss_G

                self.watch_list['LDp'] = loss_D
                self.watch_list['LGp'] = loss_G
                loss_list_D.append(loss_D)
                loss_list_G.append(loss_G)

                # Identity Mapping (Reconstruction) loss
                loss_idt_A = tf.reduce_mean(tf.abs(render_AA - self.images_A),
                                            name='idt_loss_A')
                loss_idt_A = config.coef_idt * loss_idt_A

                loss_idt_B = tf.reduce_mean(tf.abs(render_BB - self.images_B),
                                            name='idt_loss_B')
                loss_idt_B = config.coef_idt * loss_idt_B

                self.watch_list['idtA'] = loss_idt_A
                self.watch_list['idtB'] = loss_idt_B
                loss_list_G.append(loss_idt_A + loss_idt_B)

                # Collect all losses
                reg_loss = tf.reduce_sum(tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES),
                                         name='reg_loss')
                self.watch_list['reg_loss'] = reg_loss
                loss_list_G.append(reg_loss)
                loss_list_D.append(reg_loss)

                loss_G = tf.add_n(loss_list_G, name='loss_G')
                grads_G = tf.gradients(loss_G, self.G_vars)

                loss_D = tf.add_n(loss_list_D, name='loss_D')
                grads_D = tf.gradients(loss_D, self.D_vars)

                # Training operators
                train_ops = []

                opt_G = tf.train.AdamOptimizer(self.learning_rate,
                                               beta1=0.5,
                                               beta2=0.9)
                opt_D = tf.train.AdamOptimizer(self.learning_rate,
                                               beta1=0.5,
                                               beta2=0.9)
                apply_G_gradient_op = opt_G.apply_gradients(
                    list(zip(grads_G, self.G_vars)))
                apply_D_gradient_op = opt_D.apply_gradients(
                    list(zip(grads_D, self.D_vars)))

                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                train_ops.extend([apply_G_gradient_op, apply_D_gradient_op] +
                                 update_ops)

                train_ops.append(tf.assign_add(self.global_step, 1))
                self.train_op = tf.group(*train_ops)

                # Collect TF summary
                for k, v in self.watch_list.items():
                    tf.summary.scalar('losses/' + k, v)
                tf.summary.scalar('learning_rate', self.learning_rate)
                self.summary_op = tf.summary.merge_all()

                # Initialize variables
                self.sess.run(tf.local_variables_initializer())
                self.sess.run(tf.global_variables_initializer())
                self.saver = tf.train.Saver(tf.trainable_variables(),
                                            max_to_keep=99)
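
The update path above uses the manual tf.gradients + apply_gradients pattern instead of Optimizer.minimize, which makes it easy to route separate gradient lists to the G and D variable sets. A self-contained sketch of that pattern with a toy one-variable loss (all values hypothetical):

import tensorflow as tf

x = tf.Variable(3.0)                    # stand-in for the G (or D) variables
loss = tf.square(x - 1.0)

opt = tf.train.AdamOptimizer(0.1, beta1=0.5, beta2=0.9)
grads = tf.gradients(loss, [x])         # explicit gradient list
train_op = opt.apply_gradients(list(zip(grads, [x])))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(train_op)
    print(sess.run(x))                  # converges towards 1.0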
Example No. 34
def _evaluate_experiment(name, input_fn, data_input):
    normalize_fn = data_input._normalize_image
    resized_h = data_input.dims[0]
    resized_w = data_input.dims[1]

    current_config = config_dict('../config.ini')
    exp_dir = os.path.join(current_config['dirs']['log'], 'ex', name)
    config_path = os.path.join(exp_dir, 'config.ini')
    if not os.path.isfile(config_path):
        config_path = '../config.ini'
    if not os.path.isdir(exp_dir) or not tf.train.get_checkpoint_state(exp_dir):
        exp_dir = os.path.join(current_config['dirs']['checkpoints'], name)
    config = config_dict(config_path)
    params = config['train']
    convert_input_strings(params, config_dict('../config.ini')['dirs'])
    dataset_params_name = 'train_' + FLAGS.dataset
    if dataset_params_name in config:
        params.update(config[dataset_params_name])
    ckpt = tf.train.get_checkpoint_state(exp_dir)
    if not ckpt:
        raise RuntimeError("Error: experiment must contain a checkpoint")
    ckpt_path = exp_dir + "/" + os.path.basename(ckpt.model_checkpoint_path)

    with tf.Graph().as_default(): #, tf.device('gpu:' + FLAGS.gpu):
        inputs = input_fn()
        im1, im2, input_shape = inputs[:3]
        truth = inputs[3:]

        height, width, _ = tf.unstack(tf.squeeze(input_shape), num=3, axis=0)
        im1 = resize_input(im1, height, width, resized_h, resized_w)
        im2 = resize_input(im2, height, width, resized_h, resized_w) # TODO adapt train.py

        _, flow, flow_bw = unsupervised_loss(
            (im1, im2),
            normalization=data_input.get_normalization(),
            params=params, augment=False, return_flow=True)

        im1 = resize_output(im1, height, width, 3)
        im2 = resize_output(im2, height, width, 3)
        flow = resize_output_flow(flow, height, width, 2)
        flow_bw = resize_output_flow(flow_bw, height, width, 2)

        flow_fw_int16 = flow_to_int16(flow)
        flow_bw_int16 = flow_to_int16(flow_bw)

        im1_pred = image_warp(im2, flow)
        im1_diff = tf.abs(im1 - im1_pred)
        #im2_diff = tf.abs(im1 - im2)

        #flow_bw_warped = image_warp(flow_bw, flow)

        if len(truth) == 4:
            flow_occ, mask_occ, flow_noc, mask_noc = truth
            flow_occ = resize_output_crop(flow_occ, height, width, 2)
            flow_noc = resize_output_crop(flow_noc, height, width, 2)
            mask_occ = resize_output_crop(mask_occ, height, width, 1)
            mask_noc = resize_output_crop(mask_noc, height, width, 1)

            #div = divergence(flow_occ)
            #div_bw = divergence(flow_bw)
            # occlusion() is evaluated once; 1 - (1 - x) simplifies to x
            occ_pred, def_pred = occlusion(flow, flow_bw)[:2]
            disocc_pred = forward_warp(flow_bw) < DISOCC_THRESH
            disocc_fw_pred = forward_warp(flow) < DISOCC_THRESH
            image_slots = [((im1 * 0.5 + im2 * 0.5) / 255, 'overlay'),
                           (im1_diff / 255, 'brightness error'),
                           #(im1 / 255, 'first image', 1, 0),
                           #(im2 / 255, 'second image', 1, 0),
                           #(im2_diff / 255, '|first - second|', 1, 2),
                           (flow_to_color(flow), 'flow'),
                           #(flow_to_color(flow_bw), 'flow bw prediction'),
                           #(tf.image.rgb_to_grayscale(im1_diff) > 20, 'diff'),
                           #(occ_pred, 'occ'),
                           #(def_pred, 'disocc'),
                           #(disocc_pred, 'reverse disocc'),
                           #(disocc_fw_pred, 'forward disocc prediction'),
                           #(div, 'div'),
                           #(div < -2, 'neg div'),
                           #(div > 5, 'pos div'),
                           #(flow_to_color(flow_occ, mask_occ), 'flow truth'),
                           (flow_error_image(flow, flow_occ, mask_occ, mask_noc),
                            'flow error') #  (blue: correct, red: wrong, dark: occluded)
            ]

            # list of (scalar_op, title)
            scalar_slots = [(flow_error_avg(flow_noc, flow, mask_noc), 'EPE_noc'),
                            (flow_error_avg(flow_occ, flow, mask_occ), 'EPE_all'),
                            (outlier_pct(flow_noc, flow, mask_noc), 'outliers_noc'),
                            (outlier_pct(flow_occ, flow, mask_occ), 'outliers_all')]
        elif len(truth) == 2:
            flow_gt, mask = truth
            flow_gt = resize_output_crop(flow_gt, height, width, 2)
            mask = resize_output_crop(mask, height, width, 1)

            image_slots = [((im1 * 0.5 + im2 * 0.5) / 255, 'overlay'),
                           (im1_diff / 255, 'brightness error'),
                           (flow_to_color(flow), 'flow'),
                           (flow_to_color(flow_gt, mask), 'gt'),
            ]

            # list of (scalar_op, title)
            scalar_slots = [(flow_error_avg(flow_gt, flow, mask), 'EPE_all')]
        else:
            image_slots = [(im1 / 255, 'first image'),
                           #(im1_pred / 255, 'warped second image', 0, 1),
                           (im1_diff / 255, 'warp error'),
                           #(im2 / 255, 'second image', 1, 0),
                           #(im2_diff / 255, '|first - second|', 1, 2),
                           (flow_to_color(flow), 'flow prediction')]
            scalar_slots = []

        num_ims = len(image_slots)
        image_ops = [t[0] for t in image_slots]
        scalar_ops = [t[0] for t in scalar_slots]
        image_names = [t[1] for t in image_slots]
        scalar_names = [t[1] for t in scalar_slots]
        all_ops = image_ops + scalar_ops

        image_lists = []
        averages = np.zeros(len(scalar_ops))
        sess_config = tf.ConfigProto(allow_soft_placement=True)

        exp_out_dir = os.path.join('../out', name)
        if FLAGS.output_visual or FLAGS.output_benchmark:
            if os.path.isdir(exp_out_dir):
                shutil.rmtree(exp_out_dir)
            os.makedirs(exp_out_dir)
            shutil.copyfile(config_path, os.path.join(exp_out_dir, 'config.ini'))

        with tf.Session(config=sess_config) as sess:
            saver = tf.train.Saver(tf.global_variables())
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            restore_networks(sess, params, ckpt, ckpt_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess,
                                                   coord=coord)

            # TODO adjust for batch_size > 1 (also need to change image_lists appending)
            max_iter = FLAGS.num if FLAGS.num > 0 else None

            try:
                num_iters = 0
                while not coord.should_stop() and (max_iter is None or num_iters != max_iter):
                    all_results = sess.run([flow, flow_bw, flow_fw_int16, flow_bw_int16] + all_ops)
                    flow_fw_res, flow_bw_res, flow_fw_int16_res, flow_bw_int16_res = all_results[:4]
                    all_results = all_results[4:]
                    image_results = all_results[:num_ims]
                    scalar_results = all_results[num_ims:]
                    iterstr = str(num_iters).zfill(6)
                    if FLAGS.output_visual:
                        path_col = os.path.join(exp_out_dir, iterstr + '_flow.png')
                        path_overlay = os.path.join(exp_out_dir, iterstr + '_img.png')
                        path_error = os.path.join(exp_out_dir, iterstr + '_err.png')
                        write_rgb_png(image_results[0] * 255, path_overlay)
                        write_rgb_png(image_results[1] * 255, path_col)
                        write_rgb_png(image_results[2] * 255, path_error)
                    if FLAGS.output_benchmark:
                        path_fw = os.path.join(exp_out_dir, iterstr)
                        if FLAGS.output_png:
                            write_rgb_png(flow_fw_int16_res, path_fw + '_10.png', bitdepth=16)
                        else:
                            write_flo(flow_fw_res, path_fw + '_10.flo')
                        if FLAGS.output_backward:
                            path_bw = os.path.join(exp_out_dir, iterstr + '_01.png')
                            write_rgb_png(flow_bw_int16_res, path_bw, bitdepth=16)
                    if num_iters < FLAGS.num_vis:
                        image_lists.append(image_results)
                    averages += scalar_results
                    if num_iters > 0:
                        sys.stdout.write('\r')
                    num_iters += 1
                    sys.stdout.write("-- evaluating '{}': {}/{}"
                                     .format(name, num_iters, max_iter))
                    sys.stdout.flush()
            except tf.errors.OutOfRangeError:
                pass

            print()  # finish the in-place progress line with a newline

            averages /= num_iters

            coord.request_stop()
            coord.join(threads)

    for t, avg in zip(scalar_slots, averages):
        _, scalar_name = t
        print("({}) {} = {}".format(name, scalar_name, avg))

    return image_lists, image_names
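
The evaluation loop depends on the queue-runner shutdown protocol: run until the input pipeline raises OutOfRangeError, then stop and join the coordinator. A minimal self-contained rendition of that protocol with a toy single-epoch queue:

import tensorflow as tf

queue = tf.train.input_producer(tf.range(5), num_epochs=1, shuffle=False)
item = queue.dequeue()

with tf.Session() as sess:
    # num_epochs is tracked in a local variable, hence the local initializer
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            print(sess.run(item))       # 0, 1, 2, 3, 4
    except tf.errors.OutOfRangeError:
        pass
    finally:
        coord.request_stop()
        coord.join(threads)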
Example No. 35
output = network.outputs

loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output)

train_op = tf.train.AdamOptimizer(learning_rate=LR).minimize(loss)

accuracy = tf.metrics.accuracy(
    labels=tf.argmax(tf_y, axis=1),
    predictions=tf.argmax(output, axis=1),
)[1]

sess = tf.Session()

init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

sess.run(init_op)

for step in range(600):

    b_x, b_y = mnist.train.next_batch(BATCH_SIZE)

    _, loss_ = sess.run([train_op, loss], {tf_x: b_x, tf_y: b_y})

    if step % 50 == 0:

        accuracy_ = sess.run(accuracy, {tf_x: test_x, tf_y: test_y})

        print('Step:', step, '| train loss: %.4f' % loss_,
              '| test accuracy: %.2f' % accuracy_)
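
Note that tf.metrics.accuracy returns a (value, update_op) pair, so the [1] above selects the update op, which accumulates a running accuracy in local variables; that is why local_variables_initializer is part of init_op. A small self-contained illustration:

import tensorflow as tf

labels = tf.constant([1, 0, 1, 1])
preds = tf.constant([1, 0, 0, 1])
acc_value, acc_update = tf.metrics.accuracy(labels=labels, predictions=preds)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # resets the total/count counters
    sess.run(acc_update)                        # accumulate one batch: 3 of 4 correct
    print(sess.run(acc_value))                  # 0.75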
Example No. 36
def merge_checkpoint_file():
    dg = tf.Graph()
    with dg.as_default():
        x, y_ = mnist_dataset.placeholder_inputs(external.FLAGS.batch_size)
        # Build the graph for the deep net
        y_conv, _ = mnist_deep.deepnn(x)
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.to_int64(y_), logits=y_conv))

        # The op for initializing the variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        sess = tf.Session()
        sess.run(init_op)
        restore_saver = tf.train.Saver(var_list=tf.trainable_variables())
        multi_models = []
        for filename in external.FLAGS.model_list:
            checkpoint_file = os.path.join(external.FLAGS.train_dir, filename)
            restore_saver.restore(sess, checkpoint_file)
            # snapshot every trainable variable of the restored checkpoint
            gc = tf.trainable_variables()
            model_now = [var.eval(session=sess) for var in gc]
            multi_models.append(model_now)

        # element-wise average of the collected weights across all models
        multi_models_mean = np.mean(multi_models, axis=0)

        for i, _ in enumerate(gc):
            sess.run(gc[i].assign(multi_models_mean[i]))
        t = sess.run(gc[5])  # spot-check one averaged variable
        print(t)

        mean_ckpt_file = os.path.join(external.FLAGS.train_dir,
                                      external.FLAGS.merged_model)
        restore_saver.save(sess, mean_ckpt_file)
        print('Merged model saved.')
        sess.close()
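
The core of merge_checkpoint_file is: evaluate every trainable variable under each restored checkpoint, average element-wise with np.mean(..., axis=0), and assign the mean back. A toy version of that round trip, with manual assignments standing in for the checkpoint restores:

import numpy as np
import tensorflow as tf

w = tf.Variable([1.0, 3.0])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    snapshots = []
    for value in ([1.0, 3.0], [3.0, 5.0]):   # stand-ins for two restored models
        sess.run(w.assign(value))
        snapshots.append(w.eval(session=sess))
    mean = np.mean(snapshots, axis=0)        # element-wise average across models
    sess.run(w.assign(mean))
    print(w.eval(session=sess))              # [2. 4.]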
Example No. 37
File: ch.py Project: myw8/AIcamp
def validation():
    print('validation')
    test_feeder = DataIterator(data_dir='./data/test/')

    final_predict_val = []
    final_predict_index = []
    groundtruth = []
    result = []
    with tf.Session() as sess:
        test_images, test_labels, test_names = test_feeder.input_pipeline(
            batch_size=FLAGS.batch_size, num_epochs=1)
        graph = build_graph(1)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())  # initialize test_feeder's internal state

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print("restore from the checkpoint {0}".format(ckpt))

        logger.info('===Start validation===')

        try:
            i = 0
            acc_top_1, acc_top_k = 0.0, 0.0
            while not coord.should_stop():
                i += 1
                temp_dict = {}
                start_time = time.time()
                test_images_batch, test_labels_batch, test_names_batch = sess.run(
                    [test_images, test_labels, test_names])
                feed_dict = {
                    graph['images']: test_images_batch,
                    graph['labels']: test_labels_batch,
                    graph['names']: test_names_batch,
                    graph['keep_prob']: 1.0
                }
                batch_labels, batch_names, probs, indices, acc_1, acc_k = sess.run(
                    [
                        graph['labels'], graph['names'],
                        graph['predicted_val_top_k'],
                        graph['predicted_index_top_k'], graph['accuracy'],
                        graph['accuracy_top_k']
                    ],
                    feed_dict=feed_dict)
                final_predict_val += probs.tolist()
                final_predict_index += indices.tolist()
                groundtruth += batch_labels.tolist()
                temp_dict['filename'] = batch_names.tolist()
                temp_dict['label'] = indices.tolist()
                print(temp_dict)
                result.append(temp_dict)
                acc_top_1 += acc_1
                acc_top_k += acc_k
                end_time = time.time()
                logger.info(
                    "the batch {0} takes {1} seconds, accuracy = {2}(top_1) {3}(top_k)"
                    .format(i, end_time - start_time, acc_1, acc_k))

        except tf.errors.OutOfRangeError:
            logger.info(
                '==================Validation Finished================')
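            # acc_top_1/acc_top_k hold sums of per-batch mean accuracies, so
            # scaling by batch_size / dataset size yields the overall accuracy
            # (exact only when every batch is full)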
            acc_top_1 = acc_top_1 * FLAGS.batch_size / test_feeder.size
            acc_top_k = acc_top_k * FLAGS.batch_size / test_feeder.size
            logger.info('top 1 accuracy {0} top k accuracy {1}'.format(
                acc_top_1, acc_top_k))
        finally:
            coord.request_stop()
        coord.join(threads)

    #return {'prob': final_predict_val, 'indices': final_predict_index, 'groundtruth': groundtruth}
    return result
Example No. 38
File: ch.py Project: myw8/AIcamp
def train():
    print('Begin training')
    train_feeder = DataIterator(data_dir='./data/train/')
    test_feeder = DataIterator(data_dir='./data/test/')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        train_images, train_labels, train_names = train_feeder.input_pipeline(
            batch_size=FLAGS.batch_size, aug=True)
        test_images, test_labels, test_names = test_feeder.input_pipeline(
            batch_size=FLAGS.batch_size)
        print(train_names)
        graph = build_graph(top_k=1)

        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        saver = tf.train.Saver()

        train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/val')
        start_step = 0

        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            if ckpt:
                saver.restore(sess, ckpt)
                print("restore from the checkpoint {0}".format(ckpt))
                start_step += int(ckpt.split('-')[-1])

        logger.info('===Training Start===')
        try:
            while not coord.should_stop():
                start_time = time.time()
                train_images_batch, train_labels_batch = sess.run(
                    [train_images, train_labels])

                feed_dict = {
                    graph['images']: train_images_batch,
                    graph['labels']: train_labels_batch,
                    graph['keep_prob']: 0.8
                }
                _, loss_val, train_summary, step = sess.run(
                    [
                        graph['train_op'], graph['loss'],
                        graph['merged_summary_op'], graph['global_step']
                    ],
                    feed_dict=feed_dict)
                train_writer.add_summary(train_summary, step)
                end_time = time.time()
                logger.info("the step {0} takes {1} loss {2}".format(
                    step, end_time - start_time, loss_val))
                if step > FLAGS.max_steps:
                    break
                if step % FLAGS.eval_steps == 1:
                    test_images_batch, test_labels_batch = sess.run(
                        [test_images, test_labels])
                    feed_dict = {
                        graph['images']: test_images_batch,
                        graph['labels']: test_labels_batch,
                        graph['keep_prob']: 1.0
                    }
                    accuracy_test, test_summary = sess.run(
                        [graph['accuracy'], graph['merged_summary_op']],
                        feed_dict=feed_dict)
                    test_writer.add_summary(test_summary, step)
                    logger.info(
                        '===============Eval a batch=======================')
                    logger.info('the step {0} test accuracy: {1}'.format(
                        step, accuracy_test))
                    logger.info(
                        '===============Eval a batch=======================')
                if step % FLAGS.save_steps == 1:
                    logger.info('Save the ckpt of {0}'.format(step))
                    saver.save(sess,
                               os.path.join(FLAGS.checkpoint_dir, 'model'),
                               global_step=graph['global_step'])
        except tf.errors.OutOfRangeError:
            logger.info('==================Train Finished================')
            saver.save(sess,
                       os.path.join(FLAGS.checkpoint_dir, 'my-model'),
                       global_step=graph['global_step'])
        finally:
            coord.request_stop()
        coord.join(threads)
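
The resume logic above recovers the starting step from the numeric suffix that saver.save(..., global_step=...) appends to the checkpoint name; for a hypothetical path:

ckpt = './checkpoint/model-12000'        # as produced by saver.save(..., global_step=12000)
start_step = int(ckpt.split('-')[-1])
print(start_step)                        # 12000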
Example No. 39
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir",
                        type=str,
                        required=True,
                        help="either a directory containing subdirectories "
                        "train, val, test, etc, or a directory containing "
                        "the tfrecords")
    parser.add_argument(
        "--val_input_dir",
        type=str,
        help="directories containing the tfrecords. default: input_dir")
    parser.add_argument("--logs_dir",
                        default='logs',
                        help="ignored if output_dir is specified")
    parser.add_argument(
        "--output_dir",
        help=
        "output directory where json files, summary, model, gifs, etc are saved. "
        "default is logs_dir/model_fname, where model_fname consists of "
        "information from model and model_hparams")
    parser.add_argument("--output_dir_postfix", default="")
    parser.add_argument(
        "--checkpoint",
        help=
        "directory with checkpoint or checkpoint name (e.g. checkpoint_dir/model-200000)"
    )
    parser.add_argument("--resume",
                        action='store_true',
                        help='resume from latest checkpoint in output_dir.')

    parser.add_argument("--dataset", type=str, help="dataset class name")
    parser.add_argument(
        "--dataset_hparams",
        type=str,
        help="a string of comma separated list of dataset hyperparameters")
    parser.add_argument("--dataset_hparams_dict",
                        type=str,
                        help="a json file of dataset hyperparameters")
    parser.add_argument("--model", type=str, help="model class name")
    parser.add_argument(
        "--model_hparams",
        type=str,
        help="a string of comma separated list of model hyperparameters")
    parser.add_argument("--model_hparams_dict",
                        type=str,
                        help="a json file of model hyperparameters")

    parser.add_argument(
        "--summary_freq",
        type=int,
        default=1000,
        help=
        "save frequency of summaries (except for image and eval summaries) for train/validation set"
    )
    parser.add_argument(
        "--image_summary_freq",
        type=int,
        default=5000,
        help="save frequency of image summaries for train/validation set")
    parser.add_argument(
        "--eval_summary_freq",
        type=int,
        default=25000,
        help="save frequency of eval summaries for train/validation set")
    parser.add_argument(
        "--accum_eval_summary_freq",
        type=int,
        default=100000,
        help=
        "save frequency of accumulated eval summaries for validation set only")
    parser.add_argument("--progress_freq",
                        type=int,
                        default=100,
                        help="display progress every progress_freq steps")
    parser.add_argument("--save_freq",
                        type=int,
                        default=5000,
                        help="save frequence of model, 0 to disable")

    parser.add_argument(
        "--aggregate_nccl",
        type=int,
        default=0,
        help=
        "whether to use nccl or cpu for gradient aggregation in multi-gpu training"
    )
    parser.add_argument("--gpu_mem_frac",
                        type=float,
                        default=0.9,
                        help="fraction of gpu memory to use")
    parser.add_argument("--seed", type=int)

    args = parser.parse_args()

    if args.seed is not None:
        tf.set_random_seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    if args.output_dir is None:
        list_depth = 0
        model_fname = ''
        for t in ('model=%s,%s' % (args.model, args.model_hparams)):
            if t == '[':
                list_depth += 1
            if t == ']':
                list_depth -= 1
            if list_depth and t == ',':
                t = '..'
            if t in '=,':
                t = '.'
            if t in '[]':
                t = ''
            model_fname += t
        args.output_dir = os.path.join(args.logs_dir,
                                       model_fname) + args.output_dir_postfix
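    # For example (hypothetical values), model='sv2p' with
    # model_hparams='num_gpus=2,lr=[1e-4,1e-5]' yields
    # model_fname == 'model.sv2p.num_gpus.2.lr.1e-4..1e-5'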

    if args.resume:
        if args.checkpoint:
            raise ValueError('resume and checkpoint cannot both be specified')
        args.checkpoint = args.output_dir

    dataset_hparams_dict = {}
    model_hparams_dict = {}
    if args.dataset_hparams_dict:
        with open(args.dataset_hparams_dict) as f:
            dataset_hparams_dict.update(json.loads(f.read()))
    if args.model_hparams_dict:
        with open(args.model_hparams_dict) as f:
            model_hparams_dict.update(json.loads(f.read()))
    if args.checkpoint:
        checkpoint_dir = os.path.normpath(args.checkpoint)
        if not os.path.isdir(args.checkpoint):
            checkpoint_dir, _ = os.path.split(checkpoint_dir)
        if not os.path.exists(checkpoint_dir):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    checkpoint_dir)
        with open(os.path.join(checkpoint_dir, "options.json")) as f:
            print("loading options from checkpoint %s" % args.checkpoint)
            options = json.loads(f.read())
            args.dataset = args.dataset or options['dataset']
            args.model = args.model or options['model']
        try:
            with open(os.path.join(checkpoint_dir,
                                   "dataset_hparams.json")) as f:
                dataset_hparams_dict.update(json.loads(f.read()))
        except FileNotFoundError:
            print(
                "dataset_hparams.json was not loaded because it does not exist"
            )
        try:
            with open(os.path.join(checkpoint_dir, "model_hparams.json")) as f:
                model_hparams_dict.update(json.loads(f.read()))
        except FileNotFoundError:
            print(
                "model_hparams.json was not loaded because it does not exist")

    print(
        '----------------------------------- Options ------------------------------------'
    )
    for k, v in args._get_kwargs():
        print(k, "=", v)
    print(
        '------------------------------------- End --------------------------------------'
    )

    VideoDataset = datasets.get_dataset_class(args.dataset)
    train_dataset = VideoDataset(args.input_dir,
                                 mode='train',
                                 hparams_dict=dataset_hparams_dict,
                                 hparams=args.dataset_hparams)
    val_dataset = VideoDataset(args.val_input_dir or args.input_dir,
                               mode='val',
                               hparams_dict=dataset_hparams_dict,
                               hparams=args.dataset_hparams)
    if val_dataset.hparams.long_sequence_length != val_dataset.hparams.sequence_length:
        # the longer dataset is only used for the accum_eval_metrics
        long_val_dataset = VideoDataset(args.val_input_dir or args.input_dir,
                                        mode='val',
                                        hparams_dict=dataset_hparams_dict,
                                        hparams=args.dataset_hparams)
        long_val_dataset.set_sequence_length(
            val_dataset.hparams.long_sequence_length)
    else:
        long_val_dataset = None

    variable_scope = tf.get_variable_scope()
    variable_scope.set_use_resource(True)

    VideoPredictionModel = models.get_model_class(args.model)
    hparams_dict = dict(model_hparams_dict)
    hparams_dict.update({
        'context_frames': train_dataset.hparams.context_frames,
        'sequence_length': train_dataset.hparams.sequence_length,
        'repeat': train_dataset.hparams.time_shift,
    })
    model = VideoPredictionModel(hparams_dict=hparams_dict,
                                 hparams=args.model_hparams,
                                 aggregate_nccl=args.aggregate_nccl)

    batch_size = model.hparams.batch_size
    train_tf_dataset = train_dataset.make_dataset(batch_size)
    train_iterator = train_tf_dataset.make_one_shot_iterator()
    train_handle = train_iterator.string_handle()
    val_tf_dataset = val_dataset.make_dataset(batch_size)
    val_iterator = val_tf_dataset.make_one_shot_iterator()
    val_handle = val_iterator.string_handle()
    iterator = tf.data.Iterator.from_string_handle(
        train_handle, train_tf_dataset.output_types,
        train_tf_dataset.output_shapes)
    inputs = iterator.get_next()

    # inputs comes from the training dataset by default, unless train_handle is remapped to val_handle
    model.build_graph(inputs)

    if long_val_dataset is not None:
        # separately build a model for the longer sequence.
        # this is needed because the model doesn't support dynamic shapes.
        long_hparams_dict = dict(hparams_dict)
        long_hparams_dict[
            'sequence_length'] = long_val_dataset.hparams.sequence_length
        # use a smaller batch size for the longer model to prevent running out of memory
        long_hparams_dict['batch_size'] = model.hparams.batch_size // 2
        long_model = VideoPredictionModel(
            mode="test",  # to not build the losses and discriminators
            hparams_dict=long_hparams_dict,
            hparams=args.model_hparams,
            aggregate_nccl=args.aggregate_nccl)
        tf.get_variable_scope().reuse_variables()
        long_model.build_graph(long_val_dataset.make_batch(batch_size))
    else:
        long_model = None

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    with open(os.path.join(args.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(args), sort_keys=True, indent=4))
    with open(os.path.join(args.output_dir, "dataset_hparams.json"), "w") as f:
        f.write(
            json.dumps(train_dataset.hparams.values(),
                       sort_keys=True,
                       indent=4))
    with open(os.path.join(args.output_dir, "model_hparams.json"), "w") as f:
        f.write(json.dumps(model.hparams.values(), sort_keys=True, indent=4))

    with tf.name_scope("parameter_count"):
        # exclude trainable variables that are replicas (used in multi-gpu setting)
        trainable_variables = set(tf.trainable_variables()) & set(
            model.saveable_variables)
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in trainable_variables])

    saver = tf.train.Saver(var_list=model.saveable_variables, max_to_keep=2)

    # None has the special meaning of evaluating at the end, so explicitly check for non-equality to zero
    if (args.summary_freq != 0 or args.image_summary_freq != 0
            or args.eval_summary_freq != 0
            or args.accum_eval_summary_freq != 0):
        summary_writer = tf.summary.FileWriter(args.output_dir)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.gpu_mem_frac)
    config = tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)
    global_step = tf.train.get_or_create_global_step()
    max_steps = model.hparams.max_steps
    with tf.Session(config=config) as sess:
        print("parameter_count =", sess.run(parameter_count))

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        model.restore(sess, args.checkpoint)
        sess.run(model.post_init_ops)
        val_handle_eval = sess.run(val_handle)
        sess.graph.finalize()

        start_step = sess.run(global_step)

        def should(step, freq):
            if freq is None:
                return (step + 1) == (max_steps - start_step)
            else:
                return freq and ((step + 1) % freq == 0 or
                                 (step + 1) in (0, max_steps - start_step))

        def should_eval(step, freq):
            # never run eval summaries at the beginning since it's expensive, unless it's the last iteration
            return should(step,
                          freq) and (step >= 0 or
                                     (step + 1) == (max_steps - start_step))

        # start at one step earlier to log everything without doing any training
        # step is relative to the start_step
        for step in range(-1, max_steps - start_step):
            if step == 1:
                # skip step -1 and 0 for timing purposes (for warmstarting)
                start_time = time.time()

            fetches = {"global_step": global_step}
            if step >= 0:
                fetches["train_op"] = model.train_op
            if should(step, args.progress_freq):
                fetches['d_loss'] = model.d_loss
                fetches['g_loss'] = model.g_loss
                fetches['d_losses'] = model.d_losses
                fetches['g_losses'] = model.g_losses
                if isinstance(model.learning_rate, tf.Tensor):
                    fetches["learning_rate"] = model.learning_rate
            if should(step, args.summary_freq):
                fetches["summary"] = model.summary_op
            if should(step, args.image_summary_freq):
                fetches["image_summary"] = model.image_summary_op
            if should_eval(step, args.eval_summary_freq):
                fetches["eval_summary"] = model.eval_summary_op

            run_start_time = time.time()
            print(step)
            results = sess.run(fetches)
            run_elapsed_time = time.time() - run_start_time
            if run_elapsed_time > 1.5 and step > 0 and set(
                    fetches.keys()) == {"global_step", "train_op"}:
                print('running train_op took too long (%0.1fs)' %
                      run_elapsed_time)

            if (should(step, args.summary_freq)
                    or should(step, args.image_summary_freq)
                    or should_eval(step, args.eval_summary_freq)):
                val_fetches = {"global_step": global_step}
                if should(step, args.summary_freq):
                    val_fetches["summary"] = model.summary_op
                if should(step, args.image_summary_freq):
                    val_fetches["image_summary"] = model.image_summary_op
                if should_eval(step, args.eval_summary_freq):
                    val_fetches["eval_summary"] = model.eval_summary_op
                val_results = sess.run(
                    val_fetches, feed_dict={train_handle: val_handle_eval})
                for name, summary in val_results.items():
                    if name == 'global_step':
                        continue
                    val_results[name] = add_tag_suffix(summary, '_1')

            if should(step, args.summary_freq):
                print("recording summary")
                summary_writer.add_summary(results["summary"],
                                           results["global_step"])
                summary_writer.add_summary(val_results["summary"],
                                           val_results["global_step"])
                print("done")
            if should(step, args.image_summary_freq):
                print("recording image summary")
                summary_writer.add_summary(results["image_summary"],
                                           results["global_step"])
                summary_writer.add_summary(val_results["image_summary"],
                                           val_results["global_step"])
                print("done")
            if should_eval(step, args.eval_summary_freq):
                print("recording eval summary")
                summary_writer.add_summary(results["eval_summary"],
                                           results["global_step"])
                summary_writer.add_summary(val_results["eval_summary"],
                                           val_results["global_step"])
                print("done")
            if should_eval(step, args.accum_eval_summary_freq):
                val_datasets = [val_dataset]
                val_models = [model]
                if long_model is not None:
                    val_datasets.append(long_val_dataset)
                    val_models.append(long_model)
                for i, (val_dataset_,
                        val_model) in enumerate(zip(val_datasets, val_models)):
                    sess.run(val_model.accum_eval_metrics_reset_op)
                    # traverse the whole validation set (up to rounding by the batch size)
                    accum_eval_summary_num_updates = val_dataset_.num_examples_per_epoch(
                    ) // val_model.hparams.batch_size
                    val_fetches = {
                        "global_step": global_step,
                        "accum_eval_summary": val_model.accum_eval_summary_op
                    }
                    for update_step in range(accum_eval_summary_num_updates):
                        print(
                            'evaluating %d / %d' %
                            (update_step + 1, accum_eval_summary_num_updates))
                        val_results = sess.run(
                            val_fetches,
                            feed_dict={train_handle: val_handle_eval})
                    accum_eval_summary = add_tag_suffix(
                        val_results["accum_eval_summary"], '_%d' % (i + 1))
                    print("recording accum eval summary")
                    summary_writer.add_summary(accum_eval_summary,
                                               val_results["global_step"])
                    print("done")
            if (should(step, args.summary_freq)
                    or should(step, args.image_summary_freq)
                    or should_eval(step, args.eval_summary_freq)
                    or should_eval(step, args.accum_eval_summary_freq)):
                summary_writer.flush()
            if should(step, args.progress_freq):
                # global_step will have the correct step count if we resume from a checkpoint
                # global step is read before it's incremented
                steps_per_epoch = train_dataset.num_examples_per_epoch(
                ) / batch_size
                train_epoch = results["global_step"] / steps_per_epoch
                print("progress  global step %d  epoch %0.1f" %
                      (results["global_step"] + 1, train_epoch))
                if step > 0:
                    elapsed_time = time.time() - start_time
                    average_time = elapsed_time / step
                    images_per_sec = batch_size / average_time
                    remaining_time = (max_steps -
                                      (start_step + step + 1)) * average_time
                    print(
                        "          image/sec %0.1f  remaining %dm (%0.1fh) (%0.1fd)"
                        %
                        (images_per_sec, remaining_time / 60, remaining_time /
                         60 / 60, remaining_time / 60 / 60 / 24))

                if results['d_losses']:
                    print("d_loss", results["d_loss"])
                for name, loss in results['d_losses'].items():
                    print("  ", name, loss)
                if results['g_losses']:
                    print("g_loss", results["g_loss"])
                for name, loss in results['g_losses'].items():
                    print("  ", name, loss)
                if isinstance(model.learning_rate, tf.Tensor):
                    print("learning_rate", results["learning_rate"])

            if should(step, args.save_freq):
                print("saving model to", args.output_dir)
                saver.save(sess,
                           os.path.join(args.output_dir, "model"),
                           global_step=global_step)
                print("done")
Example No. 40
def run_training():
    # Tell TensorFlow that the model will be built into the default Graph.
    validation_file_list = [
        FLAGS.data_path + 'validation_' + '%d.tfrecords' % i
        for i in range(1, 11)
    ]
    train_file_list = [
        FLAGS.data_path + 'train_' + '%d.tfrecords' % i for i in range(1, 11)
    ]
    test_file_list = [
        FLAGS.data_path + 'test_' + '%d.tfrecords' % i for i in range(1, 11)
    ]
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step(graph=None)
        with tf.device('/cpu:0'):
            batch_x_pos, batch_labels_pos, batch_iterator_pos = inputs_train(
                'train_*_pos.tfrecords')
            batch_x_neg, batch_labels_neg, batch_iterator_neg = inputs_train(
                'train_*_neg.tfrecords')
            batch_x = tf.concat([batch_x_pos, batch_x_neg], axis=0)
            batch_labels = tf.concat([batch_labels_pos, batch_labels_neg],
                                     axis=0)

            print(batch_x.get_shape(), batch_labels.get_shape())
            print(batch_labels.dtype, '====================')
            train_x, train_labels, train_iterator = inputs(
                train_file_list, FLAGS.train_size)
            validation_x, validation_labels, val_iterator = inputs(
                validation_file_list, FLAGS.valid_size)
            test_x, test_labels, test_iterator = inputs(
                test_file_list, FLAGS.test_size)
        print(batch_x.get_shape(), ', ', batch_labels.get_shape())
        with tf.device("/cpu:0"):
            train_logits, train_accuracy, train_probs, train_preds = model(
                batch_x,
                batch_labels,
                FLAGS.activation,
                FLAGS.h1,
                FLAGS.h2,
                FLAGS.h3,
                FLAGS.h4,
                FLAGS.h5,
                FLAGS.h6,
                FLAGS.h7,
                FLAGS.h8,
                FLAGS.h9,
                FLAGS.h10,
                istrain=True,
                reuse=False)
            tot_train_logits, tot_train_accuracy, tot_train_probs, tot_train_preds = model(
                train_x,
                train_labels,
                FLAGS.activation,
                FLAGS.h1,
                FLAGS.h2,
                FLAGS.h3,
                FLAGS.h4,
                FLAGS.h5,
                FLAGS.h6,
                FLAGS.h7,
                FLAGS.h8,
                FLAGS.h9,
                FLAGS.h10,
                istrain=False,
                reuse=True)
            train_logits_ev, train_accuracy_ev, train_probs_ev, train_preds_ev = model(
                batch_x,
                batch_labels,
                FLAGS.activation,
                FLAGS.h1,
                FLAGS.h2,
                FLAGS.h3,
                FLAGS.h4,
                FLAGS.h5,
                FLAGS.h6,
                FLAGS.h7,
                FLAGS.h8,
                FLAGS.h9,
                FLAGS.h10,
                istrain=False,
                reuse=True)
            cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=batch_labels,
                                                        logits=train_logits))
            l2_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = tf.reduce_sum(l2_loss)
            cost += l2_loss
            # cross-entropy loss with dropout turned off
            cost_ev_train = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=batch_labels, logits=train_logits_ev))
            tot_train_cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=train_labels, logits=tot_train_logits))
            # like the cost above, except that positive examples are weighted
            # cost = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(batch_labels, train_logits, 1))
            learning_rate = tf.train.inverse_time_decay(
                FLAGS.learning_rate,
                global_step=global_step,
                decay_steps=FLAGS.decay_steps,
                decay_rate=FLAGS.decay_rate,
                staircase=True)
            train_op = tf.contrib.layers.optimize_loss(
                loss=cost,
                global_step=global_step,
                learning_rate=learning_rate,
                optimizer=tf.train.AdamOptimizer(),
                # clip_gradients=2.0,
                name='d_optimize_loss',
                variables=tf.trainable_variables())
            ema = tf.train.ExponentialMovingAverage(decay=0.999)
            # EMA weights:
            with tf.control_dependencies([train_op]):
                train_op_new = ema.apply(tf.trainable_variables())
            validation_logits, validation_accuracy, validation_probs, validation_preds = model(
                validation_x,
                validation_labels,
                FLAGS.activation,
                FLAGS.h1,
                FLAGS.h2,
                FLAGS.h3,
                FLAGS.h4,
                FLAGS.h5,
                FLAGS.h6,
                FLAGS.h7,
                FLAGS.h8,
                FLAGS.h9,
                FLAGS.h10,
                istrain=False,
                reuse=True)
            test_logits, test_accuracy, test_probs, test_preds = model(
                test_x,
                test_labels,
                FLAGS.activation,
                FLAGS.h1,
                FLAGS.h2,
                FLAGS.h3,
                FLAGS.h4,
                FLAGS.h5,
                FLAGS.h6,
                FLAGS.h7,
                FLAGS.h8,
                FLAGS.h9,
                FLAGS.h10,
                istrain=False,
                reuse=True)
            validation_cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=validation_labels, logits=validation_logits))

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)
        # pre-allocate the full GPU memory fraction up front
        # (set allow_growth = True to start small and grow as needed)
        config.gpu_options.allow_growth = False
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            metrics = [
                'pos_precision', 'pos_recall', 'pos_f1', 'neg_precision',
                'neg_recall', 'neg_f1', 'p_r_auc', 'auc'
            ]
            valid_perfs = {m: [] for m in metrics}
            sess.run(init_op)
            # b, l = sess.run([batch_x, batch_labels])
            # np.savetxt(FLAGS.data_path + 'batch1.txt', b[:1000], delimiter='\t')
            # print(len(l), np.sum(l))
            # print('Shape of Batch=', b.shape, l.shape, batch_x.get_shape(), batch_labels.get_shape())
            checkpoint_rate = int(
                0.5 * (FLAGS.train_size // FLAGS.batch_size) // 100) * 100
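            # i.e. half an epoch's worth of batches, rounded down to a multiple of 100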
            n_iterations = checkpoint_rate * FLAGS.num_epochs
            # checkpoint_rate = 10000
            print('Total iterations= ', n_iterations)
            best_validation_auc = 0
            train_per_epoch_loss = 0
            val_loss_list = []
            train_per_epoch_loss_list = []
            # pre training performance measure:
            tr_loss = sess.run(tot_train_cost)
            train_per_epoch_loss_list.append(tr_loss)
            val_probs, val_preds, val_labs, val_loss = sess.run([
                validation_probs, validation_preds, validation_labels,
                validation_cost
            ])
            val_loss_list.append(val_loss)
            validation_pref = performance_statistics(val_labs, val_preds,
                                                     val_probs)
            for m in range(len(metrics)):
                valid_perfs[metrics[m]].append(validation_pref[m])
            # training
            for i in range(n_iterations):
                start_time = time.time()
                batch_loss, batch_accu, _ = sess.run(
                    [cost_ev_train, train_accuracy_ev, train_op_new])
                train_per_epoch_loss += batch_loss
                duration = time.time() - start_time
                if i % 100 == 0:
                    print(
                        'Step %d: BATCH loss = %.3f, accuracy=%.3f (%.3f sec)'
                        % (i, batch_loss, batch_accu, duration))
                if (i + 1) % checkpoint_rate == 0:
                    t1 = time.time()
                    val_probs, val_preds, val_labs, val_loss = sess.run([
                        validation_probs, validation_preds, validation_labels,
                        validation_cost
                    ])
                    validation_pref = performance_statistics(
                        val_labs, val_preds, val_probs)
                    print('Epoch end validation performance:')
                    print(validation_pref)
                    val_loss_list.append(val_loss)

                    train_per_epoch_loss_list.append(train_per_epoch_loss /
                                                     checkpoint_rate)
                    train_per_epoch_loss = 0
                    for m in range(len(metrics)):
                        valid_perfs[metrics[m]].append(validation_pref[m])
                    if best_validation_auc < validation_pref[-1]:
                        print('-' * 100)
                        print(
                            'validation AUC improved from {} to {} '.format(
                                best_validation_auc, validation_pref[-1]),
                            '(%.3f sec)' % (time.time() - t1))
                        best_validation_auc = validation_pref[-1]
                        # print('Train loss improved from {} to {} '.format(best_train_loss, train_loss))
                        print('Saving Model to file..')
                        print('-' * 100)
                        saver.save(
                            sess,
                            'C:/behrouz/projects/behrouz-Rui-Gaurav-project/'
                            'excel-pbi-modeling/balanced-batch/checkpoint/model.ckpt'
                        )
            test_accu = sess.run(test_accuracy)
            train_val_loss = np.zeros((2, len(val_loss_list)))
            train_val_loss[0, :] = train_per_epoch_loss_list
            train_val_loss[1, :] = val_loss_list
            np.save(
                'C:/behrouz/projects/behrouz-Rui-Gaurav-project/'
                'excel-pbi-modeling/balanced-batch/train_val_loss.npy',
                train_val_loss)
            print(train_val_loss.shape)
            with open(
                    'C:/behrouz/projects/behrouz-Rui-Gaurav-project/'
                    'excel-pbi-modeling/balanced-batch/val_perfs.pickle',
                    'wb') as f:
                pickle.dump(valid_perfs, f)
            print('Out of Sample Accuracy= ', test_accu)
        sess.close()
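
The train op in this last example chains ema.apply(...) behind the optimizer step via control_dependencies, so every update also refreshes the shadow averages (the pattern documented for tf.train.ExponentialMovingAverage). A minimal sketch, with a plain assign_add standing in for the optimizer step:

import tensorflow as tf

x = tf.Variable(0.0)
step = tf.assign_add(x, 1.0)          # stand-in for the optimizer update
ema = tf.train.ExponentialMovingAverage(decay=0.9)
with tf.control_dependencies([step]):
    train_op = ema.apply([x])         # run the update first, then refresh the average

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        sess.run(train_op)
    print(sess.run([x, ema.average(x)]))  # x == 3.0, the moving average lags behind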