def testWhileUpdateVariable_5(self):
    with self.test_session():
      # Create some variables.
      var_a = tf.Variable(0, name="a")
      var_b = tf.Variable(0, name="b")
      tf.initialize_all_variables().run()

      # Change condition to check var_b
      def pred(i):
        return tf.less(var_b, 10)

      # Change body to increment var_b
      def loop_body(i):
        asn1 = tf.assign_add(var_a, tf.constant(1), name="a_add")
        asn2 = tf.assign_add(var_b, tf.constant(1), name="b_add")
        with tf.control_dependencies([asn1, asn2]):
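          # tf.identity is created inside the control_dependencies scope, so
          # both assigns are forced to run before the new loop value exists.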
          inc_b = tf.identity(var_b)
        return inc_b

      lpa = control_flow_ops.While(pred, loop_body, [var_b], 1, name="loop")

      self.assertEqual(0, var_b.eval())
      lpa.eval()  # Run the loop
      self.assertEqual(10, var_a.eval())
      self.assertEqual(10, var_b.eval())
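Note: tf.initialize_all_variables(), used throughout these examples, was deprecated in TensorFlow 0.12 in favor of tf.global_variables_initializer(). On 0.12 and later, the initialization line in each snippet becomes:

# Drop-in replacement on TensorFlow >= 0.12:
tf.global_variables_initializer().run()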
Example #2
 def computeGraph(self):
     logging.debug("computeGraph")
     with tf.Session(graph=self.graph) as session:
         # This is a one-time operation which ensures the parameters get initialized as
         # we described in the graph: random weights for the matrix, zeros for the
         # biases. 
         tf.initialize_all_variables().run()
         logging.debug('Initialized')
         for step in range(self.num_steps):
             # Run the computations. We tell .run() that we want to run the optimizer,
             # and get the loss value and the training predictions returned as numpy
             # arrays.
             _, l, predictions = session.run([self.optimizer, self.loss, self.train_prediction])
             if step % 100 == 0:
                 logging.debug('Loss at step %d: %f' % (step, l))
                 logging.debug('Training accuracy: %.1f%%' % self.accuracy(
                     predictions, self.train_labels))
                 # Calling .eval() on valid_prediction is basically like calling run(), but
                 # just to get that one numpy array. Note that it recomputes all its graph
                 # dependencies.
                 logging.debug('Validation accuracy: %.1f%%' % self.accuracy(
                     self.valid_prediction.eval(), self.valid_labels))
         logging.debug('Test accuracy: %.1f%%' % self.accuracy(
             self.test_prediction.eval(), self.test_labels))
     return
Example #3
  def testSharded(self):
    save_dir = os.path.join(self.get_temp_dir(), "max_to_keep_sharded")
    try:
      gfile.DeleteRecursively(save_dir)
    except gfile.GOSError as _:
      pass                      # Ignore
    gfile.MakeDirs(save_dir)

    with tf.Session(
        target="",
        config=tf.ConfigProto(device_count={"CPU": 2})) as sess:
      with sess.graph.device("/cpu:0"):
        v0 = tf.Variable(111, name="v0")
      with sess.graph.device("/cpu:1"):
        v1 = tf.Variable(222, name="v1")
      save = tf.train.Saver({"v0": v0, "v1": v1}, sharded=True, max_to_keep=2)
      tf.initialize_all_variables().run()
      self.assertEqual([], save.last_checkpoints)

      s1 = save.save(sess, os.path.join(save_dir, "s1"))
      self.assertEqual([s1], save.last_checkpoints)
      self.assertEqual(2, len(gfile.Glob(s1)))

      s2 = save.save(sess, os.path.join(save_dir, "s2"))
      self.assertEqual([s1, s2], save.last_checkpoints)
      self.assertEqual(2, len(gfile.Glob(s1)))
      self.assertEqual(2, len(gfile.Glob(s2)))

      s3 = save.save(sess, os.path.join(save_dir, "s3"))
      self.assertEqual([s2, s3], save.last_checkpoints)
      self.assertEqual(0, len(gfile.Glob(s1)))
      self.assertEqual(2, len(gfile.Glob(s2)))
      self.assertEqual(2, len(gfile.Glob(s3)))
Example #4
  def testWhileUpdateVariable_6(self):
    with self.test_session():
      # Create some variables.
      var_a = tf.Variable(0, name="a")
      var_b = tf.Variable(0, name="b")
      c = tf.constant(0)
      tf.initialize_all_variables().run()

      # Loop condition
      def pred(i):
        return tf.less(i, 10)

      # Loop body
      def loop_body(i):
        asn1 = tf.assign_add(var_a, 1, name="a_add")
        with tf.control_dependencies([asn1]):
          asn2 = tf.assign_add(var_b, var_a, name="b_add")
        with tf.control_dependencies([asn2]):
          ni = tf.add(i, 1, name="i_add")
          return ni

      lpa = control_flow_ops.While(pred, loop_body, [c], 1, name="loop")

      self.assertEqual(0, var_b.eval())
      lpa.eval()  # Run the loop
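      # var_a counts the iterations 1..10, and each new count is added into
      # var_b, so var_b finishes at 1 + 2 + ... + 10 = 55.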
      self.assertEqual(55, var_b.eval())
      self.assertEqual(10, var_a.eval())
Example #5
  def testInt64(self):
    save_path = os.path.join(self.get_temp_dir(), "int64")

    with self.test_session() as sess:
      # Build a graph with 1 node, and save and restore for them.
      v = tf.Variable(np.int64(15), name="v")
      save = tf.train.Saver({"v": v}, restore_sequentially=True)
      tf.initialize_all_variables().run()

      # Save the initialized values in the file at "save_path"
      val = save.save(sess, save_path)
      self.assertTrue(isinstance(val, six.string_types))
      self.assertEqual(save_path, val)

    with self.test_session() as sess:
      v = tf.Variable(np.int64(-1), name="v")
      save = tf.train.Saver({"v": v})

      with self.assertRaisesWithPredicateMatch(
          tf.OpError, lambda e: "uninitialized value v" in e.message):
        sess.run(v)

      # Restore the saved values in the parameter nodes.
      save.restore(sess, save_path)
      # Check that the parameter nodes have been restored.
      self.assertEqual(np.int64(15), v.eval())
Example #6
def run_graph(device, input_shape, axes, num_layers, py, scale, train,
              num_iters):
  """Run the graph and print its execution time.

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    axes: axes that are to be normalized across.
    num_layers: number of batch normalization layers in the graph.
    py: whether to use the python implementation.
    scale: scale after normalization.
    train: if true, also run backprop.
    num_iters: number of steps to run.

  Returns:
    The duration of the run in seconds.
  """
  graph = tf.Graph()
  with graph.as_default():
    outputs = build_graph(device, input_shape, axes, num_layers, py, scale,
                          train)
  with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    _ = session.run([out.op for out in outputs])  # warm up.
    start_time = time.time()
    for _ in range(num_iters):
      _ = session.run([out.op for out in outputs])
  duration = time.time() - start_time
  print("%s shape:%d/%d #layers:%d python:%r scale:%r train:%r - %f secs" %
        (device, len(input_shape), len(axes), num_layers, py, scale, train,
         duration / num_iters))
  return duration
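A hypothetical invocation of run_graph (the shapes, axes, and step counts below are illustrative only, and build_graph is assumed to accept them):

duration = run_graph("cpu:0", input_shape=[8, 16, 16, 32], axes=[0, 1, 2],
                     num_layers=10, py=True, scale=True, train=False,
                     num_iters=50)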
Example #7
    def testBlockGRUToGRUCellSingleStep(self):
        with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
            batch_size = 4
            cell_size = 5
            input_size = 6

            seed = 1994
            initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=seed)

            # Inputs
            x = tf.zeros([batch_size, input_size])
            h = tf.zeros([batch_size, cell_size])

            # Values for the inputs.
            x_value = np.random.rand(batch_size, input_size)
            h_value = np.random.rand(batch_size, cell_size)

            # Output from the basic GRU cell implementation.
            with tf.variable_scope("basic", initializer=initializer):
                output = tf.nn.rnn_cell.GRUCell(cell_size)(x, h)
                sess.run([tf.initialize_all_variables()])
                basic_res = sess.run([output], {x: x_value, h: h_value})

            # Output from the block GRU cell implementation.
            with tf.variable_scope("block", initializer=initializer):
                output = gru_ops.GRUBlockCell(cell_size)(x, h)
                sess.run([tf.initialize_all_variables()])
                block_res = sess.run([output], {x: x_value, h: h_value})

            self.assertEqual(len(block_res), len(basic_res))
            for block, basic in zip(block_res, basic_res):
                self.assertAllClose(block, basic)
Example #8
 def testSparseBasic(self):
   for dtype in [tf.half, tf.float32]:
     with self.test_session():
       var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
       var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
       grads0 = tf.IndexedSlices(
           tf.constant([0.1], shape=[1, 1], dtype=dtype),
           tf.constant([0]),
           tf.constant([2, 1]))
       grads1 = tf.IndexedSlices(
           tf.constant([0.01], shape=[1, 1], dtype=dtype),
           tf.constant([1]),
           tf.constant([2, 1]))
       sgd_op = tf.train.GradientDescentOptimizer(3.0).apply_gradients(
           zip([grads0, grads1], [var0, var1]))
       tf.initialize_all_variables().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
       self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           [[1.0 - 3.0 * 0.1], [2.0]], var0.eval())
       self.assertAllCloseAccordingToType(
           [[3.0], [4.0 - 3.0 * 0.01]], var1.eval())
Example #9
def enlargeDataset(images, byte_data, names, labels, is_hard):
    extendListEightTimes(labels)
    extendListEightTimes(names)
    extendListEightTimes(is_hard)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        l = len(images)
        for j in range(7):
            print(l)
            train_data2 = []
            start = time.time()
            for i in range(l):
                imageTensor = tf.image.random_contrast(images[i], 0.2, 1.8)
                imageTensor = tf.image.random_flip_left_right(imageTensor)
                imageTensor = tf.image.random_flip_up_down(imageTensor)
                imageTensor = tf.image.random_brightness(imageTensor, max_delta=50 / 255.0)
                imageTensor = tf.image.random_saturation(imageTensor, 0.2, 1.8)
                train_data2.append(imageTensor)
            print(time.time() - start)
            start = time.time()
            train_data2 = sess.run(train_data2)
            print(type(train_data2))
            print('time2:', time.time() - start)
            print(train_data2[0][16])
            for i in range(l):
                byte_data.extend(train_data2[i].flatten())
    return byte_data, names, labels, is_hard
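One caveat with enlargeDataset as written: the tf.image ops are created inside the Python loops, so the graph grows on every pass and every image. A minimal sketch of the usual fix, assuming all images share a fixed shape (the [32, 32, 3] below is hypothetical), builds the augmentation pipeline once and feeds each image through a placeholder:

image_in = tf.placeholder(tf.float32, shape=[32, 32, 3])  # hypothetical shape
aug = tf.image.random_contrast(image_in, 0.2, 1.8)
aug = tf.image.random_flip_left_right(aug)
aug = tf.image.random_flip_up_down(aug)
aug = tf.image.random_brightness(aug, max_delta=50 / 255.0)
aug = tf.image.random_saturation(aug, 0.2, 1.8)
with tf.Session() as sess:
    # Reuse the same ops for every image instead of rebuilding them.
    augmented = [sess.run(aug, feed_dict={image_in: img}) for img in images]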
Example #10
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
Example #11
def main(argv):
    mapDict = getKanjiMap()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        #restore variables from training process
        saver = tf.train.Saver(loadParam)
        saver.restore(sess, MODEL_NAME)
        for argc in range(1, len(sys.argv)):
            fName = sys.argv[argc]
            if os.path.isfile(fName):
                img = cv2.imread(fName, 0)
                img = prepareImage(img)
                # to ensure that image has 0 mean and [-1:1]
                img = (img - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
                img = img.reshape([1, IMAGE_SIZE, IMAGE_SIZE, 1])

                predictions = sess.run(
                    eval_prediction,
                    feed_dict={eval_data_node: img})

                labelID = np.argmax(predictions)
                print("labelID: %d; Recognized Kanji: %s" % (labelID, mapDict[str(labelID)]))
            else:
                print("%s does not exist\n" % fName)
Example #12
  def testFtrlWithoutRegularization2(self):
    with self.test_session() as sess:
      var0 = tf.Variable([1.0, 2.0])
      var1 = tf.Variable([4.0, 3.0])
      grads0 = tf.constant([0.1, 0.2])
      grads1 = tf.constant([0.01, 0.02])

      opt = tf.train.FtrlOptimizer(3.0,
                                   initial_accumulator_value=0.1,
                                   l1_regularization_strength=0.0,
                                   l2_regularization_strength=0.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      tf.initialize_all_variables().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([4.0, 3.0], v1_val)

      # Run 3 steps FTRL
      for _ in range(3):
        update.run()
      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllClose(np.array([-2.55607247, -3.98729396]),
                          v0_val)
      self.assertAllClose(np.array([-0.28232238, -0.56096673]),
                          v1_val)
Example #13
  def testFtrlWithL1(self):
    with self.test_session() as sess:
      var0 = tf.Variable([1.0, 2.0])
      var1 = tf.Variable([4.0, 3.0])
      grads0 = tf.constant([0.1, 0.2])
      grads1 = tf.constant([0.01, 0.02])

      opt = tf.train.FtrlOptimizer(3.0,
                                   initial_accumulator_value=0.1,
                                   l1_regularization_strength=0.001,
                                   l2_regularization_strength=0.0)
      update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      tf.initialize_all_variables().run()

      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllClose([1.0, 2.0], v0_val)
      self.assertAllClose([4.0, 3.0], v1_val)

      # Run 10 steps FTRL
      for _ in range(10):
        update.run()
      v0_val, v1_val = sess.run([var0, var1])
      self.assertAllClose(np.array([-7.66718769, -10.91273689]),
                          v0_val)
      self.assertAllClose(np.array([-0.93460727, -1.86147261]),
                          v1_val)
Example #14
  def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
      self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)

    # Properly initialize all variables.
    tf.initialize_all_variables().run()

    self.saver = tf.train.Saver()
Example #15
  def applyOptimizer(self, opt, steps=5, is_sparse=False):
    if is_sparse:
      var0 = tf.Variable([[0.0], [0.0]])
      var1 = tf.Variable([[0.0], [0.0]])
      grads0 = tf.IndexedSlices(tf.constant([0.1], shape=[1, 1]),
                                tf.constant([0]),
                                tf.constant([2, 1]))
      grads1 = tf.IndexedSlices(tf.constant([0.02], shape=[1, 1]),
                                tf.constant([1]),
                                tf.constant([2, 1]))
    else:
      var0 = tf.Variable([0.0, 0.0])
      var1 = tf.Variable([0.0, 0.0])
      grads0 = tf.constant([0.1, 0.2])
      grads1 = tf.constant([0.01, 0.02])

    update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    tf.initialize_all_variables().run()

    sess = tf.get_default_session()
    v0_val, v1_val = sess.run([var0, var1])
    if is_sparse:
      self.assertAllClose([[0.0], [0.0]], v0_val)
      self.assertAllClose([[0.0], [0.0]], v1_val)
    else:
      self.assertAllClose([0.0, 0.0], v0_val)
      self.assertAllClose([0.0, 0.0], v1_val)

    # Run Ftrl for a few steps
    for _ in range(steps):
      update.run()

    v0_val, v1_val = sess.run([var0, var1])
    return v0_val, v1_val
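applyOptimizer assumes a default session is already active (it calls tf.get_default_session()); a hypothetical caller, not part of the original suite, might look like:

  def testFtrlDenseSketch(self):
    with self.test_session():
      opt = tf.train.FtrlOptimizer(3.0, initial_accumulator_value=0.1)
      v0_val, v1_val = self.applyOptimizer(opt, steps=5)
      # With positive gradients and zero-initialized weights, FTRL moves
      # both variables negative.
      self.assertLess(v0_val[0], 0.0)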
Example #16
    def testDenseFeaturesSeparableWithinMargins(self):
        with self._single_threaded_test_session():
            examples, variables = make_dense_examples_and_variables_dicts(
                dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]], weights=[1.0, 1.0], labels=[1.0, 0.0]
            )
            options = dict(symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type="hinge_loss")
            model = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)
            binary_predictions = get_binary_predictions_for_hinge(predictions)

            train_op = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints
            # are within the margins so there is unregularized loss (1/2 per example).
            # For these datapoints, optimal weights are w_1~=0.0 and w_2~=1.0 which
            # gives an L2 loss of ~0.25.
            self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
            self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
Example #17
    def testDenseFeaturesWeightedExamples(self):
        with self._single_threaded_test_session():
            examples, variables = make_dense_examples_and_variables_dicts(
                dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]], weights=[3.0, 1.0], labels=[1.0, 0.0]
            )
            options = dict(symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type="hinge_loss")
            model = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)
            binary_predictions = get_binary_predictions_for_hinge(predictions)
            train_op = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will
            # try to increase the margin from (1.0, 0.5). Due to regularization,
            # (1.0, -0.5) will be within the margin. For these points and example
            # weights, the optimal weights are w_1~=0.4 and w_2~=1.2 which give an L2
            # loss of 0.5 * 0.25 * 0.25 * 1.6 = 0.2. The binary predictions will be
            # correct, but the boundary will be much closer to the 2nd point than the
            # first one.
            self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02)
            self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)
Example #18
    def testDenseFeaturesWithArbitraryWeights(self):
        with self._single_threaded_test_session():
            examples, variables = make_dense_examples_and_variables_dicts(
                dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]], weights=[20.0, 10.0], labels=[10.0, -5.0]
            )
            options = dict(symmetric_l2_regularization=5.0, symmetric_l1_regularization=0, loss_type="squared_loss")
            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = lr.predictions(examples)

            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # The loss function for these particular features is given by:
            # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
            # \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example weights. It
            # turns out that the optimal (variable) weights are given by:
            # w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and
            # w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3.
            # In this case the (unnormalized regularized) loss will be:
            # s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9. The
            # actual loss should be further normalized by the sum of example weights.
            self.assertAllClose([8.0, -10.0 / 3], predictions.eval(), rtol=0.01)
            loss = lr.regularized_loss(examples)
            self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
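A standalone arithmetic check of the closed-form optimum in the comment (plain Python, independent of the test):

# w_i* = label_i * s_i / (lambda + s_i), with lambda = 5.0, s_1 = 20, s_2 = 10
w1 = 10.0 * 20.0 / (5.0 + 20.0)   # = 8.0
w2 = -5.0 * 10.0 / (5.0 + 10.0)   # = -10/3
loss = (20.0 / 2) * (w1 - 10.0) ** 2 + (10.0 / 2) * (w2 + 5.0) ** 2 \
       + (5.0 / 2) * (w1 ** 2 + w2 ** 2)
print(loss)           # 2175/9, the unnormalized regularized loss
print(loss / 30.0)    # 2175/270, normalized by the total example weight 20 + 10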
Example #19
    def testDenseFeaturesPerfectlySeparable(self):
        with self._single_threaded_test_session():
            examples, variables = make_dense_examples_and_variables_dicts(
                dense_features_values=[[1.0, 1.0], [1.0, -1.0]], weights=[1.0, 1.0], labels=[1.0, 0.0]
            )
            options = dict(symmetric_l2_regularization=1.0, symmetric_l1_regularization=0, loss_type="hinge_loss")
            model = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = model.predictions(examples)
            binary_predictions = get_binary_predictions_for_hinge(predictions)

            train_op = model.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
            self.assertAllEqual([1, 0], binary_predictions.eval())

            # (1.0, 1.0) and (1.0, -1.0) are perfectly separable by x-axis (that is,
            # the SVM's functional margin >=1), so the unregularized loss is ~0.0.
            # There is only loss due to l2-regularization. For these datapoints, it
            # turns out that w_1~=0.0 and w_2~=1.0 which means that l2 loss is ~0.25.
            unregularized_loss = model.unregularized_loss(examples)
            regularized_loss = model.regularized_loss(examples)
            self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
            self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
Example #20
    def testL1Regularization(self):
        # Setup test data
        example_protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
        ]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = dict(symmetric_l2_regularization=1.0, symmetric_l1_regularization=4.0, loss_type="squared_loss")
            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            prediction = lr.predictions(examples)
            loss = lr.regularized_loss(examples)

            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # Predictions should be -4.0, 20/3 due to minimizing regularized loss:
            #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
            self.assertAllClose([-4.0, 20.0 / 3.0], prediction.eval(), rtol=0.08)

            # Loss should be the sum of the regularized loss value from above per
            # example after plugging in the optimal weights.
            self.assertAllClose(308.0 / 6.0, loss.eval(), atol=0.01)
Example #21
    def testFeatureValues(self):
        # Setup test data
        example_protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0, -2.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0, 2.0),
        ]
        example_weights = [5.0, 3.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)

            variables = make_variable_dict(1, 1)
            options = dict(symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type="squared_loss")

            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = lr.predictions(examples)

            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for
            # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1,
            # y_2 be the labels for examples 1 and 2 respectively and s_1, s_2 the
            # corresponding *example* weights. With the given feature values, the loss
            # function is given by:
            # s_1/2(y_1 + 2w_1 + 2w_3)^2 + s_2/2(y_2 - 2w_2 - 2w_4)^2
            # + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2). Solving for the optimal, it
            # can be verified that:
            # w_1* = w_3* = -2.0 s_1 y_1/(\lambda + 8 s_1) and
            # w_2* = w_4* = 2 \cdot s_2 y_2/(\lambda + 8 s_2). Equivalently, due to
            # regularization and example weights, the predictions are within:
            # 8 \cdot s_i /(\lambda + 8 \cdot s_i) of the labels.
            self.assertAllClose([-10 * 40.0 / 41.0, 14.0 * 24 / 25.0], predictions.eval(), atol=0.01)
Example #22
    def testSimple(self):
        # Setup test data
        example_protos = [
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
        ]
        example_weights = [1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = dict(symmetric_l2_regularization=1, symmetric_l1_regularization=0, loss_type="squared_loss")

            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = lr.predictions(examples)
            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # Predictions should be 2/3 of label due to minimizing regularized loss:
            #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2
            self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0], predictions.eval(), rtol=0.005)
            # Approximate gap should be very close to 0.0. (In fact, because the gap
            # is only approximate, it is likely that upon convergence the duality gap
            # can have a tiny negative value).
            self.assertAllClose(0.0, lr.approximate_duality_gap().eval(), atol=1e-2)
Example #23
    def testImbalancedWithExampleWeights(self):
        # Setup test data with 1 positive, and 1 negative example.
        example_protos = [
            make_example_proto({"age": [0], "gender": [0]}, 0),
            make_example_proto({"age": [1], "gender": [1]}, 1),
        ]
        example_weights = [3.0, 1.0]
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(1, 1)
                options = dict(
                    symmetric_l2_regularization=1,
                    symmetric_l1_regularization=0,
                    num_table_shards=num_shards,
                    loss_type="logistic_loss",
                )

                lr = SdcaModel(examples, variables, options)
                tf.initialize_all_variables().run()
                unregularized_loss = lr.unregularized_loss(examples)
                loss = lr.regularized_loss(examples)
                predictions = lr.predictions(examples)
                train_op = lr.minimize()
                for _ in range(_MAX_ITERATIONS):
                    train_op.run()

                self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08)
                self.assertAllClose(0.408044, loss.eval(), atol=0.012)
                predicted_labels = get_binary_predictions_for_logistic(predictions)
                self.assertAllEqual([0, 1], predicted_labels.eval())
                self.assertAllClose(0.0, lr.approximate_duality_gap().eval(), rtol=2e-2, atol=1e-2)
Example #24
 def testSparseBasic(self):
   for dtype in [tf.half, tf.float32, tf.float64]:
     with self.test_session():
       var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
       var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
       grads0 = tf.IndexedSlices(
           tf.constant([0.1], shape=[1, 1], dtype=dtype),
           tf.constant([0]),
           tf.constant([2, 1]))
       grads1 = tf.IndexedSlices(
           tf.constant([0.01], shape=[1, 1], dtype=dtype),
           tf.constant([1]),
           tf.constant([2, 1]))
       ada_opt = tf.train.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
       ada_update = ada_opt.apply_gradients(zip(
           [grads0, grads1], [var0, var1]))
       tf.initialize_all_variables().run()
       # Fetch params to validate initial values
       self.assertAllClose([[1.0], [2.0]], var0.eval())
       self.assertAllClose([[3.0], [4.0]], var1.eval())
       # Run 3 steps of Adagrad
       for _ in range(3):
         ada_update.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           np.array([[-1.6026098728179932], [2.0]]), var0.eval())
       self.assertAllCloseAccordingToType(
           np.array([[3.0], [3.715679168701172]]), var1.eval())
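The asserted values follow directly from the Adagrad update rule (accum += g**2; var -= lr * g / sqrt(accum)); reproducing var0's updated first entry in plain Python:

import math
lr, g = 3.0, 0.1
acc, x = 0.1, 1.0                  # initial_accumulator_value and var0[0]
for _ in range(3):
  acc += g * g                     # accumulate the squared gradient
  x -= lr * g / math.sqrt(acc)
print(x)                           # -1.6026098..., the asserted value above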
Example #25
  def testLSTMBasicToBlockPeeping(self):
    with self.test_session(use_gpu=self._use_gpu) as sess:
      batch_size = 2
      input_size = 3
      cell_size = 4
      sequence_length = 5

      inputs = []
      for _ in range(sequence_length):
        inp = tf.convert_to_tensor(
            np.random.randn(batch_size, input_size),
            dtype=tf.float32)
        inputs.append(inp)

      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)
      with tf.variable_scope("basic", initializer=initializer):
        cell = tf.nn.rnn_cell.LSTMCell(cell_size,
                                       use_peepholes=True,
                                       state_is_tuple=True)
        outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)

        sess.run([tf.initialize_all_variables()])
        basic_outputs = sess.run(outputs)
        basic_grads = sess.run(tf.gradients(outputs, inputs))
        basic_wgrads = sess.run(tf.gradients(outputs, tf.trainable_variables()))

      with tf.variable_scope("block", initializer=initializer):
        w = tf.get_variable("w",
                            shape=[input_size + cell_size, cell_size * 4],
                            dtype=tf.float32)
        b = tf.get_variable("b",
                            shape=[cell_size * 4],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer)

        wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
        wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
        wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)

        _, _, _, _, _, _, outputs = fused_lstm(
            tf.convert_to_tensor(sequence_length,
                                 dtype=tf.int64),
            inputs,
            w,
            b,
            wci=wci,
            wcf=wcf,
            wco=wco,
            cell_clip=0,
            use_peephole=True)

        sess.run([tf.initialize_all_variables()])
        block_outputs = sess.run(outputs)
        block_grads = sess.run(tf.gradients(outputs, inputs))
        block_wgrads = sess.run(tf.gradients(outputs, [w, b, wci, wcf, wco]))

      self.assertAllClose(basic_outputs, block_outputs)
      self.assertAllClose(basic_grads, block_grads)
      for basic, block in zip(basic_wgrads, block_wgrads):
        self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
Example #26
def train_model(args):
	data_loader = InputHandler(args.data_dir, args.batch_size, args.result_length)
	args.vocabulary_size = data_loader.vocabulary_size

	# Save the original files, so that we can load the model when sampling
	with open(os.path.join(args.snapshots_dir, CONFIGURATION_FILE), 'wb') as f:
		cPickle.dump(args, f)
	with open(os.path.join(args.snapshots_dir, WORDS_VOCABULARY_FILE), 'wb') as f:
		cPickle.dump((data_loader.words, data_loader.vocabulary), f)

	model = RNNModel(args.rnn_size, args.network_depth, args.batch_size, args.result_length,
					 args.vocabulary_size, args.gradient)

	with tf.Session() as session:
		tf.initialize_all_variables().run()
		saver = tf.train.Saver(tf.all_variables())
		for e in range(args.num_epochs):
			session.run(tf.assign(model.lr, args.training_rate * (args.decay_rate ** e)))
			data_loader.set_batch_pointer_to_zero()
			state = model.initial_state.eval()

			for b in range(data_loader.num_batches):
				x, y = data_loader.get_next_batch()
				feed = {model.input_data: x, model.targets: y, model.initial_state: state}
				train_loss, state, _ = session.run([model.cost, model.final_state, model.train_op], feed)
				if (e * data_loader.num_batches + b) % args.snapshot == 0 \
						or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
					snapshot_path = os.path.join(args.snapshots_dir, 'model.ckpt')
					saver.save(session, snapshot_path, global_step = e * data_loader.num_batches + b)
					print("Model snapshot was taken to {}".format(snapshot_path))
Example #27
  def testSharing(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
      with self.test_session():
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        ada_opt = tf.train.AdagradOptimizer(3.0)
        # Apply the optimizer twice.  Both applications will use
        # the same accums.
        ada_update1 = ada_opt.apply_gradients(zip(
            [grads0, grads1], [var0, var1]))
        ada_update2 = ada_opt.apply_gradients(zip(
            [grads0, grads1], [var0, var1]))
        self.assertEqual(["accumulator"], ada_opt.get_slot_names())
        slot0 = ada_opt.get_slot(var0, "accumulator")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        slot1 = ada_opt.get_slot(var1, "accumulator")
        self.assertEqual(slot1.get_shape(), var1.get_shape())
        tf.initialize_all_variables().run()

        # Fetch params to validate initial values.
        self.assertAllClose([1.0, 2.0], var0.eval())
        self.assertAllClose([3.0, 4.0], var1.eval())
        # Mix the first and the second adagrad for 3 steps.
        ada_update1.run()
        ada_update2.run()
        ada_update1.run()
        # Validate updated params (the same as with only 1 Adagrad).
        self.assertAllCloseAccordingToType(
            np.array([-1.6026098728179932, -0.6026098728179932]), var0.eval())
        self.assertAllCloseAccordingToType(
            np.array([2.715679168701172, 3.715679168701172]), var1.eval())
Example #28
    def testInstancesOfOneClassOnly(self):
        # Setup test data with 1 positive (ignored), and 1 negative example.
        example_protos = [
            make_example_proto({"age": [0], "gender": [0]}, 0),
            make_example_proto({"age": [1], "gender": [0]}, 1),  # Shares gender with the instance above.
        ]
        example_weights = [1.0, 0.0]  # Second example "omitted" from training.
        for num_shards in _SHARD_NUMBERS:
            with self._single_threaded_test_session():
                examples = make_example_dict(example_protos, example_weights)
                variables = make_variable_dict(1, 1)
                options = dict(
                    symmetric_l2_regularization=1,
                    symmetric_l1_regularization=0,
                    num_table_shards=num_shards,
                    loss_type="logistic_loss",
                )

                lr = SdcaModel(examples, variables, options)
                tf.initialize_all_variables().run()
                unregularized_loss = lr.unregularized_loss(examples)
                loss = lr.regularized_loss(examples)
                predictions = lr.predictions(examples)
                train_op = lr.minimize()
                for _ in range(_MAX_ITERATIONS):
                    train_op.run()
                self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
                self.assertAllClose(0.525457, loss.eval(), atol=0.01)
                predicted_labels = get_binary_predictions_for_logistic(predictions)
                self.assertAllEqual([0, 0], predicted_labels.eval())
                self.assertAllClose(0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2)
Example #29
 def testBasicLSTMCell(self):
   with self.test_session() as sess:
     with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
       x = tf.zeros([1, 2])
       m = tf.zeros([1, 8])
       g, out_m = tf.nn.rnn_cell.MultiRNNCell(
           [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)(x, m)
       sess.run([tf.initialize_all_variables()])
       res = sess.run([g, out_m], {x.name: np.array([[1., 1.]]),
                                   m.name: 0.1 * np.ones([1, 8])})
       self.assertEqual(len(res), 2)
       # The numbers in results were not calculated; this is just a smoke test.
       self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
       expected_mem = np.array([[0.68967271, 0.68967271,
                                 0.44848421, 0.44848421,
                                 0.39897051, 0.39897051,
                                 0.24024698, 0.24024698]])
       self.assertAllClose(res[1], expected_mem)
     with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)):
       x = tf.zeros([1, 3])  # Test BasicLSTMCell with input_size != num_units.
       m = tf.zeros([1, 4])
       g, out_m = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)(x, m)
       sess.run([tf.initialize_all_variables()])
       res = sess.run([g, out_m], {x.name: np.array([[1., 1., 1.]]),
                                   m.name: 0.1 * np.ones([1, 4])})
       self.assertEqual(len(res), 2)
Example #30
    def testL2Regularization(self):
        # Setup test data
        example_protos = [
            # 2 identical examples
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            make_example_proto({"age": [0], "gender": [0]}, -10.0),
            # 2 more identical examples
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
            make_example_proto({"age": [1], "gender": [1]}, 14.0),
        ]
        example_weights = [1.0, 1.0, 1.0, 1.0]
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = dict(symmetric_l2_regularization=16, symmetric_l1_regularization=0, loss_type="squared_loss")

            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()
            predictions = lr.predictions(examples)

            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            # Predictions should be 1/5 of label due to minimizing regularized loss:
            #   (label - 2 * weight)^2 + L2 * 16 * weight^2
            optimal1 = -10.0 / 5.0
            optimal2 = 14.0 / 5.0
            self.assertAllClose([optimal1, optimal1, optimal2, optimal2], predictions.eval(), rtol=0.01)
Example #31
def train(checkpoint_dir, image_list, batch_size, normalize):
   with tf.Graph().as_default():

      global_step = tf.Variable(0, name='global_step', trainable=False)

      original_images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, 144, 160, 3)) 
      gray_images_placeholder     = tf.placeholder(tf.float32, shape=(batch_size, 144, 160, 3)) 

      # image summary for tensorboard
      tf.image_summary('original_images', original_images_placeholder, max_images=100)
      tf.image_summary('gray_images', gray_images_placeholder, max_images=100)

      logits = architecture.inference(gray_images_placeholder, "train")
      loss   = architecture.loss(original_images_placeholder, logits)

      tf.scalar_summary('loss', loss)
      
      train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss, global_step=global_step)

      # summary for tensorboard graph
      summary_op = tf.merge_all_summaries()

      variables = tf.all_variables()
      init      = tf.initialize_all_variables()
      sess      = tf.Session()

      try:
         os.mkdir(checkpoint_dir)
      except:
         pass

      sess.run(init)
      print "\nRunning session\n"

      # saver for the model
      saver = tf.train.Saver(tf.all_variables())
      
      tf.train.start_queue_runners(sess=sess)

      # restore previous model if one
      ckpt = tf.train.get_checkpoint_state(checkpoint_dir+"training")
      if ckpt and ckpt.model_checkpoint_path:
         print "Restoring previous model..."
         try:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print "Model restored"
         except:
            print "Could not restore model"
            pass

      # Summary op
      graph_def = sess.graph.as_graph_def(add_shapes=True)
      summary_writer = tf.train.SummaryWriter(checkpoint_dir+"training", graph_def=graph_def)

      # Constants
      step = int(sess.run(global_step))
      #epoch_num = step/(train_size/batch_size)

      while True:
         step += 1
         feed_dict = get_feed_dict(batch_size, original_images_placeholder, gray_images_placeholder, image_list, normalize)
         _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

         if step % 1 == 0:  # i.e. every step; raise the modulus to log less often
            print " Step: " + str(sess.run(global_step)) + " Loss: " + str(loss_value)
        
         # save tensorboard stuff
         #if step%200 == 0:
         #   summary_str = sess.run(summary_op)
         #   summary_writer.add_summary(summary_str, step)

         if step%100 == 0:
            print "Saving model"
            print
            saver.save(sess, checkpoint_dir+"training/checkpoint", global_step=global_step)
            print
Example #32
    data = preprocess(data)
    train_batch_data, train_batch_label = gen_batch(data, train_label)
    train_batch_data1, train_batch_label = gen_batch1(data, train_label)

    val_batch_data, val_batch_label = gen_batch(data, test_label)
    val_batch_data1, val_batch_label = gen_batch1(data, test_label)

    rand_ix = np.random.permutation(len(train_batch_data))
    train_batch_data, train_batch_data1, train_batch_label = train_batch_data[
        rand_ix], train_batch_data1[rand_ix], train_batch_label[rand_ix]
    rand_ix1 = np.random.permutation(len(val_batch_data))
    val_batch_data, val_batch_data1, val_batch_label = val_batch_data[
        rand_ix1], val_batch_data1[rand_ix1], val_batch_label[rand_ix1]
    model = Model(learning_rate)

    init = tf.initialize_all_variables()
    saver = tf.train.Saver()
    rcnn3d7 = []
    with tf.Session() as sess:
        sess.run(init)

        for i_epoch in range(num_epoch):

            # training step
            total_train_loss = 0.
            total_train_acc = 0.
            for i in range(0, len(train_batch_data), batch_size):
                if i + batch_size >= len(train_batch_data):
                    break
                batch_data = train_batch_data[i:i + batch_size]
                batch_data1 = train_batch_data1[i:i + batch_size]
Example #33
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('data_dir', '/tmp/data/', 'Directory for storing data')

mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

sess = tf.InteractiveSession()

# Create the model
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Train
tf.initialize_all_variables().run()
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys})

# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))
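One caution about this script: computing the loss as -sum(y_ * log(y)) on top of an explicit softmax is numerically unstable (log(0) when the softmax saturates). A sketch of the standard, stabler formulation on the same model:

logits = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))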

Example #34
    def test(self):
        """Test DnCNN"""
        f = file(
            '/home/lyj/tensorflow_code/DnCNN-tensorflow-master/test_per_8layer_4_sigma_25_multi_ensemable.txt',
            'w+')
        #merged = tf.summary.merge_all()
        # init variables
        tf.initialize_all_variables().run()
        test_files = glob('./test/*.png')  # the pattern has no '{}', so .format(self.testset) was a no-op
        #		test_files = glob(os.path.join(self.test_save_dir, '{}/*.png'.format(self.testset)))
        print(test_files)
        counter = 0
        # load testing input
        X_test = tf.placeholder(tf.float32, \
           [1,None,None,1], name='noisy_image_test')
        predicted_noise, layer_1_output = self.sampler(X_test)
        print("[*] Loading test images ...")
        test_data = load_images(test_files)  # list of array of different size
        for order in range(160, 192):
            model_path = './checkpoint/per_8layer_end_3layer_25_half_ensemable_BSD400_128_40/DnCNN.model-%d' % (
                order * 500)
            saver = tf.train.Saver()
            saver.restore(self.sess, model_path)
            psnr_sum = 0
            psnr_init_sum = 0

            for idx in xrange(len(test_files)):
                noisy_image = add_noise(1 / 255.0 * test_data[idx], self.sigma,
                                        self.sess)  # ndarray
                #        			pdb.set_trace()
                noise_ = (noisy_image -
                          1 / 255.0 * test_data[idx]) * 255.0  # ndarray
                #        			noise_var = np.var(noise_[:,:,:,0])
                #        			noise_mean = np.mean(noise_[:,:,:,0])
                #        			print("noise_var: %4f" %(noise_var))
                #        			print("noise_mean: %4f" %(noise_mean))
                #        			plt.figure()
                #        			image = noisy_image[0,:,:,0]
                #        			plt.imshow(image, cmap ='gray')
                #        			pdb.set_trace()
                predicted_noise_, layer_1_output_ = self.sess.run(
                    [predicted_noise, layer_1_output],
                    feed_dict={X_test: noisy_image})
                #        			bn_mean,bn_var,bn_mean_new,bn_var_new = self.sess.run([self.bn_mean,self.bn_var,self.bn_mean_new,self.bn_var_new],feed_dict={X_test : noisy_image})
                #        			bn_mean,bn_var = self.sess.run([self.bn_mean,self.bn_var],feed_dict={X_test : noisy_image})
                #        			show_ = self.show(layer_1_output_,order)

                counter = counter + 1
                #        			predicted_noise_=predicted_noise_*255
                #        			predicted_noise_var = np.var(predicted_noise_)
                #        			predicted_noise_mean = np.mean(predicted_noise_)
                #        			print("predicted_noise_var: %4f" %(predicted_noise_var))
                #        			print("predicted_noise_mean: %4f" %(predicted_noise_mean))
                noisy_image = noisy_image[:, :, :, 0]
                #        			pdb.set_trace()
                output_clean_image = (noisy_image - predicted_noise_) * 255
                #        			output_clean_image = predicted_noise_ * 255
                # calculate PSNR
                groundtruth = np.clip(test_data[idx], 0, 255).astype('uint8')
                groundtruth = groundtruth[:, :, :, 0]
                noisyimage = np.clip(noisy_image * 255, 0, 255).astype('uint8')
                outputimage = np.clip(output_clean_image, 0,
                                      255).astype('uint8')
                psnr = cal_psnr(groundtruth, outputimage)
                print(psnr)
                psnr_init_ = cal_psnr(groundtruth, noisyimage)
                print(psnr_init_)
                psnr_init_sum += psnr_init_
                psnr_sum += psnr
                save_images(
                    groundtruth, noisyimage, outputimage,
                    os.path.join(self.sample_dir,
                                 'test_mean_%d_%d.png' % (idx, counter)))
            avg_psnr = psnr_sum / len(test_files)
            avg_psnr_init = psnr_init_sum / len(test_files)
            #tf.summary.scalar('Average PSNR', avg_psnr)
            print("--- Test ---- Average PSNR %.4f ---" % avg_psnr)
            print("--- Test ---- Average init PSNR %.4f ---" % avg_psnr_init)
            f.write("--- Test ---- Average PSNR %.4f ---" % avg_psnr)
            f.write("\n")
        f.close()
Example #35

def ddsigmoid(z):
    return expit(z) * (1.0 - expit(z))


print ddsigmoid(0.387)

tf.reset_default_graph()

one = tf.constant(1.0)
X = tf.placeholder("float")  # create symbolic variable
Y = tf.placeholder("float")  # create symbolic variable

x_77 = tf.constant(0.387)

# derivative of sigmoid= sigmoid(y) * (1.0 - sigmoid(y))

sigmoid = (tf.div(one, (one + tf.exp(-X))))
dsigmoid = tf.multiply(Y, tf.subtract(one, Y))

# Initialize variables (this graph defines no tf.Variables, so this is a no-op here).
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

print sess.run(sigmoid, {X: x_77.eval(session=sess)})
print sess.run(sigmoid, {X: 0.387})
print sess.run(dsigmoid, {Y: sigmoid.eval({X: 0.387}, session=sess)})
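Since dsigmoid expects Y to already hold sigmoid(x), the TF result can be cross-checked against the expit-based ddsigmoid defined at the top:

# Cross-check the TensorFlow derivative against the SciPy-based ddsigmoid.
s = sess.run(sigmoid, {X: 0.387})
assert abs(sess.run(dsigmoid, {Y: s}) - ddsigmoid(0.387)) < 1e-5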
Example #36
# RMSPropOptimizer (first time I have heard of it): an algorithm that automates learning-rate adjustment
#https://qiita.com/TomokIshii/items/f355d8e87d23ee8e0c7a
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Evaluating the predictions
# tf.argmax(y,1) returns the most likely label for each input, and tf.argmax(y_,1) returns the true label.
# tf.equal then lets us check whether our prediction was correct.
#https://qiita.com/qooa/items/3719fec3cfe764674fb9
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))

# Type conversion:
# tf.cast(thing_to_convert, target_dtype)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Run
sess.run(tf.initialize_all_variables())
create_images("before")

# Mini-batch learning
# Batch learning: given N training examples x, use all N of them, average the per-example losses l, and take that as the overall loss L.
# Stochastic gradient method: pick one example xi at random from x1, x2, ..., xN and use its loss l directly as L.
# Mini-batch learning sits between full-batch learning and the stochastic method: given N training examples, a random n (<= N) of them are used to compute L.
#             The more classes there are, the more a small mini-batch size may help. It is the most common approach.
#https://www.hellocybernetics.tech/entry/2017/07/08/152859
num_epoch = 1001
show = 100
num_data = train_images.shape[0]
batch_size = 16
Loss = []
Accuracy = []
for i in range(1, num_epoch):
Example #37
def main():
    # Specify training parameters
    result_2b = './results_2b/'  # directory where the results from the training are saved
    result_2b_test = './results_2b_test/'
    #result_2b_validate = './results_2b_validate/'
    max_step = 5500  # the maximum iterations. After max_step iterations, the training will stop no matter what

    start_time = time.time()  # start timing

    # FILL IN THE CODE BELOW TO BUILD YOUR NETWORK

    # placeholders for input data and input labeles
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    y_ = tf.placeholder(tf.float32, [None, 10], name='y_')

    # reshape the input image
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # first convolutional layer
    with tf.name_scope('firstLayer'):
        with tf.name_scope('weights'):
            W_conv1 = weight_variable([5, 5, 1, 32])
            variable_summaries(W_conv1)
        with tf.name_scope('bias'):
            b_conv1 = bias_variable([32])
            variable_summaries(b_conv1)
        with tf.name_scope('Relu'):
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
            variable_summaries(h_conv1)
        with tf.name_scope('max_pool_2x2'):
            h_pool1 = max_pool_2x2(h_conv1)
            variable_summaries(h_pool1)
        with tf.name_scope('NetInput'):
            netinput1 = conv2d(x_image, W_conv1) + b_conv1
            variable_summaries(netinput1)

    # second convolutional layer

    with tf.name_scope('secondLayer'):
        with tf.name_scope('weights'):
            W_conv2 = weight_variable([5, 5, 32, 64])
            variable_summaries(W_conv2)
        with tf.name_scope('bias'):
            b_conv2 = bias_variable([64])
            variable_summaries(b_conv2)
        with tf.name_scope('Relu'):
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
            variable_summaries(h_conv2)
        with tf.name_scope('max_pool_2x2'):
            h_pool2 = max_pool_2x2(h_conv2)
            variable_summaries(h_pool2)
        with tf.name_scope('NetInput'):
            netinput2 = conv2d(h_pool1, W_conv2) + b_conv2
            variable_summaries(netinput2)

    # densely connected layer
    with tf.name_scope('denselyLayer'):
        with tf.name_scope('weights'):
            W_fc1 = weight_variable([7 * 7 * 64, 1024])
            variable_summaries(W_fc1)
        with tf.name_scope('bias'):
            b_fc1 = bias_variable([1024])
            variable_summaries(b_fc1)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        with tf.name_scope('NetInput'):
            netinput3 = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
            variable_summaries(netinput3)
        with tf.name_scope('Relu'):
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
            variable_summaries(h_fc1)
        with tf.name_scope('MaxPool'):
            variable_summaries(h_pool2_flat)

    # dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # softmax
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # FILL IN THE FOLLOWING CODE TO SET UP THE TRAINING

    # setup training
    y = tf.nn.softmax(y_conv, name='y')
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')

    # Add a scalar summary for the snapshot loss.
    tf.summary.scalar(cross_entropy.op.name, cross_entropy)
    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Instantiate a SummaryWriter to output summaries and the Graph.
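    # (The session `sess` is assumed to have been created earlier in the
    # surrounding script; this snippet does not show its construction.)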
    summary_writer = tf.summary.FileWriter(result_2b, sess.graph)
    test = tf.summary.FileWriter(result_2b_test, sess.graph)
    #validate_error = tf.summary.FileWriter(result_2b_validate, sess.graph)

    # Run the Op to initialize the variables.
    sess.run(init)

    # run the training
    for i in range(max_step):
        batch = mnist.train.next_batch(
            50
        )  # make the data batch, which is used in the training iteration.
        # the batch size is 50
        if i % 100 == 0:
            # output the training accuracy every 100 iterations
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 1.0
            })
            print("step %d, training accuracy %g" % (i, train_accuracy))

            # Update the events file which is used to monitor the training (in this case,
            # only the training loss is monitored)
            summary_str = sess.run(summary_op,
                                   feed_dict={
                                       x: batch[0],
                                       y_: batch[1],
                                       keep_prob: 0.5
                                   })
            summary_writer.add_summary(summary_str, i)
            summary_writer.flush()

        # save the checkpoints every 1100 iterations and at the final step
        # (note: i never reaches max_step inside range(max_step))
        if i % 1100 == 0 or i == max_step - 1:
            checkpoint_file = os.path.join(result_2b, 'checkpoint')
            saver.save(sess, checkpoint_file, global_step=i)

            test_summary = sess.run(summary_op,
                                    feed_dict={
                                        x: mnist.test.images,
                                        y_: mnist.test.labels,
                                        keep_prob: 1.0  # disable dropout for evaluation
                                    })
            test.add_summary(test_summary, i)
            test.flush()

        train_step.run(feed_dict={
            x: batch[0],
            y_: batch[1],
            keep_prob: 0.5
        })  # run one train_step

    # print test error
    print("test accuracy %g" % accuracy.eval(feed_dict={
        x: mnist.test.images,
        y_: mnist.test.labels,
        keep_prob: 1.0
    }))

    stop_time = time.time()
    print('The training takes %f seconds to finish' % (stop_time - start_time))
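
Note: tf.initialize_all_variables() was deprecated in favor of
tf.global_variables_initializer() (TensorFlow 0.12+). Examples #44 and #46
below guard for both APIs with a try/except; a minimal sketch of that
compatibility shim, assuming a session sess already exists:

try:
    init = tf.global_variables_initializer()  # TensorFlow >= 0.12
except AttributeError:
    init = tf.initialize_all_variables()      # older TensorFlow releases
sess.run(init)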
Example #38
0
  def train(self, config):
    if config.is_train:
      input_setup(self.sess, config)
    else:
      nx, ny = input_setup(self.sess, config)

    if config.is_train:     
      data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "train.h5")
    else:
      data_dir = os.path.join('./{}'.format(config.checkpoint_dir), "test.h5")

    train_data, train_label = read_data(data_dir)

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
    tf.initialize_all_variables().run()
    
    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    if config.is_train:
      print("Training...")
      epoch_loss = 0
      average_loss = 0	
      average_ssim = 0 

      for ep in xrange(config.epoch):  # for each epoch
        # Run by batch images
        # TODO: check the TensorFlow data loader and shuffle the training data each epoch
        batch_idxs = len(train_data) // config.batch_size

        for idx in xrange(0, batch_idxs):

          batch_images = train_data[idx*config.batch_size : (idx+1)*config.batch_size]
          batch_labels = train_label[idx*config.batch_size : (idx+1)*config.batch_size]
          counter += 1
          # update weights and biases
          _, err = self.sess.run([self.train_op, self.loss], feed_dict={self.images: batch_images, self.labels: batch_labels})

          pred_patch = self.pred.eval(feed_dict={self.images: batch_images, self.labels: batch_labels})[:, 33:66, 33:66]
          label_patch = self.labels.eval(feed_dict={self.images: batch_images, self.labels: batch_labels})
          average_ssim += ssim(pred_patch, label_patch, multichannel=True) / batch_idxs

          epoch_loss += err

          if counter % 10 == 0:  # display the training loss every 10 batches
            print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" % ((ep+1), counter, time.time()-start_time, err))

          if counter % (batch_idxs*10) == 0:  # save the model every batch_idxs*10 batches (10 epochs). Note: the final model may not be saved!
            self.save(config.checkpoint_dir, counter)

          if counter % batch_idxs == 0:  # end of an epoch: log the averaged metrics and reset the accumulators
            average_loss = epoch_loss / float(batch_idxs)
            PSNR = 10 * math.log10(1 / average_loss)
            with open('data.txt', 'a') as file:
              file.write(str(average_loss) + " , " + str(PSNR) + " , " + str(average_ssim) + "\n")
            epoch_loss = 0
            average_loss = 0
            average_ssim = 0
    else:
      print("Testing...")

      result = self.pred.eval({self.images: train_data, self.labels: train_label})
      print(nx,ny)
      result = merge(result, [nx, ny])
      result = result.squeeze()
      image_path = os.path.join(os.getcwd(), config.sample_dir)
      image_path = os.path.join(image_path, "test_image.png")
      imsave(result, image_path)
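
The PSNR above assumes average_loss is a mean-squared error over images scaled
to [0, 1], so PSNR = 10*log10(MAX^2 / MSE) with MAX = 1. The conversion in
isolation (standard library only; the peak value of 1.0 is an assumption
carried over from the per-frame normalization in the loop above):

import math

def psnr_from_mse(mse, peak=1.0):
    # PSNR = 10 * log10(peak^2 / MSE), where peak is the maximum pixel value
    return 10 * math.log10(peak * peak / mse)

print(psnr_from_mse(0.001))  # 30.0 dB for an MSE of 1e-3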
Example #39
0
def train():
    """
    It performs the training of the model and evaluates validation accuracy at specified intervals.
    It also adds a summary of some parameters for visualization in tensorboard.
    """
    # define placeholder for inputs to network
    xs = tf.placeholder(tf.float32, [None, 784])  # 28x28
    ys = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)
    x_image = tf.reshape(xs, [-1, 28, 28, 1])
    prediction = model(x_image=x_image, keep_prob=keep_prob)
    # Cross entropy function for minimization.
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))  # loss
    tf.summary.scalar('cross_entropy', cross_entropy)  # summary names may not contain spaces

    # Accuracy Scalar of the model.
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(ys, 1),
                                          tf.argmax(prediction, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    # Summarization for tensorboard.
    merged = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')
    sess.run(tf.initialize_all_variables())

    n_steps = (mnist.train.num_examples *
               FLAGS.n_epochs) // FLAGS.BATCH_SIZE + 1  # integer division so range() accepts it
    # Actual training
    for i in range(n_steps):
        start_time = time.time()
        batch_xs, batch_ys = mnist.train.next_batch(FLAGS.BATCH_SIZE)
        summary, _ = sess.run([merged, train_step],
                              feed_dict={
                                  xs: batch_xs,
                                  ys: batch_ys,
                                  keep_prob: FLAGS.dropout
                              })
        train_writer.add_summary(summary, i)
        if i % FLAGS.eval_num_steps == 0:
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    xs: mnist.validation.images,
                    ys: mnist.validation.labels,
                    keep_prob: 1
                })
            validation_writer.add_summary(summary, i)
            logger.info('Step: {0} 	Accuracy: {1} 	Time taken: {2}'.format(
                i, acc,
                time.time() - start_time))
    test_acc = accuracy.eval(session=sess,
                             feed_dict={
                                 xs: mnist.test.images,
                                 ys: mnist.test.labels,
                                 keep_prob: 1
                             })
    logger.info('Test Accuracy = {0}'.format(test_acc))
    train_writer.close()
    validation_writer.close()
Example #40
0
    def fit(self):

        # reader = vid_reader.Video_Reader2(self.path_to_train_dir)
        with self.graph.as_default():
            saver = tf.train.Saver()

            best_error = -1

            with tf.Session() as sess:

                if self.path_to_load_variables == '':
                    sess.run(tf.initialize_all_variables())
                else:
                    saver.restore(sess, self.path_to_load_variables)
                    print 'loaded variables ' + self.path_to_load_variables

                ces = []
                last_ce_mean = .9

                for step in range(99999):

                    batch = []
                    labels = []
                    # for i in range(self.batch_size):
                    while len(batch) != self.batch_size:
                        # samp, label = reader.get_rand_vid_and_label()

                        # if len(samp) != 8:
                        # 	print 'WHAT!'
                        # 	continue

                        # distorted_samp = []
                        # for j in range(len(samp)):

                        # 	distorted_image = tf.image.random_contrast(tf.image.random_brightness(samp[j],max_delta=63),lower=0.2, upper=1.8)
                        # 	distorted_samp.append(distorted_image)
                        seq = make_ball_gif()

                        for i in range(len(seq)):
                            seq[i] = seq[i] / np.max(seq[i])

                        seq1 = list(seq)
                        seq1.pop(-1)
                        batch.append(seq1)

                        seq2 = list(seq)
                        seq2.pop(0)
                        # seq2.append(np.zeros((self.image_height,self.image_width,1)))
                        labels.append(seq2)

                    # batch = np.array(batch)
                    # print batch.shape

                    feed_dict = {self.input: batch, self.target: labels}

                    # _ = sess.run(self.train_opt, feed_dict=feed_dict)

                    # ce, ff = sess.run([self.cross_entropy, self.logits], feed_dict=feed_dict)
                    # _ = sess.run([self.train_opt], feed_dict=feed_dict)
                    # ce2, ff = sess.run([self.cross_entropy, self.logits], feed_dict=feed_dict)
                    # print step, ce2, ce-ce2, best_error

                    # print len(batch)
                    # print batch[0].shape

                    # _, ce = sess.run([self.train_opt, self.cross_entropy], feed_dict=feed_dict)
                    ce = sess.run(self.cross_entropy, feed_dict=feed_dict)
                    print ce
                    _ = sess.run(self.train_opt, feed_dict=feed_dict)
                    ce = sess.run(self.cross_entropy, feed_dict=feed_dict)
                    print ce
                    print

                    # if ce < .25:
                    # 	break

                    if step % 400 == 0:
                        act_out = sess.run(self.actual_output,
                                           feed_dict=feed_dict)
                        print act_out

                saver.save(sess, self.path_to_save_variables)
                print 'Saved variables to ' + self.path_to_save_variables
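
The batch-building loop above sets up next-frame prediction by shifting each
sequence: the input is frames [0..T-2] and the target is frames [1..T-1]. The
idiom in isolation (plain Python; seq stands in for any list of frames):

seq = list(range(8))   # stand-in for a list of 8 video frames
inputs = seq[:-1]      # frames 0..6, fed to the network
targets = seq[1:]      # frames 1..7, what the network should predict
assert len(inputs) == len(targets)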
Example #41
0
# Fit a straight line, of the form y=m*x+b

import tensorflow as tf

xs = [ 0.00,  1.00,  2.00, 3.00, 4.00, 5.00, 6.00, 7.00] # Features
ys = [-0.82, -0.94, -0.12, 0.26, 0.39, 0.64, 1.02, 1.00] # Labels

m_initial = -0.5 # Initial guesses
b_initial =  1.0

m = tf.Variable(m_initial) # Parameters
b = tf.Variable(b_initial)

ys_model = m*xs+b # Tensorflow knows this is a vector operation
error = tf.reduce_sum((ys-ys_model)**2) # Sum up every item in the vector

operation = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(error) # Does one step

with tf.Session() as session:
	session.run(tf.initialize_all_variables()) # Initialize the model variables

	for iteration in range(10000):
		session.run(operation)

	print('Slope:', m.eval(), 'Intercept:', b.eval())
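
As a sanity check, the closed-form least-squares fit should land close to the
values gradient descent converges to. A sketch using NumPy (np.polyfit with
degree 1 returns [slope, intercept]; the xs/ys are the same data as above):

import numpy as np

xs = [ 0.00,  1.00,  2.00, 3.00, 4.00, 5.00, 6.00, 7.00]
ys = [-0.82, -0.94, -0.12, 0.26, 0.39, 0.64, 1.02, 1.00]

slope, intercept = np.polyfit(xs, ys, 1)  # ordinary least squares
print('Slope:', slope, 'Intercept:', intercept)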

Example #42
0
def train(data_dirs,
          batch_size=32,
          num_classes=1,
          augment_data=True,
          checkpoint_dir='checkpoints',
          restore_checkpoint=True,
          checkpoint_file=None,
          restore_step=None,
          save_checkpoint_step=1000,
          save_summary_step=100,
          log_step=10,
          dropout=0.8,
          max_steps=100000,
          num_examples_per_epoch=1000,
          log_device_placement=False,
          cameras=None,
          min_angle=None,
          max_angle=None):

    with tf.Graph().as_default():

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if restore_checkpoint and ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            if not restore_step:
                restore_step = int(
                    ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])

            print('Checkpoint step:', restore_step)
            global_step = tf.Variable(restore_step, trainable=False)

        else:
            global_step = tf.Variable(0, trainable=False)
            restore_step = 0
            print('No checkpoint file found')

        # Get images and labels.
        images, labels = nvidia_input.inputs(
            batch_size=batch_size,
            data_dirs=data_dirs,
            shuffle=True,
            num_classes=num_classes,
            augment_data=augment_data,
            num_examples_per_epoch=num_examples_per_epoch,
            cameras=cameras,
            min_angle=min_angle,
            max_angle=max_angle,
            raw_labels=False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        output = nvidia_model.inference(images,
                                        dropout,
                                        num_classes=num_classes)

        # Calculate loss.
        _loss = nvidia_model.loss(output, labels, num_classes=num_classes)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = nvidia_model.train(_loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=log_device_placement))
        sess.run(init)

        if restore_checkpoint and ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(
                sess,
                os.path.join(checkpoint_dir,
                             'model.ckpt-{}'.format(restore_step)))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(checkpoint_dir, sess.graph)

        for step in range(restore_step, max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, _loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % log_step == 0:
                num_examples_per_step = batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.6f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step > 0 and step % save_summary_step == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step > 0 and (step % save_checkpoint_step == 0 or
                             (step + 1) == max_steps):
                checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
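
The loop above starts queue runners without a tf.train.Coordinator, so the
input threads are never stopped cleanly on exit. A minimal sketch of the
coordinated pattern from the same TF1 queue-runner API (sess and train_op are
assumed to be built as in the example above):

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        sess.run(train_op)  # one training step per iteration
except tf.errors.OutOfRangeError:
    print('Input queues exhausted')
finally:
    coord.request_stop()  # ask the input threads to stop
    coord.join(threads)   # wait for them to finish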
Example #43
0
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Generate the hidden-layer parameters
    weights1 = tf.Variable(
        # Normal distribution with standard deviation 0.1
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Generate the output-layer parameters
    weights2 = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    y = inference(x, None, weights1, biases1, weights2, biases2)
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, weights1, biases1, weights2,
                          biases2)
    # Cross-entropy is the loss that measures the gap between predictions and labels
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    # Average the cross-entropy over all examples in the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Compute the L2 regularization loss
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # As is common, regularize only the weights on the network edges, not the biases
    regularization = regularizer(weights1) + regularizer(weights2)
    # The total loss is the sum of the cross-entropy loss and the regularization loss
    loss = cross_entropy_mean + regularization

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Optimize the loss with tf.train.GradientDescentOptimizer; the loss here
    # includes both the cross-entropy term and the L2 regularization term
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
                .minimize(loss, global_step=global_step)

    # To run several ops in one step, TensorFlow provides two mechanisms:
    # tf.control_dependencies and tf.group. The two lines below are equivalent
    # to train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # Cast the boolean predictions to floats and take the mean; that mean is
    # the model's accuracy on this batch of data
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initialize the session and start training
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        # Prepare the validation data. During training, the validation set
        # gives a rough stopping criterion and a measure of training progress.
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }

        # Prepare the test data. In practice this data is unseen during
        # training and serves as the final measure of the model's quality.
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # Train the network iteratively
        for i in range(TRAINING_STEPS):
            # Report the result on the validation set every 1000 steps
            if i % 1000 == 0:
                # Evaluate the moving-average model on the validation data.
                # MNIST is small enough to evaluate on all validation data at
                # once; for more complex models or larger validation sets, an
                # oversized batch can be slow or even run out of memory.
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training steps, validation accuracy "
                      "using average model is %g" % (i, validate_acc))

            # Generate the batch of training data for this step and run one
            # training step (note: this must sit outside the if-block above,
            # otherwise training would only run once every 1000 iterations)
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # After training, measure the final accuracy of the model on the test data
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training steps, test accuracy "
              "using average model is %g" % (TRAINING_STEPS, test_acc))
Example #44
0
    def train(self, config):
        """Train DCGAN"""
        if config.dataset == 'mnist':
            data_X, data_y = self.load_mnist()
        else:
            data = glob(
                os.path.join("./data", config.dataset,
                             self.input_fname_pattern))
            data2 = glob(
                os.path.join("./data", config.dataset2,
                             self.input_fname_pattern))
            # np.random.shuffle(data)

        d_optim = tf.train.AdamOptimizer(config.learning_rate,
                                         beta1=config.beta1).minimize(
                                             self.d_loss, var_list=self.d_vars)
        g_optim = tf.train.AdamOptimizer(config.learning_rate,
                                         beta1=config.beta1).minimize(
                                             self.g_loss, var_list=self.g_vars)

        # Wasserstein-GAN: these RMSProp optimizers replace the Adam optimizers
        # above, so only the WGAN updates are actually applied
        d_optim = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            -self.d_loss, var_list=self.d_vars)
        g_optim = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
            -self.g_loss, var_list=self.g_vars)
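        # WGAN critic weight clipping: after each critic update, clip every
        # discriminator variable into [-0.01, 0.01] (clip_d is run together
        # with d_optim in the training loop below)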
        clip_d = [
            tf.assign(var, tf.clip_by_value(var, -0.01, 0.01))
            for var in self.d_vars
        ]

        try:
            tf.global_variables_initializer().run()
        except AttributeError:
            # Fall back for older TensorFlow releases without the newer API
            tf.initialize_all_variables().run()

        self.g_sum = merge_summary([
            self.z_sum, self.d__sum, self.G_sum, self.d_loss_fake_sum,
            self.g_loss_sum
        ])
        self.d_sum = merge_summary(
            [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
        self.writer = SummaryWriter("./logs", self.sess.graph)
        '''
        To Do:
        Option 1: add a function that maps images to [?, self.z_dim] vector (Done, not good)
        Option 2: modify z and related parameters
        Option 3: add a conv layer that maps images to z
        Option 4: add a CS layer that maps images to ?
        '''

        #sample_z = np.random.uniform(-1, 1, size=(self.sample_num, self.z_dim))
        '''need modify'''
        sample_z = np.random.uniform(-1, 1, size=(self.sample_num, self.z_dim))

        if config.dataset == 'mnist':
            sample_inputs = data_X[0:self.sample_num]
            sample_labels = data_y[0:self.sample_num]
        else:
            '''Need to prepare additional sample inputs for outputs'''
            sample_files = data[0:self.sample_num]
            sample = [
                get_image(sample_file,
                          input_height=self.input_height,
                          input_width=self.input_width,
                          resize_height=self.output_height,
                          resize_width=self.output_width,
                          is_crop=self.is_crop,
                          is_grayscale=self.is_grayscale)
                for sample_file in sample_files
            ]

            sample_zs = data2[0:self.sample_num]
            sample_2 = [
                get_image(sample_file,
                          input_height=self.input_height,
                          input_width=self.input_width,
                          resize_height=self.output_height,
                          resize_width=self.output_width,
                          is_crop=self.is_crop,
                          is_grayscale=self.is_grayscale)
                for sample_file in sample_zs
            ]
            sample_blurred = copy.deepcopy(sample_2)
            sample_2 = [s.reshape([self.z_dim]) for s in sample_2]

            if (self.is_grayscale):
                sample_inputs = np.array(sample).astype(np.float32)[:, :, :,
                                                                    None]
            else:
                sample_inputs = np.array(sample).astype(np.float32)
                sample_z = np.array(sample_2).astype(np.float32)

        counter = 1
        start_time = time.time()
        could_load, checkpoint_counter = self.load(self.checkpoint_dir)
        if could_load:
            counter = checkpoint_counter
            print(" [***] Load SUCCESS")
        else:
            print(" [!!!] Load failed...")

        for epoch in xrange(config.epoch):
            if config.dataset == 'mnist':
                batch_idxs = min(len(data_X),
                                 config.train_size) // config.batch_size
            else:
                data = glob(
                    os.path.join("./data", config.dataset,
                                 self.input_fname_pattern))
                data2 = glob(
                    os.path.join("./data", config.dataset2,
                                 self.input_fname_pattern))
                batch_idxs = min(len(data),
                                 config.train_size) // config.batch_size

            for idx in xrange(0, batch_idxs):
                '''need modify'''
                batch_z = np.random.uniform(
                    -1, 1, [config.batch_size, self.z_dim]).astype(np.float32)

                if config.dataset == 'mnist':
                    batch_images = data_X[idx * config.batch_size:(idx + 1) *
                                          config.batch_size]
                    batch_labels = data_y[idx * config.batch_size:(idx + 1) *
                                          config.batch_size]
                else:
                    '''Need to prepare additional batch inputs for training, namely, z'''
                    batch_files = data[idx * config.batch_size:(idx + 1) *
                                       config.batch_size]
                    batch = [
                        get_image(batch_file,
                                  input_height=self.input_height,
                                  input_width=self.input_width,
                                  resize_height=self.output_height,
                                  resize_width=self.output_width,
                                  is_crop=self.is_crop,
                                  is_grayscale=self.is_grayscale)
                        for batch_file in batch_files
                    ]

                    batch_zs = data2[0:self.sample_num]
                    batch_2 = [
                        get_image(batch_file,
                                  input_height=self.input_height,
                                  input_width=self.input_width,
                                  resize_height=self.output_height,
                                  resize_width=self.output_width,
                                  is_crop=self.is_crop,
                                  is_grayscale=self.is_grayscale).reshape(
                                      [self.z_dim]) for batch_file in batch_zs
                    ]
                    if (self.is_grayscale):
                        batch_images = np.array(batch).astype(
                            np.float32)[:, :, :, None]
                    else:
                        batch_images = np.array(batch).astype(np.float32)
                        batch_z = np.array(batch_2).astype(np.float32)

                if config.dataset == 'mnist':
                    # Update D network
                    _, summary_str = self.sess.run(
                        [d_optim, self.d_sum],
                        feed_dict={
                            self.inputs: batch_images,
                            self.z: batch_z,
                            self.y: batch_labels,
                        })
                    self.writer.add_summary(summary_str, counter)

                    # Update G network
                    _, summary_str = self.sess.run([g_optim, self.g_sum],
                                                   feed_dict={
                                                       self.z: batch_z,
                                                       self.y: batch_labels,
                                                   })
                    self.writer.add_summary(summary_str, counter)

                    # Run g_optim twice to make sure that d_loss does not go to
                    # zero (different from paper)
                    _, summary_str = self.sess.run([g_optim, self.g_sum],
                                                   feed_dict={
                                                       self.z: batch_z,
                                                       self.y: batch_labels
                                                   })
                    self.writer.add_summary(summary_str, counter)

                    errD_fake = self.d_loss_fake.eval({
                        self.z: batch_z,
                        self.y: batch_labels
                    })
                    errD_real = self.d_loss_real.eval({
                        self.inputs: batch_images,
                        self.y: batch_labels
                    })
                    errG = self.g_loss.eval({
                        self.z: batch_z,
                        self.y: batch_labels
                    })
                else:
                    ###################################################################################
                    # Update D network
                    _, summary_str = self.sess.run([d_optim, self.d_sum],
                                                   feed_dict={
                                                       self.inputs:
                                                       batch_images,
                                                       self.z: batch_z
                                                   })
                    self.writer.add_summary(summary_str, counter)

                    # Update G network
                    _, summary_str = self.sess.run([g_optim, self.g_sum],
                                                   feed_dict={self.z: batch_z})
                    self.writer.add_summary(summary_str, counter)

                    # Run g_optim twice to make sure that d_loss does not go to
                    # zero (different from paper)
                    _, summary_str = self.sess.run([g_optim, self.g_sum],
                                                   feed_dict={self.z: batch_z})
                    self.writer.add_summary(summary_str, counter)
                    ###################################################################################
                    # Wasserstein-GAN
                    _, summary_str, _ = self.sess.run(
                        [d_optim, self.d_sum, clip_d],
                        feed_dict={
                            self.inputs: batch_images,
                            self.z: batch_z
                        })
                    self.writer.add_summary(summary_str, counter)

                    # Update G network
                    if idx % 5 == 0:
                        _, summary_str = self.sess.run(
                            [g_optim, self.g_sum], feed_dict={self.z: batch_z})
                        self.writer.add_summary(summary_str, counter)
                    ###################################################################################

                    errD_fake = self.d_loss_fake.eval({self.z: batch_z})
                    errD_real = self.d_loss_real.eval(
                        {self.inputs: batch_images})
                    errG = self.g_loss.eval({self.z: batch_z})

                counter += 1
                print(
                    "Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f"
                    % (epoch, idx, batch_idxs, time.time() - start_time,
                       errD_fake + errD_real, errG))

                # outputs
                if np.mod(counter, 100) == 1:
                    if config.dataset == 'mnist':
                        samples, d_loss, g_loss = self.sess.run(
                            [self.sampler, self.d_loss, self.g_loss],
                            feed_dict={
                                self.z: sample_z,
                                self.inputs: sample_inputs,
                                self.y: sample_labels,
                            })
                        manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
                        manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
                        save_images(
                            samples, [manifold_h, manifold_w],
                            './{}/train_{:02d}_{:04d}.png'.format(
                                config.sample_dir, epoch, idx))
                        print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                              (d_loss, g_loss))
                    else:
                        try:
                            samples, d_loss, g_loss = self.sess.run(
                                [self.sampler, self.d_loss, self.g_loss],
                                feed_dict={
                                    self.z: sample_z,
                                    self.inputs: sample_inputs,
                                },
                            )
                            manifold_h = int(np.ceil(np.sqrt(
                                samples.shape[0])))
                            manifold_w = int(
                                np.floor(np.sqrt(samples.shape[0])))

                            save_images(
                                samples, [manifold_h, manifold_w],
                                './{}/train_{:02d}_{:04d}.png'.format(
                                    config.sample_dir, epoch, idx))

                            sample_blurred = np.asarray(sample_blurred)
                            save_images(
                                sample_blurred, [manifold_h, manifold_w],
                                './{}/blurred_{:02d}_{:04d}.png'.format(
                                    config.sample_dir, epoch, idx))

                            print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                                  (d_loss, g_loss))
                        except Exception:
                            print("one pic error!...")

                if np.mod(counter, 500) == 2:
                    self.save(config.checkpoint_dir, counter)
Example #45
0
def train(outdir):
    HAVE_TRUTH = False
    ''' Set up paths and start log '''
    npzfile = outdir + 'result'
    repfile = outdir + 'reps'
    outform = outdir + 'y_pred'
    lossform = outdir + 'loss'
    logfile = outdir + 'log.txt'
    f = open(logfile, 'w')
    f.close()
    ''' Set random seeds '''
    random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    ''' Save parameters '''
    save_config(outdir + 'config.txt')
    log(
        logfile, 'Training with hyperparameters: alpha=%.2e, lambda=%.2e' %
        (FLAGS.p_alpha, FLAGS.p_lambda))
    ''' Load data '''
    log(logfile, 'Loading data for dimensions... ' + FLAGS.datapath)
    x_all, t_all, y_f_all, y_cf_all = load_data(FLAGS.datapath)
    if y_cf_all is not None:
        HAVE_TRUTH = True
    dim = x_all.shape[1]
    n = x_all.shape[0]

    log(logfile, 'Loaded data with shape [%d,%d]' % (n, dim))
    ''' Start Session '''
    log(logfile, 'Starting session...')
    sess = tf.Session()
    ''' Initialize input placeholders '''
    x_ = tf.placeholder("float", shape=[None, dim], name='x_')  # Features
    t_ = tf.placeholder("float", shape=[None, 1], name='t_')  # Treatment
    y_ = tf.placeholder("float", shape=[None, 1], name='y_')  # Outcome
    ''' Parameter placeholders '''
    alpha_ = tf.placeholder("float", name='alpha_')
    lambda_ = tf.placeholder("float", name='lambda_')
    do_in = tf.placeholder("float", name='dropout_in')
    do_out = tf.placeholder("float", name='dropout_out')
    p = tf.placeholder("float", name='p_treated')
    ''' Define model graph '''
    log(logfile, 'Defining graph...')
    dims = [dim, FLAGS.dim_in, FLAGS.dim_out]
    CFR = cfr.cfr_net(x_, t_, y_, p, FLAGS, alpha_, lambda_, do_in, do_out,
                      dims)

    if FLAGS.varsel:
        w_proj = tf.placeholder("float", shape=[dim], name='w_proj')
        projection = CFR.weights_in[0].assign(w_proj)
    ''' Set up optimizer '''
    log(logfile, 'Training...')
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(FLAGS.lrate, global_step, \
        NUM_ITERATIONS_PER_DECAY, FLAGS.lrate_decay, staircase=True)
    train_step = tf.train.RMSPropOptimizer(lr, FLAGS.decay).minimize(
        CFR.tot_loss, global_step=global_step)
    ''' Compute treatment probability'''
    t_cf_all = 1 - t_all
    if FLAGS.use_p_correction:
        p_treated = np.mean(t_all)
    else:
        p_treated = 0.5
    ''' Set up loss feed_dicts'''
    dict_factual = {x_: x_all, t_: t_all, y_: y_f_all, \
        do_in:1.0, do_out:1.0, alpha_:FLAGS.p_alpha, \
        lambda_:FLAGS.p_lambda, p:p_treated}

    if HAVE_TRUTH:
        dict_cfactual = {x_: x_all, t_: t_cf_all, y_: y_cf_all, \
            do_in:1.0, do_out:1.0}
    ''' Initialize tensorflow variables '''
    sess.run(tf.initialize_all_variables())
    ''' Compute losses before training'''
    losses = []
    obj_loss, f_error, imb_err = sess.run([CFR.tot_loss, CFR.pred_loss, \
        CFR.imb_loss], feed_dict=dict_factual)

    cf_error = np.nan
    if HAVE_TRUTH:
        cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

    losses.append([obj_loss, f_error, cf_error, imb_err])

    log(logfile, 'Objective Factual CFactual Imbalance')
    log(logfile, str(losses[0]))
    ''' Train for m iterations '''
    for i in range(FLAGS.iterations):
        ''' Fetch sample '''
        I = random.sample(range(0, n), FLAGS.batch_size)
        x_batch = x_all[I, :]
        t_batch = t_all[I]
        y_batch = y_f_all[I]
        ''' Do one step of gradient descent '''
        sess.run(train_step, feed_dict={x_: x_batch, t_: t_batch, \
            y_: y_batch, do_in:FLAGS.dropout_in, do_out:FLAGS.dropout_out, \
            alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda, p:p_treated})
        ''' Project variable selection weights '''
        if FLAGS.varsel:
            wip = cfr.simplex_project(sess.run(CFR.weights_in[0]), 1)
            sess.run(projection, feed_dict={w_proj: wip})
        ''' Compute loss every N iterations '''
        if i % FLAGS.output_delay == 0:
            obj_loss, f_error, imb_err = sess.run(
                [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss],
                feed_dict=dict_factual)

            y_pred = sess.run(CFR.output, feed_dict={x_: x_batch, t_: t_batch, \
                y_: y_batch, do_in:FLAGS.dropout_in, do_out:FLAGS.dropout_out, \
                alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda, p:p_treated})

            cf_error = np.nan
            if HAVE_TRUTH:
                cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

            losses.append([obj_loss, f_error, cf_error, imb_err])
            loss_str = str(
                i) + '\tObj: %.4g,\tF: %.4g,\tCf: %.4g,\tImb: %.4g' % (
                    obj_loss, f_error, cf_error, imb_err)

            if FLAGS.loss == 'log':
                y_pred = 1.0 * (y_pred > 0.5)
                acc = 100 * (1 - np.mean(np.abs(y_batch - y_pred)))
                loss_str += ',\tAcc: %.2f%%' % acc

            log(logfile, loss_str)

    log(logfile, 'Ending learning rate: %.2g' % sess.run(lr))
    ''' Predict response and store '''
    ypred_f = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_all, \
        do_in:1.0, do_out:1.0, alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda})
    ypred_c = sess.run(CFR.output, feed_dict={x_: x_all, t_: t_cf_all, \
        do_in:1.0, do_out:1.0, alpha_:FLAGS.p_alpha, lambda_:FLAGS.p_lambda})

    ypred = np.concatenate((ypred_f, ypred_c), axis=1)

    log(logfile, 'Saving to %s...' % outform)
    if FLAGS.output_csv:
        np.savetxt('%s.csv' % (outform), ypred, delimiter=',')
        np.savetxt('%s.csv' % (lossform), losses, delimiter=',')
    ''' Compute weights'''
    if FLAGS.varsel:
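        # NOTE: all_weights and all_beta are not defined in this snippet; they
        # are assumed to be initialized earlier (e.g. as arrays stacked across
        # experiments) when FLAGS.varsel is set.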
        all_weights = np.dstack((all_weights, sess.run(CFR.weights_in[0])))
        all_beta = np.dstack((all_beta, sess.run(CFR.weights_pred)))
    ''' Save results and predictions '''
    if FLAGS.varsel:
        np.savez(npzfile,
                 pred=ypred,
                 loss=losses,
                 w=all_weights,
                 beta=all_beta)
    else:
        np.savez(npzfile, pred=ypred, loss=losses)
    ''' Save representations '''
    if FLAGS.save_rep:
        reps = sess.run([CFR.h_rep],
                        feed_dict={
                            x_: x_all,
                            do_in: 1.0,
                            do_out: 0.0
                        })
        np.savez(repfile, rep=reps)
Example #46
0
  def train(self, config):
    d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
              .minimize(self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
              .minimize(self.g_loss, var_list=self.g_vars)
    try:
      tf.global_variables_initializer().run()
    except AttributeError:
      # Fall back for older TensorFlow releases without the newer API
      tf.initialize_all_variables().run()

    self.g_sum = merge_summary([self.z_sum, self.d__sum,
      self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = SummaryWriter("./logs", self.sess.graph)

    sample_z = np.random.uniform(-1, 1, size=(self.sample_num , self.z_dim))
    
    sample_files = self.data[0:self.sample_num]
    sample = [
        get_image(sample_file,
                  input_height=self.input_height,
                  input_width=self.input_width,
                  resize_height=self.output_height,
                  resize_width=self.output_width,
                  crop=self.crop,
                  grayscale=self.grayscale) for sample_file in sample_files]
    if (self.grayscale):
      sample_inputs = np.array(sample).astype(np.float32)[:, :, :, None]
    else:
      sample_inputs = np.array(sample).astype(np.float32)

    counter = 1
    start_time = time.time()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
      counter = checkpoint_counter
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    for epoch in xrange(config.epoch):  
      self.data = glob(os.path.join(
        config.data_dir, config.dataset, self.input_fname_pattern))
      batch_idxs = min(len(self.data), config.train_size) // config.batch_size

      for idx in xrange(0, batch_idxs):
        batch_files = self.data[idx*config.batch_size:(idx+1)*config.batch_size]
        batch = [
            get_image(batch_file,
                      input_height=self.input_height,
                      input_width=self.input_width,
                      resize_height=self.output_height,
                      resize_width=self.output_width,
                      crop=self.crop,
                      grayscale=self.grayscale) for batch_file in batch_files]

        # Remove None values
        batch = [x for x in batch if x is not None]
          
        if self.grayscale:
          batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
        else:
          batch_images = np.array(batch).astype(np.float32)

        batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
              .astype(np.float32)


        # Update D network
        _, summary_str = self.sess.run([d_optim, self.d_sum],
          feed_dict={ self.inputs: batch_images, self.z: batch_z })
        self.writer.add_summary(summary_str, counter)

        # Update G network
        _, summary_str = self.sess.run([g_optim, self.g_sum],
          feed_dict={ self.z: batch_z })
        self.writer.add_summary(summary_str, counter)

        # Run g_optim twice to make sure that d_loss does not go to zero (different from paper)
        _, summary_str = self.sess.run([g_optim, self.g_sum],
          feed_dict={ self.z: batch_z })
        self.writer.add_summary(summary_str, counter)
        
        errD_fake = self.d_loss_fake.eval({ self.z: batch_z })
        errD_real = self.d_loss_real.eval({ self.inputs: batch_images })
        errG = self.g_loss.eval({self.z: batch_z})

        counter += 1
        print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
          % (epoch, config.epoch, idx, batch_idxs,
            time.time() - start_time, errD_fake+errD_real, errG))

        if np.mod(counter, 100) == 1:
          try:
            samples, d_loss, g_loss = self.sess.run(
              [self.sampler, self.d_loss, self.g_loss],
              feed_dict={
                  self.z: sample_z,
                  self.inputs: sample_inputs,
              },
            )
            save_images(samples, image_manifold_size(samples.shape[0]),
                  './{}/train_{:02d}_{:04d}.png'.format(config.sample_dir, epoch, idx))
            print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss)) 
          except Exception:
            print("one pic error!...")

      if np.mod(counter, 500) == 2:
        self.save(config.checkpoint_dir, counter)
Example #47
0
 def train(self, lr=1e-3, epochs=100):
     optimizer = Adam(lr=lr, decay=lr / 10)
     self.model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
     keras.backend.get_session().run(tf.initialize_all_variables())
     history = self.model.fit(self.X_train, self.y_train_vect, batch_size=50, nb_epoch=epochs, verbose=1, validation_data=(self.X_val, self.y_val_vect))
Example #48
0
def main(argv=None):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    if FLAGS.self_test:
        print('Running self-test.')
        train_data, train_labels = fake_data(256)
        validation_data, validation_labels = fake_data(EVAL_BATCH_SIZE)
        test_data, test_labels = fake_data(EVAL_BATCH_SIZE)
        num_epochs = 1
    else:
        train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
        train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
        test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
        test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

        train_data = extract_data(train_data_filename, 60000)
        train_labels = extract_labels(train_labels_filename, 60000)
        test_data = extract_data(test_data_filename, 10000)
        test_labels = extract_labels(test_labels_filename, 10000)

        validation_data = train_data[:VALIDATION_SIZE, ...]
        validation_labels = train_labels[:VALIDATION_SIZE]
        train_data = train_data[VALIDATION_SIZE:, ...]
        train_labels = train_labels[VALIDATION_SIZE:]
        num_epochs = NUM_EPOCHS
    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            train_size = train_labels.shape[0]
            train_data_node = tf.placeholder(tf.float32,
                                             shape=(BATCH_SIZE, IMAGE_SIZE,
                                                    IMAGE_SIZE, NUM_CHANNELS))
            train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE, ))
            eval_data = tf.placeholder(tf.float32,
                                       shape=(EVAL_BATCH_SIZE, IMAGE_SIZE,
                                              IMAGE_SIZE, NUM_CHANNELS))
            conv1_weights = tf.Variable(
                tf.truncated_normal([5, 5, NUM_CHANNELS, 32],
                                    stddev=0.1,
                                    seed=SEED))
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(
                tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED))
            conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
            fc1_weights = tf.Variable(
                tf.truncated_normal(
                    [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
                    stddev=0.1,
                    seed=SEED))
            fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
            fc2_weights = tf.Variable(
                tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
            fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

            def model(data, train=False):
                conv = tf.nn.conv2d(data,
                                    conv1_weights,
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
                relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
                pool = tf.nn.max_pool(relu,
                                      ksize=[1, 2, 2, 1],
                                      strides=[1, 2, 2, 1],
                                      padding='SAME')
                conv = tf.nn.conv2d(pool,
                                    conv2_weights,
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
                relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
                pool = tf.nn.max_pool(relu,
                                      ksize=[1, 2, 2, 1],
                                      strides=[1, 2, 2, 1],
                                      padding='SAME')
                pool_shape = pool.get_shape().as_list()
                reshape = tf.reshape(pool, [
                    pool_shape[0],
                    pool_shape[1] * pool_shape[2] * pool_shape[3]
                ])

                hidden = tf.nn.relu(
                    tf.matmul(reshape, fc1_weights) + fc1_biases)
                if train:
                    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
                return tf.matmul(hidden, fc2_weights) + fc2_biases

            logits = model(train_data_node, True)
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=train_labels_node))
            regularizers = (tf.nn.l2_loss(fc1_weights) +
                            tf.nn.l2_loss(fc1_biases) +
                            tf.nn.l2_loss(fc2_weights) +
                            tf.nn.l2_loss(fc2_biases))
            loss += 5e-4 * regularizers
            batch = tf.Variable(0)
            learning_rate = tf.train.exponential_decay(0.01,
                                                       batch * BATCH_SIZE,
                                                       train_size,
                                                       0.95,
                                                       staircase=True)
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, 0.9).minimize(loss, global_step=batch)
            train_prediction = tf.nn.softmax(logits)
            eval_prediction = tf.nn.softmax(model(eval_data))

            def eval_in_batches(data, sess):
                size = data.shape[0]
                if size < EVAL_BATCH_SIZE:
                    raise ValueError(
                        "batch size for evals larger than dataset: %d" % size)
                predictions = numpy.ndarray(shape=(size, NUM_LABELS),
                                            dtype=numpy.float32)
                for begin in xrange(0, size, EVAL_BATCH_SIZE):
                    end = begin + EVAL_BATCH_SIZE
                    if end <= size:
                        predictions[begin:end, :] = sess.run(
                            eval_prediction,
                            feed_dict={eval_data: data[begin:end, ...]})
                    else:
                        batch_predictions = sess.run(
                            eval_prediction,
                            feed_dict={
                                eval_data: data[-EVAL_BATCH_SIZE:, ...]
                            })
                        predictions[begin:, :] = batch_predictions[begin -
                                                                   size:, :]
                return predictions

            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()
            print('Initialized!')
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 global_step=batch)
        start_time = time.time()
        with sv.prepare_or_wait_for_session(server.target,
                                            config=None) as sess:

            for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
                offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
                batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
                feed_dict = {
                    train_data_node: batch_data,
                    train_labels_node: batch_labels
                }
                _, l, lr, predictions = sess.run(
                    [optimizer, loss, learning_rate, train_prediction],
                    feed_dict=feed_dict)
                if step % EVAL_FREQUENCY == 0:
                    elapsed_time = time.time() - start_time
                    start_time = time.time()
                    print('Step %d (epoch %.2f), %.1f ms' %
                          (step, float(step) * BATCH_SIZE / train_size,
                           1000 * elapsed_time / EVAL_FREQUENCY))
                    print('Minibatch loss: %.3f, learning rate: %.6f' %
                          (l, lr))
                    print('Minibatch error: %.1f%%' %
                          error_rate(predictions, batch_labels))
                    print('Validation error: %.1f%%' %
                          error_rate(eval_in_batches(validation_data, sess),
                                     validation_labels))
                    sys.stdout.flush()
            test_error = error_rate(eval_in_batches(test_data, sess),
                                    test_labels)
            print('Test error: %.1f%%' % test_error)
            if FLAGS.self_test:
                print('test_error', test_error)
                assert test_error == 0.0, 'expected 0.0 test_error, got %.2f' % (
                    test_error, )
        sv.stop()
Example #49
0
    def __init__(self,
                 phase,
                 visualize,
                 output_dir,
                 batch_size,
                 initial_learning_rate,
                 steps_per_checkpoint,
                 model_dir,
                 target_embedding_size,
                 attn_num_hidden,
                 attn_num_layers,
                 clip_gradients,
                 max_gradient_norm,
                 session,
                 load_model,
                 gpu_id,
                 custom_cnn,
                 use_gru,
                 use_distance=True,
                 max_image_width=160,
                 max_image_height=60,
                 max_prediction_length=18,
                 channels=1,
                 reg_val=0):

        self.use_distance = use_distance
        self.start_time = str(int(time.time()))

        # We need resized width, not the actual width
        max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

        self.max_original_width = max_image_width
        self.max_width = int(math.ceil(max_resized_width))

        self.encoder_size = int(math.ceil(1. * self.max_width / 4))
        self.decoder_size = max_prediction_length + 2
        self.buckets = [(self.encoder_size, self.decoder_size)]

        if gpu_id >= 0:
            device_id = '/gpu:' + str(gpu_id)
        else:
            device_id = '/cpu:0'
        self.device_id = device_id

        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

        if phase == 'test':
            batch_size = 1

        logging.info('phase: %s', phase)
        logging.info('model_dir: %s', model_dir)
        logging.info('load_model: %s', load_model)
        logging.info('output_dir: %s', output_dir)
        logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
        logging.info('batch_size: %d', batch_size)
        logging.info('learning_rate: %f', initial_learning_rate)
        logging.info('reg_val: %d', reg_val)
        logging.info('max_gradient_norm: %f', max_gradient_norm)
        logging.info('clip_gradients: %s', clip_gradients)
        logging.info('max_image_width: %f', max_image_width)
        logging.info('max_prediction_length: %f', max_prediction_length)
        logging.info('channels: %d', channels)
        logging.info('target_embedding_size: %f', target_embedding_size)
        logging.info('attn_num_hidden: %d', attn_num_hidden)
        logging.info('attn_num_layers: %d', attn_num_layers)
        logging.info('visualize: %s', visualize)

        if use_gru:
            logging.info('using GRU in the decoder.')

        self.reg_val = reg_val
        self.sess = session
        self.steps_per_checkpoint = steps_per_checkpoint
        self.model_dir = model_dir
        self.output_dir = output_dir
        self.batch_size = batch_size
        self.global_step = tf.Variable(0, trainable=False)
        self.phase = phase
        self.visualize = visualize
        self.learning_rate = initial_learning_rate
        self.clip_gradients = clip_gradients
        self.channels = channels

        train_config_file = "history/training_config" + self.start_time + ".txt"
        with open(train_config_file, "w") as f:
            print('model_dir: ', model_dir, file=f)
            print('phase: ', phase, file=f)
            print('load_model: ', load_model, file=f)
            print('output_dir: ', output_dir, file=f)
            print('steps_per_checkpoint: ', steps_per_checkpoint, file=f)
            print('batch_size: ', batch_size, file=f)
            print('learning_rate: ', initial_learning_rate, file=f)
            print('reg_val: ', reg_val, file=f)
            print('max_gradient_norm: ', max_gradient_norm, file=f)
            print('clip_gradients: ', clip_gradients, file=f)
            print('max_image_width: ', max_image_width, file=f)
            print('max_prediction_length: ', max_prediction_length, file=f)
            print('channels: ', channels, file=f)
            print('target_embedding_size: ', target_embedding_size, file=f)
            print('attn_num_hidden: ', attn_num_hidden, file=f)
            print('attn_num_layers: ', attn_num_layers, file=f)
            print('visualize: ', visualize, file=f)

        if phase == 'train':
            self.forward_only = False
        else:
            self.forward_only = True

        with tf.device(device_id):

            self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
            self.height_float = tf.constant(DataGen.IMAGE_HEIGHT,
                                            dtype=tf.float64)

            self.img_pl = tf.placeholder(tf.string,
                                         name='input_image_as_bytes')
            self.img_data = tf.cond(tf.less(tf.rank(self.img_pl), 1),
                                    lambda: tf.expand_dims(self.img_pl, 0),
                                    lambda: self.img_pl)
            self.img_data = tf.map_fn(self._prepare_image,
                                      self.img_data,
                                      dtype=tf.float32)
            num_images = tf.shape(self.img_data)[0]

            # TODO: create a mask depending on the image/batch size
            self.encoder_masks = []
            for i in xrange(self.encoder_size + 1):
                self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(self.decoder_size + 1):
                self.decoder_inputs.append(tf.tile([1], [num_images]))
                if i < self.decoder_size:
                    self.target_weights.append(tf.tile([1.], [num_images]))
                else:
                    self.target_weights.append(tf.tile([0.], [num_images]))

            if custom_cnn:
                cnn_model = CNN_cust(self.img_data, not self.forward_only)
            else:
                cnn_model = CNN(self.img_data, not self.forward_only)
            self.conv_output = cnn_model.tf_output()
            self.perm_conv_output = tf.transpose(self.conv_output,
                                                 perm=[1, 0, 2])
            self.attention_decoder_model = Seq2SeqModel(
                encoder_masks=self.encoder_masks,
                encoder_inputs_tensor=self.perm_conv_output,
                decoder_inputs=self.decoder_inputs,
                target_weights=self.target_weights,
                target_vocab_size=len(DataGen.CHARMAP),
                buckets=self.buckets,
                target_embedding_size=target_embedding_size,
                attn_num_layers=attn_num_layers,
                attn_num_hidden=attn_num_hidden,
                forward_only=self.forward_only,
                use_gru=use_gru)

            table = tf.contrib.lookup.MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value="",
                checkpoint=True,
            )

            insert = table.insert(
                tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
                tf.constant(DataGen.CHARMAP),
            )

            with tf.control_dependencies([insert]):
                num_feed = []
                prb_feed = []

                for line in xrange(len(self.attention_decoder_model.output)):
                    guess = tf.argmax(
                        self.attention_decoder_model.output[line], axis=1)
                    proba = tf.reduce_max(tf.nn.softmax(
                        self.attention_decoder_model.output[line]),
                                          axis=1)
                    num_feed.append(guess)
                    prb_feed.append(proba)

                # Join the predictions into a single output string.
                trans_output = tf.transpose(num_feed)
                trans_output = tf.map_fn(
                    lambda m: tf.foldr(
                        lambda a, x: tf.cond(
                            tf.equal(x, DataGen.EOS_ID),
                            lambda: '',
                            lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                        ),
                        m,
                        initializer=''),
                    trans_output,
                    dtype=tf.string)

                # Calculate the total probability of the output string.
                trans_outprb = tf.transpose(prb_feed)
                trans_outprb = tf.gather(trans_outprb,
                                         tf.range(tf.size(trans_output)))
                trans_outprb = tf.map_fn(lambda m: tf.foldr(
                    lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                    m,
                    initializer=tf.cast(1, tf.float64)),
                                         trans_outprb,
                                         dtype=tf.float64)

                self.prediction = tf.cond(
                    tf.equal(tf.shape(trans_output)[0], 1),
                    lambda: trans_output[0],
                    lambda: trans_output,
                )
                self.probability = tf.cond(
                    tf.equal(tf.shape(trans_outprb)[0], 1),
                    lambda: trans_outprb[0],
                    lambda: trans_outprb,
                )

                self.prediction = tf.identity(self.prediction,
                                              name='prediction')
                self.probability = tf.identity(self.probability,
                                               name='probability')

            if not self.forward_only:  # train
                self.updates = []
                self.summaries_by_bucket = []

                params = tf.trainable_variables()
                opt = tf.train.AdadeltaOptimizer(
                    learning_rate=initial_learning_rate)
                loss_op = self.attention_decoder_model.loss

                if self.reg_val > 0:
                    reg_losses = tf.get_collection(
                        tf.GraphKeys.REGULARIZATION_LOSSES)
                    logging.info('Adding %s regularization losses',
                                 len(reg_losses))
                    logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                    loss_op = self.reg_val * tf.reduce_sum(
                        reg_losses) + loss_op

                gradients, params = list(
                    zip(*opt.compute_gradients(loss_op, params)))
                if self.clip_gradients:
                    gradients, _ = tf.clip_by_global_norm(
                        gradients, max_gradient_norm)

                # Summaries for loss, variables, gradients, gradient norms and total gradient norm.
                summaries = [
                    tf.summary.scalar("loss", loss_op),
                    tf.summary.scalar("total_gradient_norm",
                                      tf.global_norm(gradients))
                ]
                all_summaries = tf.summary.merge(summaries)
                self.summaries_by_bucket.append(all_summaries)

                # update op - apply gradients
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.updates.append(
                        opt.apply_gradients(list(zip(gradients, params)),
                                            global_step=self.global_step))

        self.saver_all = tf.train.Saver(tf.all_variables())
        self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and load_model:
            # pylint: disable=no-member
            logging.info("Reading model parameters from %s",
                         ckpt.model_checkpoint_path)
            self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            logging.info("Created model with fresh parameters.")
            self.sess.run(tf.initialize_all_variables())
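Once the model above is built, the prediction and probability tensors can be
fetched directly. A hedged usage sketch (the model variable and the image file
name are assumptions, not part of the original):

with open('image.png', 'rb') as img_file:  # hypothetical input file
    image_bytes = img_file.read()
text, prob = model.sess.run(
    [model.prediction, model.probability],
    feed_dict={model.img_pl: image_bytes})

Feeding a single scalar string works because the tf.cond in the constructor
expands a rank-0 input into a rank-1 batch before preprocessing.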
Example #50
0
def train(x_tr, y_tr, x_va, y_va, x_te, BATCH_SIZE, TEST_SIZE, EPOCH,
          SAMPLE_NUM, NUM_LABELS, REGULARIZER_RATE, LEARNING_RATE,
          MODEL_SAVE_PATH, MODEL_NAME, TIMEPNG_NAME, ACCPNG_NAME, LOG_NAME,
          PREOUT_NAME):
    '''
    x_tr: np.array 4D, [samples, height, width, channel]
    y_tr: tf.tensor, [samples, labels]
    BATCH_SIZE: int, training batch size
    TRAINING_STEPS: int, number of training steps
    SAMPLE_NUM: int, number of training samples
    NUM_LABELS: int, number of classes
    REGULARIZER_RATE: np.float, regularization coefficient
    MOVING_AVERAGE_DECAY: np.float, moving-average decay
    LEARNING_RATE_BASE: np.float, base learning rate
    LEARNING_RATE_DECAY: np.float, learning-rate decay rate
    MODEL_SAVE_PATH: directory where model files are saved
    MODEL_NAME: model name
    CONV1_SIZE: int, kernel size of conv layer 1
    NUM_CHANNELS: int, number of channels in the input data
    CONV1_DEEP: int, number of kernels in conv layer 1
    CONV2_SIZE: int, kernel size of conv layer 2
    CONV2_DEEP: int, number of kernels in conv layer 2
    FC_SIZE: int, size of the fully connected hidden layer
    '''
    MODEL_PATH = MODEL_SAVE_PATH
    accuracy_report = []
    loss_report = []
    time_report = []

    g2 = tf.Graph()
    with g2.as_default():
        x_holder = tf.placeholder(
            tf.float32, [None, x_tr.shape[1], x_tr.shape[2], NUM_CHANNELS],
            name='x-input')
        y_holder = tf.placeholder(tf.float32, [None, NUM_LABELS],
                                  name='y-input')
        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZER_RATE)
        sub_pred, pred = inference(NUM_LABELS, x_holder, regularizer, False,
                                   True)

        #    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step)

        #    variable_averages_op = variable_averages.apply(tf.trainable_variables())

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred, labels=tf.argmax(y_holder, 1))
        cross_entropy_sum = tf.reduce_sum(cross_entropy)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

        #    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,global_step,int(SAMPLE_NUM/BATCH_SIZE),LEARNING_RATE_DECAY)
        opt = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        grads_and_vars = opt.compute_gradients(loss, tf.trainable_variables())
        capped_grads_and_vars = [(MyCapper(gv[0]), gv[1])
                                 for gv in grads_and_vars]
        #    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss,global_step=global_step)
        train_op = opt.apply_gradients(capped_grads_and_vars)
        correction_prediction = tf.equal(tf.argmax(y_holder, 1),
                                         tf.argmax(pred, 1))
        accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))
        out = tf.argmax(pred, 1)
        init = tf.initialize_all_variables()
        g2.finalize()
    #    with tf.control_dependencies([train_step,variable_averages_op]):
    #        train_op = tf.no_op(name='train')

    #    saver = tf.train.Saver()

    with tf.Session(graph=g2) as sess2:
        sess2.run(init)
        epoch = 0
        max_batch = x_tr.shape[0] / BATCH_SIZE
        while epoch <= EPOCH:
            batch = 0
            while batch * BATCH_SIZE <= x_tr.shape[0]:
                if (batch + 1) * BATCH_SIZE <= x_tr.shape[0]:
                    xs = x_tr[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE, :]
                    ys = y_tr[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE, :]
                    batch += 1
                else:
                    xs = x_tr[batch * BATCH_SIZE:x_tr.shape[0], :]
                    ys = y_tr[batch * BATCH_SIZE:x_tr.shape[0], :]
                    batch += 1
                    print('will break')

                start_time = time.time()
                _, cem_value, loss_value = sess2.run(
                    [train_op, cross_entropy_mean, loss],
                    feed_dict={
                        x_holder: xs,
                        y_holder: ys
                    })
                end_time = time.time()

                #   Test and logging frequency
                if batch % 100 == 0:
                    log = 'epoch:%d/%d    batch:%d/%d    lr:%s    cem:%f   ce+l2:%f   time:%f' % (
                        epoch, EPOCH, batch, max_batch, LEARNING_RATE,
                        cem_value, loss_value, end_time - start_time)
                    print(log)
                    f = open(LOG_NAME, 'a')
                    f.write(log + '\n')
                    f.close()
            epoch += 1

            accuracy_i_list = []
            for i in range(TEST_SIZE):
                if i % 500 == 0:
                    print('accuracy_calu:%d/%d' % (i, TEST_SIZE))
                x_va_i = np.reshape(
                    x_va[i, :, :, :],
                    [1, x_va.shape[1], x_va.shape[2], x_va.shape[3]])
                y_va_i = np.reshape(y_va[i, :], [1, y_va.shape[1]])
                accuracy_i = sess2.run(accuracy,
                                       feed_dict={
                                           x_holder: x_va_i,
                                           y_holder: y_va_i
                                       })
                accuracy_i_list.append(accuracy_i)
            accuracy_score = np.mean(np.array(accuracy_i_list))
            print(accuracy_score)
            f = open(LOG_NAME, 'a')
            f.write(str(accuracy_score) + '\n')
            f.close()
            accuracy_report.append(accuracy_score)
            plot_report1(accuracy_report, ACCPNG_NAME)
#                saver.save(sess,os.path.join(MODEL_SAVE_PATH,MODEL_NAME),global_step=global_step)

        sub_list = []
        for i in range(x_tr.shape[0]):
            if i % 500 == 0:
                print('o_list:%d/%d' % (i, x_tr.shape[0]))
            o_i_x = np.reshape(
                x_tr[i, :, :, :],
                [1, x_tr.shape[1], x_tr.shape[2], x_tr.shape[3]])
            o_i_y = np.reshape(y_tr[i, :], [1, y_tr.shape[1]])
            sub_i = sess2.run(sub_pred,
                              feed_dict={
                                  x_holder: o_i_x,
                                  y_holder: o_i_y
                              })
            sub_list.append(sub_i)
        sub_array = np.vstack(sub_list)
        np.save(PREOUT_NAME, sub_array)
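MyCapper, used in compute_gradients above, is defined elsewhere; a plausible
stand-in that clips each gradient element-wise might look like:

def MyCapper(grad):
    # Hypothetical: pass through None (variables without gradients) and clip
    # everything else into [-1, 1] before apply_gradients.
    if grad is None:
        return None
    return tf.clip_by_value(grad, -1., 1.)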
Example #51
0
    def train(self, config):
        """Train DCGAN"""
        # first setup validation data
        data = sorted(glob(os.path.join("./data", config.dataset, "valid", "*.jpg")))

        g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                          .minimize(self.g_loss, var_list=self.g_vars)
        tf.initialize_all_variables().run()

        self.saver = tf.train.Saver()
        self.g_sum = tf.summary.merge([self.G_sum, self.g_loss_sum])
        self.writer = tf.summary.FileWriter("./logs", self.sess.graph)

        sample_files = data[0:self.sample_size]
        sample = [get_image(sample_file, self.image_size, is_crop=self.is_crop) for sample_file in sample_files]
        sample_inputs = [doresize(xx, [self.input_size,]*2) for xx in sample]
        sample_images = np.array(sample).astype(np.float32)
        sample_input_images = np.array(sample_inputs).astype(np.float32)

        save_images(sample_input_images, [8, 8], './samples/inputs_small.png')
        save_images(sample_images, [8, 8], './samples/reference.png')

        counter = 1
        start_time = time.time()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        # we only save the validation inputs once
        have_saved_inputs = False

        for epoch in xrange(config.epoch):
            data = sorted(glob(os.path.join("./data", config.dataset, "train", "*.jpg")))
            batch_idxs = min(len(data), config.train_size) // config.batch_size

            for idx in xrange(0, batch_idxs):
                batch_files = data[idx*config.batch_size:(idx+1)*config.batch_size]
                batch = [get_image(batch_file, self.image_size, is_crop=self.is_crop) for batch_file in batch_files]
                input_batch = [doresize(xx, [self.input_size,]*2) for xx in batch]
                batch_images = np.array(batch).astype(np.float32)
                batch_inputs = np.array(input_batch).astype(np.float32)

                # Update G network
                _, summary_str, errG = self.sess.run([g_optim, self.g_sum, self.g_loss],
                    feed_dict={ self.inputs: batch_inputs, self.images: batch_images })
                self.writer.add_summary(summary_str, counter)

                counter += 1
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f" \
                    % (epoch, idx, batch_idxs,
                        time.time() - start_time, errG))

                if np.mod(counter, 100) == 1:
                    samples, g_loss, up_inputs = self.sess.run(
                        [self.G, self.g_loss, self.up_inputs],
                        feed_dict={self.inputs: sample_input_images, self.images: sample_images}
                    )
                    if not have_saved_inputs:
                        save_images(up_inputs, [8, 8], './samples/inputs.png')
                        have_saved_inputs = True
                    save_images(samples, [8, 8],
                                './samples/valid_%s_%s.png' % (epoch, idx))
                    print("[Sample] g_loss: %.8f" % (g_loss))

                if np.mod(counter, 500) == 2:
                    self.save(config.checkpoint_dir, counter)
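get_image and doresize come from the surrounding project and are not shown
here; as an assumption, doresize simply downsamples a frame to the generator's
input resolution, e.g.:

import scipy.misc

def doresize(image, shape):
    # Hypothetical helper: bilinear-resize `image` to `shape`.
    return scipy.misc.imresize(image, shape, interp='bilinear')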
Example #52
0
    def initialize(self):
        
        n_series = self.n_series
        features = self.features
        serie_length = self.serie_length
        
        # the last action performed (default is 0 - Neutral)
        old_action = 0.
        # this is the content of the lstm cells at time -1
        lstm = np.zeros((1,self.nLSTMCells)).astype(np.float32)
        # this is the output of the lstm at time -1
        lstm_out = np.zeros((1,self.nLSTMCells)).astype(np.float32)
        
        # output of the network (decision) through time
        out = []
        # reward through time
        reward = []
        
        #Stock price derivative
        Z = []
        for _ in range(serie_length):
            Z.append(tf.placeholder("float", [None,n_series]))
        
        #Stock cost
        C = []
        for _ in range(serie_length):
            C.append(tf.placeholder("float", [None,n_series]))
                    
        #Features
        F = []
        for _ in range(serie_length):
            F.append(tf.placeholder("float", [None,features]))
        
        # unfold through time
        for t in xrange(serie_length):
            # Recall that the dataset has shape (n_row, length, n_series, n_features);
            # each Z should be a vertical slice of it: (n_row, 1, n_series, n_features)
            
            # Merge of the input
            
            print "Unfold: ", t+1, "out of", serie_length
            self.inputShared1_var = []
            inputShared1 = Merge([self.norm_prices(Z[t]),self.norm_costs(C[t]),self.norm_features(F[t])],[n_series,n_series,features],n_series*2 + features,tf.tanh,self.inputShared1_var)
            
            self.sharedBlock1_var = []
            # Shared block 1: elaboration of the input
            sharedBlock1 = Block(inputShared1, n_series*2 + features , [self.sharedBoxShape[1]]*self.sharedBoxShape[0], tf.tanh, self.sharedBlock1_var , dropout=self.dropout)
            
            self.inputShared2_var = []
            # Features given by shared1 and lstm
            inputShared2 = Merge([sharedBlock1, lstm_out]
                ,[self.sharedBoxShape[1],self.nLSTMCells]
                , self.sharedBoxShape[1] ,tf.tanh, self.inputShared2_var)   

            self.sharedBlock2_var = []
            sharedBlock2 = Block(inputShared2, self.sharedBoxShape[1], [self.sharedBoxShape[1]] * self.sharedBoxShape[0], tf.tanh, self.sharedBlock2_var, dropout=self.dropout)
            
            # Each block represent a gate for the LSTM Cells
            self.block1_var = []
            self.block2_var = []
            self.block3_var = []
            self.block4_var = []            
            block1 = Block(sharedBlock2, self.sharedBoxShape[1], [self.blocksShape[1]] * self.blocksShape[0] + [self.nLSTMCells], tf.tanh, self.block1_var, dropout=self.dropout)
            block2 = Block(sharedBlock2, self.sharedBoxShape[1], [self.blocksShape[1]] * self.blocksShape[0] + [self.nLSTMCells], tf.tanh, self.block2_var, dropout=self.dropout)
            block3 = Block(sharedBlock2, self.sharedBoxShape[1], [self.blocksShape[1]] * self.blocksShape[0] + [self.nLSTMCells], tf.tanh, self.block3_var, dropout=self.dropout)
            block4 = Block(sharedBlock2, self.sharedBoxShape[1], [self.blocksShape[1]] * self.blocksShape[0] + [self.nLSTMCells], tf.tanh, self.block4_var, dropout=self.dropout)
            
            #LSTM cells
            lstm, lstm_out = Lstm(block1, block2, block3, block4, lstm)
            self.outerBlock_var = []
            outerBlock = Block(lstm_out, self.nLSTMCells, [self.decisionBlockShape[1]] * self.decisionBlockShape[0] + [n_series], tf.tanh, self.outerBlock_var, dropout=self.dropout)
            
            out_temp = outerBlock
            out.append(outerBlock)
        
            reward.append(tf.reduce_sum(d(old_action,out_temp, self.denorm_prices(Z[t]), self.denorm_costs(C[t]))))
            
            old_action = out_temp
    
        r = 0.
        for i in xrange(serie_length):
            r = r + tf.reduce_sum(reward[i])

        # we should max r, or the same min -r
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(-r)
        self.tot_reward = r
        self.out = out
        
        self.Z = Z
        self.C = C
        self.F = F
        
        init = tf.initialize_all_variables()
        self.session = tf.Session()
        self.session.run(init)
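Merge, Block and Lstm are project helpers that are not shown. A minimal sketch
of the Lstm step, assuming the four blocks act as the input, forget,
cell-candidate and output gates of a standard LSTM cell:

def Lstm(in_gate, forget_gate, cell_candidate, out_gate, prev_cell):
    # Standard LSTM update built from the four gate activations.
    cell = tf.sigmoid(forget_gate) * prev_cell + \
           tf.sigmoid(in_gate) * tf.tanh(cell_candidate)
    out = tf.sigmoid(out_gate) * tf.tanh(cell)
    return cell, out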
Example #53
0
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 0.7
    else:
        conv_keep_prob = 1.0
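    # NOTE: dropout_on is a tf.placeholder object and is never None, so this
    # branch always runs and conv_keep_prob is always 0.7; the intent was
    # presumably to toggle dropout through the feed_dict instead.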

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONS
        convs_list_flat = []
        # Try conv kernel lengths up to some fraction of the DNA sequence length (101).
        for L_conv1 in range(3, 101, 9):
            n_conv1 = 768
            conv1 = convolution2d(x_image,
                                  n_conv1, [L_conv1, 4],
                                  padding='VALID',
                                  normalizer_fn=None)
            conv1_drop_len = int(101 - L_conv1 + 1)
            conv1_pool = max_pool2d(conv1, [conv1_drop_len, 1],
                                    [conv1_drop_len, 1])  # global max-pooling
            #conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
            conv1_flat = flatten(conv1_pool)
            convs_list_flat.append(conv1_flat)

        # LINEAR FC LAYER
        conv_flat = tf.concat(1, convs_list_flat)
        y_conv = fully_connected(conv_flat, 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                #check stop condition:
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                #    stop_condition = 1
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1]
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
Example #54
0
def train(mnist):
    # x is the input image pixel data
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE],
                       name="x-input")
    # y_ is the target classification label input
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE],
                        name="y-input")
    # Use L2 for the regularization function
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)

    # Reuse the forward pass defined in mnist_inference.py (the forward-pass logic is identical)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, learning rate, moving-average op and training
    # step, much like the example in Section 5.2.1; everything from here on
    # defines the backward pass
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    '''
    Compute the softmax of the network output and take its cross-entropy.
    labels = tf.argmax(*, 1) takes the column index of each row's maximum, shape [BATCH_SIZE, 1]
    logits = y receives the forward-pass predictions, shape [BATCH_SIZE, CLASS_NUM]
    The function simplifies the cross-entropy computation (with one-hot labels,
    every incorrect class is 0 and can be skipped): cross-entropy = -log(y at the correct label's index)
    '''
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y)
    '''
    reduce_mean reduces the dimensionality: after the cross-entropy the matrix
    has shape [BATCH_SIZE, 1], but the loss function must be a single scalar,
    so the BATCH_SIZE dimension is averaged away
    '''
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    '''
    The total loss is the mean cross-entropy plus the sum of all the L2
    regularization terms; the main purpose of regularization is to prevent
    overfitting
    '''
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))
    '''
    tf.train.exponential_decay generates the learning rate for each step
    according to:
    decayed_learning_rate =
    learning_rate * decay_rate ^ (global_step/decay_steps)
    '''
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name="train")

    # Initialize the TensorFlow persistence class (Saver)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()

        # The model is no longer evaluated on validation data during training;
        # a separate program handles validation and testing
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={
                                               x: xs,
                                               y_: ys
                                           })

            # Save the model every 1000 steps
            if i % 1000 == 0:
                '''
                Print the current training status: the loss on the current
                training batch. The size of the loss gives a rough picture of
                how training is progressing; accuracy on the validation set is
                produced by a separate program
                '''
                print("After %d training step(s), "
                      "loss on training batch is %g." % (step, loss_value))
                '''
                Save the current model. Passing the global_step argument
                appends the number of training steps to the checkpoint file
                name, e.g. "model.ckpt-1000" is the model saved after 1000
                training steps
                '''
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
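The comments above defer validation and testing to a separate program. A
sketch of how such a program would restore the shadow (moving-average)
variables for evaluation, reusing the same constants:

variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
saver = tf.train.Saver(variable_averages.variables_to_restore())
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)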
Example #55
0
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs(False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
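start_queue_runners is called above without a Coordinator, so the input
threads are never joined. A variant that allows a clean shutdown:

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# ... run the training loop ...
coord.request_stop()
coord.join(threads)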
Example #56
0
 def _create_initialization(self):
     self.initialize = tf.initialize_all_variables()
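tf.initialize_all_variables() was deprecated in favor of
tf.global_variables_initializer(); on newer releases the equivalent would be:

self.initialize = tf.global_variables_initializer()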
Example #57
0
def trainNetwork(s, readout, sess):
    """ Train the artificial agent using Q-learning to play the pong game.
    Args:
        s: the current state formed by 4 frames of the playground.
        readout: the Q value for each possible action in the current state.
        sess: session
    """

    # Placeholder for the action.
    a = tf.placeholder("float", [None, ACTIONS])

    # Placeholder for the target Q value.
    y = tf.placeholder("float", [None])

    # Compute the loss.
    cost = compute_cost(y, a, readout)

    # Training operation.
    train_step = tf.train.AdamOptimizer(Lr).minimize(cost)

    # Open up a game state to communicate with emulator.
    game_state = game.GameState()

    # Initialize the replay memory.
    D = deque()

    # Initialize the action vector.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1

    # Initialize the state of the game.
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Save and load model checkpoints.
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks_q_learning")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # Initialize the epsilon value for the exploration phase.
    epsilon = INITIAL_EPSILON

    # Initialize the iteration counter.
    t = 0

    while True:
        # Choose an action epsilon-greedily.
        readout_t = readout.eval(feed_dict={s: [s_t]})[0]

        action_index = get_action_index(readout_t, epsilon, t)

        a_t = np.zeros([ACTIONS])

        a_t[action_index] = 1

        # Scale down epsilon during the exploitation phase.
        epsilon = scale_down_epsilon(epsilon, t)

        # Run the selected action and update the replay memory.
        for i in range(0, K):
            # Run the selected action and observe next state and reward.
            s_t1, r_t, terminal = run_selected_action(a_t, s_t, game_state)

            # Store the transition in the replay memory D.
            D.append((s_t, a_t, r_t, s_t1, terminal))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

        # Start training once the observation phase is over.
        if (t > OBSERVE):

            # Sample a minibatch to train on.
            minibatch = random.sample(D, BATCH)

            # Get the batch variables.
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]
            terminal_batch = [d[4] for d in minibatch]

            # Compute the target Q-Value
            readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
            target_q_batch = compute_target_q(r_batch, readout_j1_batch,
                                              terminal_batch)

            # Perform gradient step.
            train_step.run(feed_dict={
                y: target_q_batch,
                a: a_batch,
                s: s_j_batch
            })

        # Update the state.
        s_t = s_t1

        # Update the number of iterations.
        t += 1

        # Save a checkpoint every 10000 iterations.
        if t % 10000 == 0:
            saver.save(sess,
                       'saved_networks_q_learning/' + GAME + '-dqn',
                       global_step=t)

        # Print info.
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon,
              "/ ACTION", action_index, "/ REWARD", r_t,
              "/ Q_MAX %e" % np.max(readout_t))
Example #58
0
def train():
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    ########################
    # Get images and labels
    images_tr, labels_tr = mnistip.distorted_inputs(randFlip=False)
    images_ev, labels_ev = mnistip.inputs(eval_data=True)
    
    ########################
    # VAE ZONE
    images = tf.placeholder(dtype=tf.float32, shape=[FLAGS.batch_size, imHeight, imWidth, numCh])
    # vae_code = tf.placeholder(dtype=tf.float32, shape=[FLAGS.batch_size, CODE_LEN])
    
    # Define Encoder
    z_mean, z_stddev, t_num_dim = ved.recognition(images, CODE_LEN)
    
    # Draw new sample
    samples = tf.random_normal([FLAGS.batch_size,CODE_LEN],0,1,dtype=tf.float32)
    guessed_z = z_mean + (z_stddev * samples)
    
    # Define Decoder
    im_gen = ved.generation(guessed_z, t_num_dim)
    
    # Compute Loss Values
    generation_loss = -tf.reduce_sum(images * tf.log(1e-8 + im_gen) + (1-images) * tf.log(1e-8 + 1 - im_gen),[1,2,3])
    latent_loss = 0.5 * tf.reduce_sum(tf.square(z_mean) + tf.square(z_stddev) - tf.log(tf.square(z_stddev)) - 1,1)
    total_loss = tf.reduce_mean(generation_loss + latent_loss)
    
    # Optimize now
    train_op = tf.train.AdamOptimizer(0.001).minimize(total_loss)
    
    #####################
    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    #########################
    visualization, _ = sess.run([images_ev, labels_ev])
    reshaped_vis = np.squeeze(visualization)
    ims("results/base.jpg",merge(reshaped_vis[:64],[8,8]))
        
    for step in xrange(FLAGS.max_steps):
      _images, _labels = sess.run([images_tr, labels_tr])
      
      _, lossGen, lossLat = sess.run([train_op, generation_loss, latent_loss], feed_dict={images: _images})

      if step % 20 == 0:
          format_str = ('%s: Step %d, GEN-loss = %.2f, LAT-loss = %.2f\n')
          print (format_str % (datetime.now(), step, np.mean(lossGen), np.mean(lossLat)))     
          
          # save intermediate results
          generated_test = sess.run(im_gen, feed_dict={images: visualization})
          generated_test = np.squeeze(generated_test)
          ims("results/"+str(step)+".jpg",merge(generated_test[:64],[8,8]))
Example #59
0
    def run(self):
        inputs = tf.nn.embedding_lookup(self.word_embedding, self.x)
        prob = self.model(inputs)

        # Modified

        with tf.name_scope('loss'):
            cost = - tf.reduce_mean(self.y * tf.log(prob))
            # reg, variables = tf.nn.l2_loss(self.word_embedding), ['softmax']
            # for vari in variables:

            reg = tf.nn.l2_loss(self.weights['softmax']) + \
                    tf.nn.l2_loss(self.biases['softmax'])

            reg += tf.nn.l2_loss(self.weights['w_1']) + \
                    tf.nn.l2_loss(self.biases['w_1'])
            
            reg += tf.nn.l2_loss(self.weights['u_1']) 

            cost += reg * self.l2_reg

            #relationship            
            relation = 0.
            tmp = self.weights['softmax']
            # omiga = np.zeros([8, 8])
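            # The matrix below encodes assumed pairwise emotion correlations:
            # since the loop further down adds l2_loss(w_i - w_j) * omiga[i, j]
            # to the cost, a positive entry pulls the softmax weight columns of
            # classes i and j together and a negative entry pushes them apart
            # (diagonal terms contribute nothing, as w_i - w_i is zero).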
            omiga = np.array([[1, -0.5, 0.5, -1, 0, -0.5, 0, 0.5],
                              [-0.5, 1, -1, 0.5, -0.5, 0, 0.5, 0],
                              [0.5, -1, 1, -0.5, 0.5, 0, -0.5, 0],
                              [-1, 0.5, -0.5, 1, 0, 0.5, 0, -0.5],
                              [0, -0.5, 0.5, 0, 1, 0.5, -1, -0.5],
                              [-0.5, 0, 0, 0.5, 0.5, 1, -0.5, -1],
                              [0, 0.5, -0.5, 0, -1, -0.5, 1, 0.5],
                              [0.5, 0, 0, -0.5, -0.5, -1, 0.5, 1]                               
                                ])
            

            for i in range(8):
                for j in range(8):
                    relation += tf.nn.l2_loss(tmp[:,i] - tmp[:,j]) * omiga[i,j]
            cost += relation * self.rela_reg

        with tf.name_scope('train'):
            global_step = tf.Variable(
                0, name="tr_global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(cost, global_step=global_step)

        with tf.name_scope('predict'):
            correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            correct_num = tf.reduce_sum(tf.cast(correct_pred, tf.int32))

        with tf.name_scope('summary'):
            localtime = time.strftime("%X %Y-%m-%d", time.localtime())
            Summary_dir = 'Summary/' + localtime

            info = 'batch-{}, lr-{}, kb-{}, l2_reg-{}'.format(
                self.batch_size,  self.learning_rate, self.Keep_Prob, self.l2_reg)
            info = info + '\ntrain_file_path:' + self.train_file_path + '\ntest_index:' + str(self.test_index) + '\nembedding_type:' + str(self.embedding_type) + '\nMethod: Emotion_GRU'
            summary_acc = tf.scalar_summary('ACC ' + info, accuracy)
            summary_loss = tf.scalar_summary('LOSS ' + info, cost)
            summary_op = tf.merge_summary([summary_loss, summary_acc])

            test_acc = tf.placeholder(tf.float32)
            test_loss = tf.placeholder(tf.float32)
            summary_test_acc = tf.scalar_summary('ACC ' + info, test_acc)
            summary_test_loss = tf.scalar_summary('LOSS ' + info, test_loss)
            summary_test = tf.merge_summary(
                [summary_test_loss, summary_test_acc])

            train_summary_writer = tf.train.SummaryWriter(
                Summary_dir + '/train')
            test_summary_writer = tf.train.SummaryWriter(Summary_dir + '/test')

        with tf.name_scope('saveModel'):
            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            save_dir = 'Models/' + localtime + '/'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

        with tf.name_scope('readData'):
            print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))
            tr_x, tr_y, tr_doc_len, te_x, te_y, te_doc_len, ev_x, ev_y, ev_doc_len= load_data_for_Emotion_CNN(
                self.train_file_path,
                self.word_id_mapping,
                self.max_doc_len,
                self.test_index,
                self.n_class
            )
            print('train docs: {}    test docs: {}'.format(len(tr_y), len(te_y)))
            print('training_iter: {}'.format(self.training_iter))
            print(info)
            print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            max_acc, bestIter = 0., 0

            def test():
                acc, loss, cnt = 0., 0., 0
                for test, num in self.get_batch_data(te_x, te_y, te_doc_len, 20, keep_prob=1.0):
                    _loss, _acc = sess.run([cost, correct_num], feed_dict=test)
                    acc += _acc
                    loss += _loss * num
                    cnt += num
                loss = loss / cnt
                acc = acc / cnt
                return loss, acc

            def new_test():
                feed_dict = {
                    self.x: ev_x,
                    self.doc_len: ev_doc_len,
                    self.keep_prob: 1.0,
                }
                y_true = ev_y
                y_pred_p = sess.run(prob, feed_dict=feed_dict)
                # y_pred = np.ceil(y_pred_p-0.15)
                #y_pred  = calibrated_label_ranking(y_pred_p)
                y_pred = calibrated_label_ranking(y_pred_p, For_calibrated_B)
                Emotion_eval(y_true, y_pred, y_pred_p)

            if self.training_iter==0:
                saver.restore(sess, 'Models/10:01:44 2017-03-11/-856')
                loss, acc = test()
                print(loss, acc)
                new_test()

            For_calibrated_B = np.loadtxt('For_calibrated_B'+str(self.test_index)+'.txt', delimiter=',')
            for i in xrange(self.training_iter):

                for train, _ in self.get_batch_data(tr_x, tr_y, tr_doc_len, self.batch_size, self.Keep_Prob):
                    _, step, summary, loss, acc = sess.run(
                        [optimizer, global_step, summary_op, cost, accuracy], feed_dict=train)
                    train_summary_writer.add_summary(summary, step)
                    print('Iter {}: mini-batch loss={:.6f}, acc={:.6f}'.format(step, loss, acc))

                if i % self.display_step == 0:
                    loss, acc = test()

                    if acc > max_acc:
                        max_acc = acc
                        bestIter = step
                        saver.save(sess, save_dir, global_step=step)
                        new_test()

                    summary = sess.run(summary_test, feed_dict={
                                       test_loss: loss, test_acc: acc})
                    test_summary_writer.add_summary(summary, step)
                    print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))
                    print('Iter {}: test loss={:.6f}, test acc={:.6f}'.format(step, loss, acc))
                    print('round {}: max_acc={} BestIter={}\n'.format(i, max_acc, bestIter))

            print('Optimization Finished!')
Example #60
0
    def train(self, config):
        """Train DCGAN"""
        data = glob(os.path.join("./data", config.dataset, "*.jpg"))
        #np.random.shuffle(data)

        d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                          .minimize(self.d_loss, var_list=self.d_vars)
        g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                          .minimize(self.g_loss, var_list=self.g_vars)
        tf.initialize_all_variables().run()

        self.saver = tf.train.Saver()
        self.g_sum = tf.summary.merge([self.z_sum, self.d__sum,
            self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
        self.d_sum = tf.summary.merge([self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
        self.writer = tf.summary.FileWriter("./logs", self.sess.graph_def)

        sample_z = np.random.uniform(-1, 1, size=(self.sample_size , self.z_dim))
        sample_files = data[0:self.sample_size]
        sample = [get_image(sample_file, self.image_size, is_crop=self.is_crop) for sample_file in sample_files]
        sample_images = np.array(sample).astype(np.float32)

        counter = 1
        start_time = time.time()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        for epoch in range(config.epoch):
            data = glob(os.path.join("./data", config.dataset, "*.jpg"))
            batch_idxs = min(len(data), config.train_size)//config.batch_size

            for idx in range(0, batch_idxs):
                batch_files = data[idx*config.batch_size:(idx+1)*config.batch_size]
                batch = [get_image(batch_file, self.image_size, is_crop=self.is_crop) for batch_file in batch_files]
                batch_images = np.array(batch).astype(np.float32)

                batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
                            .astype(np.float32)

                # Update D network
                _, summary_str = self.sess.run([d_optim, self.d_sum],
                    feed_dict={ self.images: batch_images, self.z: batch_z })
                self.writer.add_summary(summary_str, counter)

                # Update G network
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                    feed_dict={ self.z: batch_z })
                self.writer.add_summary(summary_str, counter)

                # Run g_optim twice to make sure that d_loss does not go to zero (different from paper)
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                    feed_dict={ self.z: batch_z })
                self.writer.add_summary(summary_str, counter)

                errD_fake = self.d_loss_fake.eval({self.z: batch_z})
                errD_real = self.d_loss_real.eval({self.images: batch_images})
                errG = self.g_loss.eval({self.z: batch_z})

                counter += 1
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                    % (epoch, idx, batch_idxs,
                        time.time() - start_time, errD_fake+errD_real, errG))

                if np.mod(counter, 100) == 1:
                    samples, d_loss, g_loss = self.sess.run(
                        [self.sampler, self.d_loss, self.g_loss],
                        feed_dict={self.z: sample_z, self.images: sample_images}
                    )
                    save_images(samples, [8, 8],
                                './samples/train_%s_%s.png' % (epoch, idx))
                    print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))

                if np.mod(counter, 500) == 2:
                    self.save(config.checkpoint_dir, counter)