def testWhileUpdateVariable_5(self):
    """While loop whose predicate reads var_b; both counters must end at 10."""
    with self.test_session():
        # Two integer counters, both starting at zero.
        var_a = tf.Variable(0, name="a")
        var_b = tf.Variable(0, name="b")
        tf.initialize_all_variables().run()

        def keep_going(unused_loop_var):
            # The predicate looks at var_b itself, not at the loop variable.
            return tf.less(var_b, 10)

        def step(unused_loop_var):
            bump_a = tf.assign_add(var_a, tf.constant(1), name="a_add")
            bump_b = tf.assign_add(var_b, tf.constant(1), name="b_add")
            # var_b is only read back once both increments have executed.
            with tf.control_dependencies([bump_a, bump_b]):
                return tf.identity(var_b)

        loop = control_flow_ops.While(keep_going, step, [var_b], 1, name="loop")

        self.assertEqual(0, var_b.eval())
        loop.eval()  # Executes the loop.
        self.assertEqual(10, var_a.eval())
        self.assertEqual(10, var_b.eval())
def computeGraph(self):
    """Run the training loop on ``self.graph`` and log progress at debug level.

    Performs ``self.num_steps`` optimizer steps. Every 100th step logs the
    loss, the training accuracy and the validation accuracy; the test
    accuracy is logged once after the last step.
    """
    logging.debug("computeGraph")
    with tf.Session(graph=self.graph) as session:
        # One-time initialization of the variables declared in the graph.
        tf.initialize_all_variables().run()
        logging.debug('Initialized')

        fetches = [self.optimizer, self.loss, self.train_prediction]
        for step in range(self.num_steps):
            # A single run() call performs the optimizer step and hands back
            # the loss and the training predictions as numpy values.
            _, loss_value, predictions = session.run(fetches)
            if step % 100 != 0:
                continue

            logging.debug('Loss at step %d: %f' % (step, loss_value))
            train_accuracy = self.accuracy(predictions, self.train_labels)
            logging.debug('Training accuracy: %.1f%%' % train_accuracy)
            # eval() is run() for a single tensor; everything that tensor
            # depends on is recomputed.
            valid_accuracy = self.accuracy(self.valid_prediction.eval(),
                                           self.valid_labels)
            logging.debug('Validation accuracy: %.1f%%' % valid_accuracy)

        test_accuracy = self.accuracy(self.test_prediction.eval(),
                                      self.test_labels)
        logging.debug('Test accuracy: %.1f%%' % test_accuracy)
def testSharded(self):
    """Sharded Saver with max_to_keep=2 keeps only the two newest checkpoints.

    Two variables are placed on two CPU devices and three checkpoints are
    written in a row. The test checks ``last_checkpoints`` after each save
    and that every live checkpoint pattern matches exactly two files, while
    the evicted one matches none.
    """
    save_dir = os.path.join(self.get_temp_dir(), "max_to_keep_sharded")
    try:
        gfile.DeleteRecursively(save_dir)
    except gfile.GOSError:
        pass  # Deliberately ignored: cleanup is best effort.
    gfile.MakeDirs(save_dir)

    with tf.Session(
            target="",
            config=tf.ConfigProto(device_count={"CPU": 2})) as sess:
        with sess.graph.device("/cpu:0"):
            v0 = tf.Variable(111, name="v0")
        with sess.graph.device("/cpu:1"):
            v1 = tf.Variable(222, name="v1")
        save = tf.train.Saver({"v0": v0, "v1": v1}, sharded=True, max_to_keep=2)
        tf.initialize_all_variables().run()
        self.assertEqual([], save.last_checkpoints)

        # First checkpoint.
        s1 = save.save(sess, os.path.join(save_dir, "s1"))
        self.assertEqual([s1], save.last_checkpoints)
        self.assertEqual(2, len(gfile.Glob(s1)))

        # Second checkpoint: both are retained.
        s2 = save.save(sess, os.path.join(save_dir, "s2"))
        self.assertEqual([s1, s2], save.last_checkpoints)
        self.assertEqual(2, len(gfile.Glob(s1)))
        self.assertEqual(2, len(gfile.Glob(s2)))

        # Third checkpoint: the first one must be gone.
        s3 = save.save(sess, os.path.join(save_dir, "s3"))
        self.assertEqual([s2, s3], save.last_checkpoints)
        self.assertEqual(0, len(gfile.Glob(s1)))
        self.assertEqual(2, len(gfile.Glob(s2)))
        self.assertEqual(2, len(gfile.Glob(s3)))
def testWhileUpdateVariable_6(self):
    """Ten-iteration while loop: var_a counts to 10, var_b accumulates var_a to 55."""
    with self.test_session():
        var_a = tf.Variable(0, name="a")
        var_b = tf.Variable(0, name="b")
        start = tf.constant(0)
        tf.initialize_all_variables().run()

        def below_ten(counter):
            return tf.less(counter, 10)

        def step(counter):
            # Chain: bump var_a, then add var_a to var_b, then advance the counter.
            bump_a = tf.assign_add(var_a, 1, name="a_add")
            with tf.control_dependencies([bump_a]):
                accumulate_b = tf.assign_add(var_b, var_a, name="b_add")
            with tf.control_dependencies([accumulate_b]):
                next_counter = tf.add(counter, 1, name="i_add")
            return next_counter

        loop = control_flow_ops.While(below_ten, step, [start], 1, name="loop")

        self.assertEqual(0, var_b.eval())
        loop.eval()  # Executes the loop.
        self.assertEqual(55, var_b.eval())
        self.assertEqual(10, var_a.eval())
def testInt64(self):
    """Save an int64 variable, then restore it into a freshly built variable."""
    save_path = os.path.join(self.get_temp_dir(), "int64")

    # Phase 1: single-node graph, initialize and write the checkpoint.
    with self.test_session() as sess:
        v = tf.Variable(np.int64(15), name="v")
        save = tf.train.Saver({"v": v}, restore_sequentially=True)
        tf.initialize_all_variables().run()

        written_path = save.save(sess, save_path)
        self.assertTrue(isinstance(written_path, six.string_types))
        self.assertEqual(save_path, written_path)

    # Phase 2: new variable with a different initial value, never initialized.
    with self.test_session() as sess:
        v = tf.Variable(np.int64(-1), name="v")
        save = tf.train.Saver({"v": v})

        def mentions_uninitialized(e):
            return "uninitialized value v" in e.message

        # Reading before restore must fail with the uninitialized-value error.
        with self.assertRaisesWithPredicateMatch(tf.OpError,
                                                 mentions_uninitialized):
            sess.run(v)

        save.restore(sess, save_path)
        # The value written in phase 1 must now be visible.
        self.assertEqual(np.int64(15), v.eval())
def run_graph(device, input_shape, axes, num_layers, py, scale, train,
              num_iters):
    """Run the graph and print its execution time.

    Args:
      device: string, the device to run on.
      input_shape: shape of the input tensor.
      axes: axes that are to be normalized across.
      num_layers: number of batch normalization layers in the graph.
      py: whether to use the python implementation.
      scale: scale after normalization.
      train: if true, also run backprop.
      num_iters: number of steps to run.

    Returns:
      The total duration of the timed steps in seconds. The per-step average
      is only printed, not returned.
    """
    graph = tf.Graph()
    with graph.as_default():
        outputs = build_graph(device, input_shape, axes, num_layers, py, scale,
                              train)
    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        # Build the fetch list once so the timed loop measures session.run()
        # only, not the repeated Python list construction.
        fetches = [out.op for out in outputs]
        session.run(fetches)  # warm up.
        start_time = time.time()
        for _ in range(num_iters):
            session.run(fetches)
        duration = time.time() - start_time
    # Guard the average so that num_iters == 0 reports instead of raising
    # ZeroDivisionError.
    per_step = duration / max(num_iters, 1)
    print("%s shape:%d/%d #layers:%d python:%r scale:%r train:%r - %f secs" %
          (device, len(input_shape), len(axes), num_layers, py, scale, train,
           per_step))
    return duration
def testBlockGRUToGRUCellSingleStep(self):
    """One step of GRUBlockCell must match one step of GRUCell on the same feed."""
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        batch_size, cell_size, input_size = 4, 5, 6
        seed = 1994
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=seed)

        # Graph inputs; their values are overridden through the feed below.
        x = tf.zeros([batch_size, input_size])
        h = tf.zeros([batch_size, cell_size])

        # Random values fed for the inputs.
        x_value = np.random.rand(batch_size, input_size)
        h_value = np.random.rand(batch_size, cell_size)
        feed = {x: x_value, h: h_value}

        # Reference: the basic GRU cell.
        with tf.variable_scope("basic", initializer=initializer):
            basic_output = tf.nn.rnn_cell.GRUCell(cell_size)(x, h)
            sess.run([tf.initialize_all_variables()])
            basic_res = sess.run([basic_output], feed)

        # Candidate: the block GRU cell, built with the same initializer.
        with tf.variable_scope("block", initializer=initializer):
            block_output = gru_ops.GRUBlockCell(cell_size)(x, h)
            sess.run([tf.initialize_all_variables()])
            block_res = sess.run([block_output], feed)

        self.assertEqual(len(block_res), len(basic_res))
        for block, basic in zip(block_res, basic_res):
            self.assertAllClose(block, basic)
def testSparseBasic(self):
    """One sparse gradient-descent step: only the indexed row of each variable moves."""
    for dtype in [tf.half, tf.float32]:
        with self.test_session():
            var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
            var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)

            # Sparse gradient touching row 0 of var0.
            values0 = tf.constant([0.1], shape=[1, 1], dtype=dtype)
            rows0 = tf.constant([0])
            shape0 = tf.constant([2, 1])
            grads0 = tf.IndexedSlices(values0, rows0, shape0)

            # Sparse gradient touching row 1 of var1.
            values1 = tf.constant([0.01], shape=[1, 1], dtype=dtype)
            rows1 = tf.constant([1])
            shape1 = tf.constant([2, 1])
            grads1 = tf.IndexedSlices(values1, rows1, shape1)

            optimizer = tf.train.GradientDescentOptimizer(3.0)
            train_step = optimizer.apply_gradients(
                [(grads0, var0), (grads1, var1)])
            tf.initialize_all_variables().run()

            # Initial values.
            self.assertAllCloseAccordingToType([[1.0], [2.0]], var0.eval())
            self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval())

            train_step.run()

            # Updated rows moved by learning_rate * gradient; the rest is intact.
            self.assertAllCloseAccordingToType(
                [[1.0 - 3.0 * 0.1], [2.0]], var0.eval())
            self.assertAllCloseAccordingToType(
                [[3.0], [4.0 - 3.0 * 0.01]], var1.eval())
def enlargeDataset(images, byte_data, names, labels, is_hard):
    """Append seven randomly augmented copies of every image to ``byte_data``.

    ``labels``, ``names`` and ``is_hard`` are passed to
    ``extendListEightTimes`` (defined elsewhere; presumably it repeats each
    list in place so it lines up with the enlarged data -- confirm against
    that helper).

    Args:
      images: sequence of images accepted by the ``tf.image.random_*`` ops.
      byte_data: list that receives the flattened augmented pixels; mutated
        in place.
      names: list extended in place by ``extendListEightTimes``.
      labels: list extended in place by ``extendListEightTimes``.
      is_hard: list extended in place by ``extendListEightTimes``.

    Returns:
      The tuple ``(byte_data, names, labels, is_hard)``.
    """
    extendListEightTimes(labels)
    extendListEightTimes(names)
    extendListEightTimes(is_hard)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        num_images = len(images)
        for _ in range(7):
            print(num_images)
            train_data2 = []
            start = time.time()
            # Build one chain of random augmentation ops per image.
            for image in images:
                imageTensor = tf.image.random_contrast(image, 0.2, 1.8)
                imageTensor = tf.image.random_flip_left_right(imageTensor)
                imageTensor = tf.image.random_flip_up_down(imageTensor)
                imageTensor = tf.image.random_brightness(
                    imageTensor, max_delta=50 / 255.0)
                imageTensor = tf.image.random_saturation(imageTensor, 0.2, 1.8)
                train_data2.append(imageTensor)
            print(time.time() - start)
            start = time.time()
            # Evaluate all augmented images of this pass in a single run() call.
            train_data2 = sess.run(train_data2)
            print(type(train_data2))
            print('time2:', time.time() - start)
            # Was a Python 2 print statement; the single-argument call form
            # prints the same text on Python 2 and parses on Python 3.
            print(train_data2[0][16])
            for augmented in train_data2:
                byte_data.extend(augmented.flatten())
    return byte_data, names, labels, is_hard
def train(args):
    """Train the model described by ``args`` and checkpoint it periodically.

    The run configuration and the character vocabulary are pickled into
    ``args.save_dir`` first so they can be loaded again later. Each epoch sets
    the learning rate to ``args.learning_rate * args.decay_rate ** epoch``
    and feeds every batch once, carrying the final state of one batch over as
    the initial state of the next. A checkpoint is written whenever the
    global batch counter is a multiple of ``args.save_every``.

    Args:
      args: options object; reads ``data_dir``, ``batch_size``, ``seq_length``,
        ``save_dir``, ``num_epochs``, ``learning_rate``, ``decay_rate`` and
        ``save_every``, and gains a ``vocab_size`` attribute.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # Pickle streams are binary data: open the files in 'wb', not text mode.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
            # Exponentially decayed learning rate for this epoch.
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                global_step = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(global_step,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if global_step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)
                    print("model saved to {}".format(checkpoint_path))
def main(argv):
    """Classify every image file named on the command line and print the result.

    File names are read from ``sys.argv[1:]``; the ``argv`` parameter itself
    is not consulted.
    """
    mapDict = getKanjiMap()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        # Load the trained parameters from the checkpoint.
        saver = tf.train.Saver(loadParam)
        saver.restore(sess, MODEL_NAME)

        for fName in sys.argv[1:]:
            if not os.path.isfile(fName):
                print("%s does not exist\n" %(fName))
                continue

            img = cv2.imread(fName,0)
            img = prepareImage(img)
            # Center the pixel values around zero and scale by PIXEL_DEPTH.
            img = (img - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
            # Single-image batch with one channel.
            img = img.reshape([1,IMAGE_SIZE,IMAGE_SIZE,1])
            predictions = sess.run(eval_prediction,
                                   feed_dict={eval_data_node: img})
            labelID = (np.argmax(predictions))
            print("labelID: %d; Recognized Kanji:%s" %(labelID, mapDict[str(labelID)]))
def testFtrlwithoutRegularization2(self):
    """Three FTRL steps with both regularization strengths set to zero."""
    with self.test_session() as sess:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([4.0, 3.0])
        grads0 = tf.constant([0.1, 0.2])
        grads1 = tf.constant([0.01, 0.02])

        opt = tf.train.FtrlOptimizer(3.0,
                                     initial_accumulator_value=0.1,
                                     l1_regularization_strength=0.0,
                                     l2_regularization_strength=0.0)
        update = opt.apply_gradients([(grads0, var0), (grads1, var1)])
        tf.initialize_all_variables().run()

        # Variables start at their initial values.
        before0, before1 = sess.run([var0, var1])
        self.assertAllClose([1.0, 2.0], before0)
        self.assertAllClose([4.0, 3.0], before1)

        num_steps = 3
        for _ in range(num_steps):
            update.run()

        after0, after1 = sess.run([var0, var1])
        self.assertAllClose(np.array([-2.55607247, -3.98729396]), after0)
        self.assertAllClose(np.array([-0.28232238, -0.56096673]), after1)
def testFtrlWithL1(self):
    """Ten FTRL steps with an L1 strength of 0.001 and no L2 regularization."""
    with self.test_session() as sess:
        var0 = tf.Variable([1.0, 2.0])
        var1 = tf.Variable([4.0, 3.0])
        grads0 = tf.constant([0.1, 0.2])
        grads1 = tf.constant([0.01, 0.02])

        opt = tf.train.FtrlOptimizer(3.0,
                                     initial_accumulator_value=0.1,
                                     l1_regularization_strength=0.001,
                                     l2_regularization_strength=0.0)
        update = opt.apply_gradients([(grads0, var0), (grads1, var1)])
        tf.initialize_all_variables().run()

        # Variables start at their initial values.
        before0, before1 = sess.run([var0, var1])
        self.assertAllClose([1.0, 2.0], before0)
        self.assertAllClose([4.0, 3.0], before1)

        num_steps = 10
        for _ in range(num_steps):
            update.run()

        after0, after1 = sess.run([var0, var1])
        self.assertAllClose(np.array([-7.66718769, -10.91273689]), after0)
        self.assertAllClose(np.array([-0.93460727, -1.86147261]), after1)
def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options

    # Input pipeline: the skipgram op reads the training text file.
    skipgram_outputs = word2vec.skipgram(filename=opts.train_data,
                                         batch_size=opts.batch_size,
                                         window_size=opts.window_size,
                                         min_count=opts.min_count,
                                         subsample=opts.subsample)
    (words, counts, words_per_epoch, epoch, total_words,
     examples, labels) = skipgram_outputs
    self._epoch = epoch
    self._words = total_words

    # Materialize the vocabulary statistics and keep them on the options.
    vocab_words, vocab_counts, epoch_words = self._session.run(
        [words, counts, words_per_epoch])
    opts.vocab_words = vocab_words
    opts.vocab_counts = vocab_counts
    opts.words_per_epoch = epoch_words
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)

    self._examples = examples
    self._labels = labels

    # id -> word is the vocabulary itself; word -> id is its inverse.
    self._id2word = opts.vocab_words
    for word_id, word in enumerate(self._id2word):
        self._word2id[word] = word_id

    # Forward pass, NCE loss, summary and optimizer.
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)

    # Every variable exists by now: initialize them, then create the saver.
    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
def applyOptimizer(self, opt, steps=5, is_sparse=False):
    """Run ``opt`` for ``steps`` updates on two zero-initialized variables.

    Args:
      opt: optimizer exposing ``apply_gradients``.
      steps: number of update steps to run.
      is_sparse: when true, use 2x1 variables with ``IndexedSlices``
        gradients; otherwise use length-2 vectors with dense gradients.

    Returns:
      The values of the two variables after the last step.
    """
    if not is_sparse:
        var0 = tf.Variable([0.0, 0.0])
        var1 = tf.Variable([0.0, 0.0])
        grads0 = tf.constant([0.1, 0.2])
        grads1 = tf.constant([0.01, 0.02])
        expected_initial = [0.0, 0.0]
    else:
        var0 = tf.Variable([[0.0], [0.0]])
        var1 = tf.Variable([[0.0], [0.0]])
        grads0 = tf.IndexedSlices(tf.constant([0.1], shape=[1, 1]),
                                  tf.constant([0]),
                                  tf.constant([2, 1]))
        grads1 = tf.IndexedSlices(tf.constant([0.02], shape=[1, 1]),
                                  tf.constant([1]),
                                  tf.constant([2, 1]))
        expected_initial = [[0.0], [0.0]]

    update = opt.apply_gradients([(grads0, var0), (grads1, var1)])
    tf.initialize_all_variables().run()

    sess = tf.get_default_session()
    # Both variables must start at zero.
    v0_val, v1_val = sess.run([var0, var1])
    self.assertAllClose(expected_initial, v0_val)
    self.assertAllClose(expected_initial, v1_val)

    # Apply the requested number of optimizer steps.
    for _ in range(steps):
        update.run()

    v0_val, v1_val = sess.run([var0, var1])
    return v0_val, v1_val
def testDenseFeaturesSeparableWithinMargins(self):
    """Hinge loss on two dense points that are separable but inside the margins."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]],
            weights=[1.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            "symmetric_l2_regularization": 1.0,
            "symmetric_l1_regularization": 0,
            "loss_type": "hinge_loss",
        }
        model = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)
        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # (1.0, 0.5) and (1.0, -0.5) can be split along the x-axis, but both
        # points lie within the margins, which leaves an unregularized loss
        # of 1/2 per example. The optimum is w_1 ~= 0.0, w_2 ~= 1.0, i.e. an
        # L2 loss of ~0.25.
        self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesWeightedExamples(self):
    """Hinge loss on two dense points where the first example weighs three times more."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]],
            weights=[3.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            "symmetric_l2_regularization": 1.0,
            "symmetric_l1_regularization": 0,
            "loss_type": "hinge_loss",
        }
        model = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)
        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # The heavier point (1.0, 0.5) dominates, so the model widens the
        # margin on its side; regularization keeps (1.0, -0.5) within the
        # margin. The optimum is w_1 ~= 0.4, w_2 ~= 1.2, for an L2 loss of
        # 0.5 * 0.25 * 0.25 * 1.6 = 0.2. Both binary predictions are correct,
        # but the boundary sits much closer to the second point.
        self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesWithArbitraryWeights(self):
    """Squared loss on dense features with unequal example weights."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
            weights=[20.0, 10.0],
            labels=[10.0, -5.0])
        options = {
            "symmetric_l2_regularization": 5.0,
            "symmetric_l1_regularization": 0,
            "loss_type": "squared_loss",
        }
        lr = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = lr.predictions(examples)
        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # With s_1, s_2 the *example* weights, the loss for these features is
        #   1/2 s_1 (label_1 - w_1)^2 + 1/2 s_2 (label_2 - w_2)^2
        #     + \lambda/2 (w_1^2 + w_2^2)
        # whose minimizers are
        #   w_1* = label_1 \cdot s_1 / (\lambda + s_1) = 8.0
        #   w_2* = label_2 \cdot s_2 / (\lambda + s_2) = -10/3.
        # The unnormalized regularized loss is then
        #   s_1/2 (8-10)^2 + s_2/2 (5-10/3)^2 + 5.0/2 (8^2 + (10/3)^2)
        #     = 2175.0/9,
        # which is further normalized by the sum of the example weights.
        self.assertAllClose([8.0, -10.0 / 3], predictions.eval(), rtol=0.01)

        loss = lr.regularized_loss(examples)
        self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
def testDenseFeaturesPerfectlySeparable(self):
    """Hinge loss on two perfectly separable dense points."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[1.0, 1.0], [1.0, -1.0]],
            weights=[1.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            "symmetric_l2_regularization": 1.0,
            "symmetric_l1_regularization": 0,
            "loss_type": "hinge_loss",
        }
        model = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)
        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        # (1.0, 1.0) and (1.0, -1.0) are split by the x-axis with a functional
        # margin >= 1, so the unregularized loss is ~0.0 and only the L2 term
        # remains. The optimum is w_1 ~= 0.0, w_2 ~= 1.0, i.e. an L2 loss of
        # ~0.25.
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
def testL1Regularization(self):
    """Squared loss with L1 strength 4.0 and L2 strength 1.0 on two sparse examples."""
    # Test data: two examples with disjoint age/gender feature ids.
    example_protos = [
        make_example_proto({"age": [0], "gender": [0]}, -10.0),
        make_example_proto({"age": [1], "gender": [1]}, 14.0),
    ]
    example_weights = [1.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            "symmetric_l2_regularization": 1.0,
            "symmetric_l1_regularization": 4.0,
            "loss_type": "squared_loss",
        }
        lr = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        prediction = lr.predictions(examples)
        loss = lr.regularized_loss(examples)
        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # The minimized objective per example is
        #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight.
        # The asserted optimum is [-4.0, 20/3], checked with a loose rtol.
        self.assertAllClose([-4.0, 20.0 / 3.0], prediction.eval(), rtol=0.08)

        # The loss is that objective summed over the examples at the optimal
        # weights.
        self.assertAllClose(308.0 / 6.0, loss.eval(), atol=0.01)
def testFeatureValues(self):
    """Squared loss where the sparse features carry explicit values (-2.0 and 2.0)."""
    # Test data: the third argument of make_example_proto is the feature value.
    example_protos = [
        make_example_proto({"age": [0], "gender": [0]}, -10.0, -2.0),
        make_example_proto({"age": [1], "gender": [1]}, 14.0, 2.0),
    ]
    example_weights = [5.0, 3.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            "symmetric_l2_regularization": 1,
            "symmetric_l1_regularization": 0,
            "loss_type": "squared_loss",
        }
        lr = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = lr.predictions(examples)
        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # Four (sparse) variable weights are learned: w_1, w_2 for age and
        # w_3, w_4 for gender. With y_1, y_2 the labels and s_1, s_2 the
        # *example* weights, these feature values give the loss
        #   s_1/2 (y_1 + 2w_1 + 2w_3)^2 + s_2/2 (y_2 - 2w_2 - 2w_4)^2
        #     + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2)
        # with optimum
        #   w_1* = w_3* = -2.0 s_1 y_1 / (\lambda + 8 s_1)
        #   w_2* = w_4* = 2 \cdot s_2 y_2 / (\lambda + 8 s_2).
        # Put differently, regularization and example weights shrink each
        # prediction to 8 \cdot s_i / (\lambda + 8 \cdot s_i) of its label.
        self.assertAllClose([-10 * 40.0 / 41.0, 14.0 * 24 / 25.0],
                            predictions.eval(),
                            atol=0.01)
def testSimple(self):
    """Squared loss with unit L2 regularization on two sparse examples."""
    # Test data: two examples with disjoint age/gender feature ids.
    example_protos = [
        make_example_proto({"age": [0], "gender": [0]}, -10.0),
        make_example_proto({"age": [1], "gender": [1]}, 14.0),
    ]
    example_weights = [1.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            "symmetric_l2_regularization": 1,
            "symmetric_l1_regularization": 0,
            "loss_type": "squared_loss",
        }
        lr = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()

        predictions = lr.predictions(examples)
        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # Minimizing (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 shrinks
        # each prediction to 2/3 of its label.
        self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0],
                            predictions.eval(),
                            rtol=0.005)

        # The duality gap is approximate, so after convergence it should be
        # near 0.0 and may even come out slightly negative.
        self.assertAllClose(0.0,
                            lr.approximate_duality_gap().eval(),
                            atol=1e-2)
def testImbalancedWithExampleWeights(self):
    """Logistic loss where the negative example weighs three times the positive one."""
    # Test data: one negative (label 0) and one positive (label 1) example.
    example_protos = [
        make_example_proto({"age": [0], "gender": [0]}, 0),
        make_example_proto({"age": [1], "gender": [1]}, 1),
    ]
    example_weights = [3.0, 1.0]

    # The same expectations must hold for every table-shard configuration.
    for num_shards in _SHARD_NUMBERS:
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "num_table_shards": num_shards,
                "loss_type": "logistic_loss",
            }
            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()

            unregularized_loss = lr.unregularized_loss(examples)
            loss = lr.regularized_loss(examples)
            predictions = lr.predictions(examples)
            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08)
            self.assertAllClose(0.408044, loss.eval(), atol=0.012)

            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 1], predicted_labels.eval())
            self.assertAllClose(0.0,
                                lr.approximate_duality_gap().eval(),
                                rtol=2e-2,
                                atol=1e-2)
def testSparseBasic(self):
    """Three sparse Adagrad steps: only the indexed row of each variable moves."""
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.test_session():
            var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
            var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)

            # Sparse gradient touching row 0 of var0.
            values0 = tf.constant([0.1], shape=[1, 1], dtype=dtype)
            rows0 = tf.constant([0])
            shape0 = tf.constant([2, 1])
            grads0 = tf.IndexedSlices(values0, rows0, shape0)

            # Sparse gradient touching row 1 of var1.
            values1 = tf.constant([0.01], shape=[1, 1], dtype=dtype)
            rows1 = tf.constant([1])
            shape1 = tf.constant([2, 1])
            grads1 = tf.IndexedSlices(values1, rows1, shape1)

            ada_opt = tf.train.AdagradOptimizer(3.0,
                                                initial_accumulator_value=0.1)
            ada_update = ada_opt.apply_gradients(
                [(grads0, var0), (grads1, var1)])
            tf.initialize_all_variables().run()

            # Initial values.
            self.assertAllClose([[1.0], [2.0]], var0.eval())
            self.assertAllClose([[3.0], [4.0]], var1.eval())

            # Three Adagrad updates.
            num_steps = 3
            for _ in range(num_steps):
                ada_update.run()

            # Rows without a gradient keep their initial value.
            self.assertAllCloseAccordingToType(
                np.array([[-1.6026098728179932], [2.0]]), var0.eval())
            self.assertAllCloseAccordingToType(
                np.array([[3.0], [3.715679168701172]]), var1.eval())
def testLSTMBasicToBlockPeeping(self):
    """Peephole LSTMCell and fused_lstm must agree on outputs and gradients."""
    with self.test_session(use_gpu=self._use_gpu) as sess:
        batch_size, input_size, cell_size = 2, 3, 4
        sequence_length = 5

        # One random input tensor per time step.
        inputs = []
        for _ in range(sequence_length):
            step_input = tf.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=tf.float32)
            inputs.append(step_input)

        # Same seeded initializer for both implementations.
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)

        # Reference implementation: LSTMCell with peepholes, unrolled by tf.nn.rnn.
        with tf.variable_scope("basic", initializer=initializer):
            cell = tf.nn.rnn_cell.LSTMCell(cell_size,
                                           use_peepholes=True,
                                           state_is_tuple=True)
            outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)
            sess.run([tf.initialize_all_variables()])
            basic_outputs = sess.run(outputs)
            basic_grads = sess.run(tf.gradients(outputs, inputs))
            # Only the "basic" variables exist at this point.
            basic_wgrads = sess.run(
                tf.gradients(outputs, tf.trainable_variables()))

        # Fused implementation with explicitly created weights.
        with tf.variable_scope("block", initializer=initializer):
            w = tf.get_variable("w",
                                shape=[input_size + cell_size, cell_size * 4],
                                dtype=tf.float32)
            b = tf.get_variable("b",
                                shape=[cell_size * 4],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer)
            wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
            wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
            wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)

            fused_results = fused_lstm(
                tf.convert_to_tensor(sequence_length, dtype=tf.int64),
                inputs,
                w,
                b,
                wci=wci,
                wcf=wcf,
                wco=wco,
                cell_clip=0,
                use_peephole=True)
            # Only the seventh (last) result is compared.
            _, _, _, _, _, _, outputs = fused_results

            sess.run([tf.initialize_all_variables()])
            block_outputs = sess.run(outputs)
            block_grads = sess.run(tf.gradients(outputs, inputs))
            block_wgrads = sess.run(
                tf.gradients(outputs, [w, b, wci, wcf, wco]))

        self.assertAllClose(basic_outputs, block_outputs)
        self.assertAllClose(basic_grads, block_grads)
        # Weight gradients are compared with a looser tolerance.
        for basic, block in zip(basic_wgrads, block_wgrads):
            self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
def train_model(args):
    """Train the RNN model described by ``args`` and snapshot it periodically.

    The configuration and the vocabulary are pickled into
    ``args.snapshots_dir`` before training. A snapshot is saved whenever the
    global batch counter is a multiple of ``args.snapshot`` and once more
    after the very last batch.
    """
    data_loader = InputHandler(args.data_dir, args.batch_size,
                               args.result_length)
    args.vocabulary_size = data_loader.vocabulary_size

    # Persist configuration and vocabulary so the model can be reloaded for
    # sampling.
    config_path = os.path.join(args.snapshots_dir, CONFIGURATION_FILE)
    with open(config_path, 'wb') as f:
        cPickle.dump(args, f)
    vocabulary_path = os.path.join(args.snapshots_dir, WORDS_VOCABULARY_FILE)
    with open(vocabulary_path, 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocabulary), f)

    model = RNNModel(args.rnn_size, args.network_depth, args.batch_size,
                     args.result_length, args.vocabulary_size, args.gradient)

    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        last_epoch = args.num_epochs - 1

        for e in range(args.num_epochs):
            # Decay the learning rate exponentially with the epoch number.
            decayed_rate = args.training_rate * (args.decay_rate ** e)
            session.run(tf.assign(model.lr, decayed_rate))
            data_loader.set_batch_pointer_to_zero()
            state = model.initial_state.eval()

            for b in range(data_loader.num_batches):
                x, y = data_loader.get_next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                }
                # The final state of this batch seeds the next one.
                train_loss, state, _ = session.run(
                    [model.cost, model.final_state, model.train_op], feed)

                step = e * data_loader.num_batches + b
                is_final_batch = (e == last_epoch and
                                  b == data_loader.num_batches - 1)
                if step % args.snapshot == 0 or is_final_batch:
                    snapshot_path = os.path.join(args.snapshots_dir,
                                                 'model.ckpt')
                    saver.save(session, snapshot_path, global_step=step)
                    print("Model snapshot was taken to {}".format(snapshot_path))
def testSharing(self):
    """Two update ops built from one Adagrad optimizer share its accumulators.

    ``apply_gradients`` is called twice on the same optimizer. Alternating
    the two resulting ops for three steps must give the same variables as
    three steps of a single Adagrad update.
    """
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.test_session():
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            ada_opt = tf.train.AdagradOptimizer(3.0)

            # Apply the optimizer twice; both applications use the same
            # accumulator slots.
            ada_update1 = ada_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            ada_update2 = ada_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            # Exactly one slot kind exists and each slot mirrors the shape of
            # its variable.
            self.assertEqual(["accumulator"], ada_opt.get_slot_names())
            slot0 = ada_opt.get_slot(var0, "accumulator")
            self.assertEqual(slot0.get_shape(), var0.get_shape())
            slot1 = ada_opt.get_slot(var1, "accumulator")
            self.assertEqual(slot1.get_shape(), var1.get_shape())
            tf.initialize_all_variables().run()

            # Initial values.
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())

            # Interleave the first and the second update op for 3 steps.
            ada_update1.run()
            ada_update2.run()
            ada_update1.run()

            # Same result as three steps of a single Adagrad update.
            self.assertAllCloseAccordingToType(
                np.array([-1.6026098728179932, -0.6026098728179932]),
                var0.eval())
            self.assertAllCloseAccordingToType(
                np.array([2.715679168701172, 3.715679168701172]),
                var1.eval())
def testInstancesOfOneClassOnly(self):
    """Logistic loss when the only positive example has zero weight."""
    # Test data: one negative example and one positive example that is
    # effectively ignored through its zero weight.
    example_protos = [
        make_example_proto({"age": [0], "gender": [0]}, 0),
        # Shares its gender feature with the example above.
        make_example_proto({"age": [1], "gender": [0]}, 1),
    ]
    example_weights = [1.0, 0.0]  # Second example "omitted" from training.

    # The same expectations must hold for every table-shard configuration.
    for num_shards in _SHARD_NUMBERS:
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                "symmetric_l2_regularization": 1,
                "symmetric_l1_regularization": 0,
                "num_table_shards": num_shards,
                "loss_type": "logistic_loss",
            }
            lr = SdcaModel(examples, variables, options)
            tf.initialize_all_variables().run()

            unregularized_loss = lr.unregularized_loss(examples)
            loss = lr.regularized_loss(examples)
            predictions = lr.predictions(examples)
            train_op = lr.minimize()
            for _ in range(_MAX_ITERATIONS):
                train_op.run()

            self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
            self.assertAllClose(0.525457, loss.eval(), atol=0.01)

            predicted_labels = get_binary_predictions_for_logistic(predictions)
            self.assertAllEqual([0, 0], predicted_labels.eval())
            self.assertAllClose(0.01,
                                lr.approximate_duality_gap().eval(),
                                rtol=1e-2,
                                atol=1e-2)
def testBasicLSTMCell(self):
    """Smoke test for BasicLSTMCell, stacked twice and with input_size != num_units."""
    with self.test_session() as sess:
        # Case 1: two stacked layers sharing one BasicLSTMCell(2) instance.
        with tf.variable_scope("root",
                               initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 8])
            stacked = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)
            g, out_m = stacked(x, m)
            sess.run([tf.initialize_all_variables()])

            # The placeholders-by-name feed overrides the zero tensors.
            feed = {x.name: np.array([[1., 1.]]),
                    m.name: 0.1 * np.ones([1, 8])}
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)

            # These reference numbers were not derived by hand; they only
            # guard against accidental changes.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem = np.array([[0.68967271, 0.68967271,
                                      0.44848421, 0.44848421,
                                      0.39897051, 0.39897051,
                                      0.24024698, 0.24024698]])
            self.assertAllClose(res[1], expected_mem)

        # Case 2: a single cell whose input size differs from num_units.
        with tf.variable_scope("other",
                               initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 4])
            g, out_m = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)(x, m)
            sess.run([tf.initialize_all_variables()])

            feed = {x.name: np.array([[1., 1., 1.]]),
                    m.name: 0.1 * np.ones([1, 4])}
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)
def testL2Regularization(self):
    """Check that squared-loss SDCA with L2=16 shrinks predictions to label / 5."""
    # Setup test data: two pairs of identical examples.
    low_label = -10.0
    high_label = 14.0
    example_protos = [
        # 2 identical examples
        make_example_proto({"age": [0], "gender": [0]}, low_label),
        make_example_proto({"age": [0], "gender": [0]}, low_label),
        # 2 more identical examples
        make_example_proto({"age": [1], "gender": [1]}, high_label),
        make_example_proto({"age": [1], "gender": [1]}, high_label),
    ]
    example_weights = [1.0] * 4

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            "symmetric_l2_regularization": 16,
            "symmetric_l1_regularization": 0,
            "loss_type": "squared_loss",
        }
        model = SdcaModel(examples, variables, options)
        tf.initialize_all_variables().run()
        predictions = model.predictions(examples)
        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()

        # Predictions should be 1/5 of label due to minimizing regularized loss:
        #   (label - 2 * weight)^2 + L2 * 16 * weight^2
        optimal1 = low_label / 5.0
        optimal2 = high_label / 5.0
        expected = [optimal1, optimal1, optimal2, optimal2]
        self.assertAllClose(expected, predictions.eval(), rtol=0.01)
def train(checkpoint_dir, image_list, batch_size, normalize):
    """Train the `architecture` model forever, checkpointing every 100 steps.

    Gray images are fed to ``architecture.inference`` and the result is
    compared against the original images with ``architecture.loss``. A
    previous checkpoint under ``checkpoint_dir + "training"`` is restored
    when one exists. The loop never terminates on its own.

    Args:
        checkpoint_dir: directory prefix; "training" is appended by plain
            string concatenation, so it is expected to end with a path
            separator.
        image_list: passed through to ``get_feed_dict``.
        batch_size: leading dimension of both image placeholders.
        normalize: passed through to ``get_feed_dict``.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, name='global_step', trainable=False)
        original_images_placeholder = tf.placeholder(
            tf.float32, shape=(batch_size, 144, 160, 3))
        gray_images_placeholder = tf.placeholder(
            tf.float32, shape=(batch_size, 144, 160, 3))

        # image summary for tensorboard
        tf.image_summary('original_images', original_images_placeholder,
                         max_images=100)
        tf.image_summary('gray_images', gray_images_placeholder,
                         max_images=100)

        logits = architecture.inference(gray_images_placeholder, "train")
        loss = architecture.loss(original_images_placeholder, logits)
        tf.scalar_summary('loss', loss)
        train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
            loss, global_step=global_step)

        # summary for tensorboard graph
        # NOTE: the merged summary op is built but never run; periodic summary
        # writing is currently disabled in the training loop below.
        summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()
        sess = tf.Session()

        # Best effort: the directory usually exists already. Only filesystem
        # errors are ignored, so Ctrl-C and programming errors still surface.
        try:
            os.mkdir(checkpoint_dir)
        except OSError:
            pass

        sess.run(init)
        print("\nRunning session\n")

        # saver for the model
        saver = tf.train.Saver(tf.all_variables())
        tf.train.start_queue_runners(sess=sess)

        # restore previous model if one
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir + "training")
        if ckpt and ckpt.model_checkpoint_path:
            print("Restoring previous model...")
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("Model restored")
            except Exception:
                # Keep training from the fresh initialisation, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would.
                print("Could not restore model")

        # Summary op
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(checkpoint_dir + "training",
                                                graph_def=graph_def)

        # Resume the local step counter from the (possibly restored) variable.
        step = int(sess.run(global_step))
        while True:
            step += 1
            feed_dict = get_feed_dict(batch_size, original_images_placeholder,
                                      gray_images_placeholder, image_list,
                                      normalize)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            print(" Step: " + str(sess.run(global_step)) + " Loss: " +
                  str(loss_value))

            if step % 100 == 0:
                print("Saving model")
                print(saver.save(sess, checkpoint_dir + "training/checkpoint",
                                 global_step=global_step))
                print("")
# Prepare shuffled train/validation batches and start the training session.
# NOTE(review): this excerpt is cut off inside the inner batch loop.
data = preprocess(data)
# gen_batch and gen_batch1 each return (batches, labels); the labels from the
# second call overwrite those from the first.
train_batch_data, train_batch_label = gen_batch(data, train_label)
train_batch_data1, train_batch_label = gen_batch1(data, train_label)
val_batch_data, val_batch_label = gen_batch(data, test_label)
val_batch_data1, val_batch_label = gen_batch1(data, test_label)
# Apply one shared random permutation so both inputs and the labels stay aligned.
rand_ix = np.random.permutation(len(train_batch_data))
train_batch_data, train_batch_data1, train_batch_label = train_batch_data[
    rand_ix], train_batch_data1[rand_ix], train_batch_label[rand_ix]
rand_ix1 = np.random.permutation(len(val_batch_data))
val_batch_data, val_batch_data1, val_batch_label = val_batch_data[
    rand_ix1], val_batch_data1[rand_ix1], val_batch_label[rand_ix1]
model = Model(learning_rate)
init = tf.initialize_all_variables()
saver = tf.train.Saver()
rcnn3d7 = []
with tf.Session() as sess:
    sess.run(init)
    for i_epoch in range(num_epoch):
        # training step
        total_train_loss = 0.
        total_train_acc = 0.
        for i in range(0, len(train_batch_data), batch_size):
            # Stop before the trailing slice. NOTE(review): `>=` also skips a
            # final batch that is exactly full — confirm this is intended.
            if i + batch_size >= len(train_batch_data):
                break
            batch_data = train_batch_data[i:i + batch_size]
            batch_data1 = train_batch_data1[i:i + batch_size]
# Command-line flags: where the MNIST data is stored.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('data_dir', '/tmp/data/', 'Directory for storing data')

mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

sess = tf.InteractiveSession()

# Model: a single softmax layer over the 784 input pixels.
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Loss (mean cross entropy over the batch) and plain gradient descent.
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Training: 1000 mini-batches of 100 examples each.
sess.run(tf.initialize_all_variables())
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

# Evaluation: fraction of test images whose arg-max prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy,
               feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
def test(self):
    """Test DnCNN.

    For checkpoints 160*500 .. 191*500, restore the model, denoise every
    image matched by ./test/*.png, print the per-image PSNR of the denoised
    and of the noisy image, save a comparison picture, and append the average
    denoised PSNR for that checkpoint to the results text file.
    """
    # init variables
    tf.initialize_all_variables().run()

    test_files = glob('./test/*.png')
    print(test_files)
    counter = 0

    # load testing input
    X_test = tf.placeholder(tf.float32, [1, None, None, 1],
                            name='noisy_image_test')
    predicted_noise, layer_1_output = self.sampler(X_test)
    print("[*] Loading test images ...")
    test_data = load_images(test_files)  # list of array of different size

    # Build the Saver once: constructing it inside the checkpoint loop would
    # add a fresh set of save/restore ops to the graph on every iteration.
    saver = tf.train.Saver()

    results_path = (
        '/home/lyj/tensorflow_code/DnCNN-tensorflow-master/'
        'test_per_8layer_4_sigma_25_multi_ensemable.txt')
    # The context manager closes the results file even if a restore or a
    # session run raises part-way through.
    with open(results_path, 'w+') as f:
        for order in range(160, 192):
            model_path = './checkpoint/per_8layer_end_3layer_25_half_ensemable_BSD400_128_40/DnCNN.model-%d' % (
                order * 500)
            saver.restore(self.sess, model_path)

            psnr_sum = 0
            psnr_init_sum = 0
            for idx in range(len(test_files)):
                noisy_image = add_noise(1 / 255.0 * test_data[idx],
                                        self.sigma, self.sess)  # ndarray
                predicted_noise_, layer_1_output_ = self.sess.run(
                    [predicted_noise, layer_1_output],
                    feed_dict={X_test: noisy_image})
                counter = counter + 1

                noisy_image = noisy_image[:, :, :, 0]
                output_clean_image = (noisy_image - predicted_noise_) * 255

                # calculate PSNR
                groundtruth = np.clip(test_data[idx], 0, 255).astype('uint8')
                groundtruth = groundtruth[:, :, :, 0]
                noisyimage = np.clip(noisy_image * 255, 0,
                                     255).astype('uint8')
                outputimage = np.clip(output_clean_image, 0,
                                      255).astype('uint8')
                psnr = cal_psnr(groundtruth, outputimage)
                print(psnr)
                psnr_init_ = cal_psnr(groundtruth, noisyimage)
                print(psnr_init_)
                psnr_init_sum += psnr_init_
                psnr_sum += psnr
                save_images(
                    groundtruth, noisyimage, outputimage,
                    os.path.join(self.sample_dir,
                                 'test_mean_%d_%d.png' % (idx, counter)))

            avg_psnr = psnr_sum / len(test_files)
            avg_psnr_init = psnr_init_sum / len(test_files)
            print("--- Test ---- Average PSNR %.4f ---" % avg_psnr)
            print("--- Test ---- Average init PSNR %.4f ---" % avg_psnr_init)
            f.write("--- Test ---- Average PSNR %.4f ---" % avg_psnr)
            f.write("\n")
def ddsigmoid(z):
    """Return the derivative of the logistic sigmoid at z: s(z) * (1 - s(z))."""
    s = expit(z)  # evaluate the sigmoid once and reuse it
    return s * (1.0 - s)


print(ddsigmoid(0.387))

# Reset the default graph BEFORE any session exists: resetting while a session
# is active is documented as undefined behaviour. The InteractiveSession that
# used to be opened here was never used and was leaked, so it is gone.
tf.reset_default_graph()

one = tf.constant(1.0)
X = tf.placeholder("float")  # create symbolic variable
Y = tf.placeholder("float")  # create symbolic variable
x_77 = tf.constant(0.387)

# derivative of sigmoid = sigmoid(y) * (1.0 - sigmoid(y))
sigmoid = tf.div(one, one + tf.exp(-X))
dsigmoid = tf.multiply(Y, tf.subtract(one, Y))

# This graph defines no tf.Variable, so running the initializer does nothing;
# it is kept so the script's printed output is unchanged.
init = tf.initialize_all_variables()
sess = tf.Session()
print(sess.run(init))
print(sess.run(sigmoid, {X: x_77.eval(session=sess)}))
print(sess.run(sigmoid, {X: 0.387}))
print(sess.run(dsigmoid, {Y: sigmoid.eval({X: 0.387}, session=sess)}))
#RMSPropOptimizer (初めて耳にしました)学習率の調整を自動化したアルゴリズム #https://qiita.com/TomokIshii/items/f355d8e87d23ee8e0c7a train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) #予測の評価 #tf.argmax(y,1)は各インプットに対して最も確からしいラベルを返し、tf.argmax(y_,1)は正解のラベルを返します。 #そしてtf.equalで私たちの予測が当たっていたかを判定することができます。 #https://qiita.com/qooa/items/3719fec3cfe764674fb9 correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) #型の変換する #tf.cast( 変換したいもの , 変換後の型 ) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) #実行 sess.run(tf.initialize_all_variables()) create_images("before") #ミニバッチ学習 #バッチ学習 学習データxがN個あるときに、N個のデータを全て用いて、それぞれのデータでの損失lの平均を計算し、それをデータ全体の損失Lとする学習 #確率的勾配法 N個のデータx1,x2,...,xNからランダムに1つxiを選び出し、そのデータ1つに対する損失lをそのままLとする学習 #ミニバッチ学習 全体を考慮したバッチ学習と、確率的勾配法の間を取ったのがミニバッチ学習であり、このとき学習データxがN個あるときに、ランダムなn(≤N)個のデータを使いLを求める学習 # 分類クラス数が多いほど、ミニバッチサイズを小さくすることが有効かも。一番主流。 #https://www.hellocybernetics.tech/entry/2017/07/08/152859 num_epoch = 1001 show = 100 num_data = train_images.shape[0] batch_size = 16 Loss = [] Accuracy = [] for i in range(1, num_epoch):
def main():
    """Build, train and evaluate a two-conv-layer MNIST classifier.

    Trains for ``max_step`` iterations with Adam, writes training summaries
    every 100 steps, saves a checkpoint plus a test-set summary every 1100
    steps and on the final step, then prints the test accuracy and the
    elapsed time.

    Relies on the module-level names ``sess`` and ``mnist`` and on the helper
    functions ``weight_variable``, ``bias_variable``, ``conv2d``,
    ``max_pool_2x2`` and ``variable_summaries``.
    """
    # Specify training parameters
    result_2b = './results_2b/'  # directory where the results from the training are saved
    result_2b_test = './results_2b_test/'
    max_step = 5500  # the maximum iterations. After max_step iterations, the training will stop no matter what

    start_time = time.time()  # start timing

    # placeholders for input data and input labels
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    y_ = tf.placeholder(tf.float32, [None, 10], name='y_')

    # reshape the input image
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # first convolutional layer
    with tf.name_scope('firstLayer'):
        with tf.name_scope('weights'):
            W_conv1 = weight_variable([5, 5, 1, 32])
            variable_summaries(W_conv1)
        with tf.name_scope('bias'):
            b_conv1 = bias_variable([32])
            variable_summaries(b_conv1)
        with tf.name_scope('Relu'):
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
            variable_summaries(h_conv1)
        with tf.name_scope('max_pool_2x2'):
            h_pool1 = max_pool_2x2(h_conv1)
            variable_summaries(h_pool1)
        with tf.name_scope('NetInput'):
            # Pre-activation values, built only so they can be summarised.
            netinput1 = conv2d(x_image, W_conv1) + b_conv1
            variable_summaries(netinput1)

    # second convolutional layer
    with tf.name_scope('secondLayer'):
        with tf.name_scope('weights'):
            W_conv2 = weight_variable([5, 5, 32, 64])
            variable_summaries(W_conv2)
        with tf.name_scope('bias'):
            b_conv2 = bias_variable([64])
            variable_summaries(b_conv2)
        with tf.name_scope('Relu'):
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
            variable_summaries(h_conv2)
        with tf.name_scope('max_pool_2x2'):
            h_pool2 = max_pool_2x2(h_conv2)
            variable_summaries(h_pool2)
        with tf.name_scope('NetInput'):
            netinput2 = conv2d(h_pool1, W_conv2) + b_conv2
            variable_summaries(netinput2)

    # densely connected layer
    with tf.name_scope('denselyLayer'):
        with tf.name_scope('weights'):
            W_fc1 = weight_variable([7 * 7 * 64, 1024])
            variable_summaries(W_fc1)
        with tf.name_scope('bias'):
            b_fc1 = bias_variable([1024])
            variable_summaries(b_fc1)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        with tf.name_scope('NetInput'):
            netinput3 = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
            variable_summaries(netinput3)
        with tf.name_scope('Relu'):
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
            variable_summaries(h_fc1)
        with tf.name_scope('MaxPool'):
            variable_summaries(h_pool2_flat)

    # dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # softmax
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # setup training
    y = tf.nn.softmax(y_conv, name='y')
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                              name='accuracy')

    # Add a scalar summary for the snapshot loss.
    tf.summary.scalar(cross_entropy.op.name, cross_entropy)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(result_2b, sess.graph)
    test = tf.summary.FileWriter(result_2b_test, sess.graph)

    # Run the Op to initialize the variables.
    sess.run(init)

    # run the training
    for i in range(max_step):
        # make the data batch, which is used in the training iteration.
        # the batch size is 50
        batch = mnist.train.next_batch(50)

        if i % 100 == 0:
            # output the training accuracy every 100 iterations
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 1.0
            })
            print("step %d, training accuracy %g" % (i, train_accuracy))

            # Update the events file which is used to monitor the training
            # (in this case, only the training loss is monitored)
            summary_str = sess.run(summary_op,
                                   feed_dict={
                                       x: batch[0],
                                       y_: batch[1],
                                       keep_prob: 0.5
                                   })
            summary_writer.add_summary(summary_str, i)
            summary_writer.flush()

        # Save a checkpoint every 1100 iterations and on the last iteration.
        # The last value produced by range(max_step) is max_step - 1, so the
        # final-step test must compare against that (comparing against
        # max_step itself could never be true).
        if i % 1100 == 0 or i == max_step - 1:
            checkpoint_file = os.path.join(result_2b, 'checkpoint')
            saver.save(sess, checkpoint_file, global_step=i)

            test_summary = sess.run(summary_op,
                                    feed_dict={
                                        x: mnist.test.images,
                                        y_: mnist.test.labels,
                                        keep_prob: 0.5
                                    })
            test.add_summary(test_summary, i)
            test.flush()

        # run one train_step
        train_step.run(feed_dict={
            x: batch[0],
            y_: batch[1],
            keep_prob: 0.5
        })

    # print test error
    print("test accuracy %g" % accuracy.eval(feed_dict={
        x: mnist.test.images,
        y_: mnist.test.labels,
        keep_prob: 1.0
    }))

    stop_time = time.time()
    print('The training takes %f second to finish' % (stop_time - start_time))
def train(self, config):
    """Train the model, or run one test pass, depending on ``config.is_train``.

    Training: runs ``config.epoch`` epochs over ``train.h5`` with Adam,
    prints the loss every 10 batches, saves the model every 10 epochs, and
    appends "loss , PSNR , SSIM" epoch averages to ``data.txt``.

    Testing: evaluates ``self.pred`` on ``test.h5``, merges the output using
    the (nx, ny) values returned by ``input_setup`` and writes
    ``test_image.png`` under ``config.sample_dir``.

    Args:
        config: object providing is_train, checkpoint_dir, epoch, batch_size
            and sample_dir.
    """
    if config.is_train:
        input_setup(self.sess, config)
        h5_name = "train.h5"
    else:
        nx, ny = input_setup(self.sess, config)
        h5_name = "test.h5"
    data_dir = os.path.join('./{}'.format(config.checkpoint_dir), h5_name)

    train_data, train_label = read_data(data_dir)

    # Stochastic gradient descent with the standard backpropagation
    self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

    tf.initialize_all_variables().run()

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if config.is_train:
        print("Training...")
        epoch_loss = 0
        average_ssim = 0

        for ep in range(config.epoch):  # for each epoch
            # Run by batch images
            # TODO: check data loader of tensorflow and shuffle training data in each epoch
            batch_idxs = len(train_data) // config.batch_size
            for idx in range(0, batch_idxs):
                lo = idx * config.batch_size
                hi = (idx + 1) * config.batch_size
                batch_images = train_data[lo:hi]
                batch_labels = train_label[lo:hi]
                feed = {self.images: batch_images, self.labels: batch_labels}

                counter += 1
                # update weights and biases
                _, err = self.sess.run([self.train_op, self.loss],
                                       feed_dict=feed)

                # SSIM between the prediction (after this update) and the
                # labels, accumulated as a per-epoch average.
                average_ssim += ssim(
                    self.pred.eval(feed_dict=feed)[:, 33:66, 33:66],
                    self.labels.eval(feed_dict=feed),
                    multichannel=True) / batch_idxs
                epoch_loss += err

                if counter % 10 == 0:  # display training loss for every 10 batches
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]" %
                          ((ep + 1), counter, time.time() - start_time, err))

                # Save the model every 10 epochs' worth of batches.
                # Note: final model may not be saved!!!
                if counter % (batch_idxs * 10) == 0:
                    self.save(config.checkpoint_dir, counter)

                # End of an epoch: log the epoch averages and reset them.
                # PSNR is derived here only, where the full epoch mean is
                # available, instead of on every batch.
                if counter % batch_idxs == 0:
                    average_loss = epoch_loss / float(batch_idxs)
                    PSNR = 10 * math.log10(1 / average_loss)
                    with open('data.txt', 'a') as log_file:
                        log_file.write(
                            str(average_loss) + " , " + str(PSNR) + " , " +
                            str(average_ssim) + "\n")
                    epoch_loss = 0
                    average_ssim = 0
    else:
        print("Testing...")
        result = self.pred.eval({
            self.images: train_data,
            self.labels: train_label
        })
        print(nx, ny)
        result = merge(result, [nx, ny])
        result = result.squeeze()
        image_path = os.path.join(os.getcwd(), config.sample_dir)
        image_path = os.path.join(image_path, "test_image.png")
        imsave(result, image_path)
def train():
    """Train the model and evaluate validation accuracy at fixed intervals.

    Builds the loss and accuracy ops on top of ``model``, trains with Adam
    for ``num_examples * FLAGS.n_epochs // FLAGS.BATCH_SIZE + 1`` steps,
    writes TensorBoard summaries for training (every step) and validation
    (every ``FLAGS.eval_num_steps`` steps), and finally logs the test
    accuracy.
    """
    # define placeholder for inputs to network
    xs = tf.placeholder(tf.float32, [None, 784])  # 28x28
    ys = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)
    x_image = tf.reshape(xs, [-1, 28, 28, 1])

    prediction = model(x_image=x_image, keep_prob=keep_prob)

    # Cross entropy function for minimization.
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))  # loss
    tf.summary.scalar('cross entropy', cross_entropy)

    # Accuracy Scalar of the model.
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(ys, 1),
                                          tf.argmax(prediction, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Summarization for tensorboard.
    merged = tf.summary.merge_all()

    sess = tf.Session()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir +
                                              '/validation')
    sess.run(tf.initialize_all_variables())

    # Floor division keeps the step count an int; with true division (the
    # Python 3 meaning of `/`) range() below would raise TypeError.
    n_steps = int(
        (mnist.train.num_examples * FLAGS.n_epochs) // FLAGS.BATCH_SIZE) + 1

    # Actual training
    for i in range(n_steps):
        start_time = time.time()
        batch_xs, batch_ys = mnist.train.next_batch(FLAGS.BATCH_SIZE)
        summary, _ = sess.run([merged, train_step],
                              feed_dict={
                                  xs: batch_xs,
                                  ys: batch_ys,
                                  keep_prob: FLAGS.dropout
                              })
        train_writer.add_summary(summary, i)

        if i % FLAGS.eval_num_steps == 0:
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    xs: mnist.validation.images,
                    ys: mnist.validation.labels,
                    keep_prob: 1
                })
            validation_writer.add_summary(summary, i)
            logger.info('Step: {0} Accuracy: {1} Time taken: {2}'.format(
                i, acc, time.time() - start_time))

    test_acc = accuracy.eval(session=sess,
                             feed_dict={
                                 xs: mnist.test.images,
                                 ys: mnist.test.labels,
                                 keep_prob: 1
                             })
    logger.info('Test Accuracy = {0}'.format(test_acc))
    train_writer.close()
    validation_writer.close()
    sess.close()  # release the session's resources once training is done
def fit(self):
    """Train on generated ball sequences, saving variables every 400 steps.

    Each batch holds ``self.batch_size`` sequences from ``make_ball_gif``.
    Every frame is divided by its own maximum; the input is the sequence
    without its last frame and the target is the sequence without its first
    frame. Cross entropy is printed before and after each training step.
    """
    with self.graph.as_default():
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Start fresh, or resume from previously saved variables.
            if self.path_to_load_variables == '':
                sess.run(tf.initialize_all_variables())
            else:
                saver.restore(sess, self.path_to_load_variables)
                print('loaded variables ' + self.path_to_load_variables)

            for step in range(99999):
                batch = []
                labels = []
                while len(batch) != self.batch_size:
                    seq = make_ball_gif()
                    # Scale every frame by its own maximum value.
                    for i, frame in enumerate(seq):
                        seq[i] = frame / np.max(frame)
                    frames = list(seq)
                    batch.append(frames[:-1])  # input: all but the last frame
                    labels.append(frames[1:])  # target: all but the first frame

                feed_dict = {self.input: batch, self.target: labels}

                # Report the loss before and after one optimisation step.
                ce = sess.run(self.cross_entropy, feed_dict=feed_dict)
                print(ce)
                _ = sess.run(self.train_opt, feed_dict=feed_dict)
                ce = sess.run(self.cross_entropy, feed_dict=feed_dict)
                print(ce)
                print("")

                if step % 400 == 0:
                    act_out = sess.run(self.actual_output,
                                       feed_dict=feed_dict)
                    print(act_out)
                    saver.save(sess, self.path_to_save_variables)
                    print('Saved variables to ' + self.path_to_save_variables)
# Fit a straight line, of the form y=m*x+b
import tensorflow as tf

# Training data: features and their labels.
xs = [0.00, 1.00, 2.00, 3.00, 4.00, 5.00, 6.00, 7.00]
ys = [-0.82, -0.94, -0.12, 0.26, 0.39, 0.64, 1.02, 1.00]

# Initial guesses for slope and intercept.
m_initial = -0.5
b_initial = 1.0

# Trainable parameters.
m = tf.Variable(m_initial)
b = tf.Variable(b_initial)

# Tensorflow knows this is a vector operation.
ys_model = m * xs + b

# Sum of squared residuals over every item in the vector.
error = tf.reduce_sum((ys - ys_model) ** 2)

# Running this op performs one gradient-descent step.
operation = tf.train.GradientDescentOptimizer(
    learning_rate=0.001).minimize(error)

with tf.Session() as session:
    session.run(tf.initialize_all_variables())  # Initialize session
    for iteration in range(10000):
        session.run(operation)
    print('Slope:', session.run(m), 'Intercept:', session.run(b))
def train(data_dirs,
          batch_size=32,
          num_classes=1,
          augment_data=True,
          checkpoint_dir='checkpoints',
          restore_checkpoint=True,
          checkpoint_file=None,
          restore_step=None,
          save_checkpoint_step=1000,
          save_summary_step=100,
          log_step=10,
          dropout=0.8,
          max_steps=100000,
          num_examples_per_epoch=1000,
          log_device_placement=False,
          cameras=None,
          min_angle=None,
          max_angle=None):
    """Train the nvidia_model network, optionally resuming from a checkpoint.

    When ``restore_checkpoint`` is set and ``checkpoint_dir`` holds a
    checkpoint, training resumes at ``restore_step`` (parsed from the
    checkpoint file name when not given). Otherwise it starts from step 0.
    Progress is printed every ``log_step`` steps, summaries are written every
    ``save_summary_step`` steps, and checkpoints every
    ``save_checkpoint_step`` steps and on the last step.

    ``checkpoint_file`` is accepted but not used by this function.
    """
    with tf.Graph().as_default():
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        # Decide once whether this run resumes from an existing checkpoint.
        resuming = bool(
            restore_checkpoint and ckpt and ckpt.model_checkpoint_path)

        if resuming:
            # Restores from checkpoint
            if not restore_step:
                # The step number is the suffix after the last '-' of the
                # checkpoint file name.
                ckpt_name = ckpt.model_checkpoint_path.rpartition('/')[2]
                restore_step = int(ckpt_name.rpartition('-')[2])
            print('Checkpoint step:', restore_step)
            global_step = tf.Variable(restore_step, trainable=False)
        else:
            global_step = tf.Variable(0, trainable=False)
            restore_step = 0
            print('No checkpoint file found')

        # Get images and labels.
        images, labels = nvidia_input.inputs(
            batch_size=batch_size,
            data_dirs=data_dirs,
            shuffle=True,
            num_classes=num_classes,
            augment_data=augment_data,
            num_examples_per_epoch=num_examples_per_epoch,
            cameras=cameras,
            min_angle=min_angle,
            max_angle=max_angle,
            raw_labels=False)

        # Inference graph, loss, and the op that applies one training update.
        output = nvidia_model.inference(images, dropout,
                                        num_classes=num_classes)
        _loss = nvidia_model.loss(output, labels, num_classes=num_classes)
        train_op = nvidia_model.train(_loss, global_step)

        # Saver, merged summaries and variable initializer.
        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        session_config = tf.ConfigProto(
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        if resuming:
            # Restores from checkpoint
            restore_path = os.path.join(
                checkpoint_dir, 'model.ckpt-{}'.format(restore_step))
            saver.restore(sess, restore_path)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(checkpoint_dir, sess.graph)

        for step in range(restore_step, max_steps):
            tick = time.time()
            _, loss_value = sess.run([train_op, _loss])
            duration = time.time() - tick

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % log_step == 0:
                examples_per_sec = batch_size / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.6f (%.1f examples/sec; '
                              '%.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step > 0 and step % save_summary_step == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Save the model checkpoint periodically and on the final step.
            is_last_step = (step + 1) == max_steps
            if step > 0 and (step % save_checkpoint_step == 0 or is_last_step):
                checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def train(mnist):
    """Train a one-hidden-layer MNIST classifier evaluated with averaged weights.

    Uses exponentially decayed gradient descent on cross entropy plus L2
    regularization, maintains exponential moving averages of the trainable
    variables, prints the validation accuracy of the averaged model every
    1000 steps, and prints its test accuracy after TRAINING_STEPS steps.

    Args:
        mnist: dataset object exposing train, validation and test splits.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Hidden-layer parameters (truncated normal, standard deviation 0.1).
    hidden_weights = tf.Variable(
        tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    hidden_biases = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))

    # Output-layer parameters.
    output_weights = tf.Variable(
        tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    output_biases = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

    # Forward pass with the raw (non-averaged) parameters.
    y = inference(x, None, hidden_weights, hidden_biases, output_weights,
                  output_biases)

    global_step = tf.Variable(0, trainable=False)

    # Moving averages of all trainable variables, plus a second forward pass
    # that goes through those averages.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    average_y = inference(x, variable_averages, hidden_weights, hidden_biases,
                          output_weights, output_biases)

    # Cross entropy measures the gap between predictions and true labels.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    # Mean cross entropy over all examples in the current batch.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization. Usually only the weights are regularized, not the
    # bias terms.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(hidden_weights) + regularizer(output_weights)

    # Total loss = cross-entropy loss + regularization loss.
    loss = cross_entropy_mean + regularization

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)

    # Gradient descent on the total loss (cross entropy + L2 regularization).
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # TensorFlow offers tf.control_dependencies and tf.group to run several
    # ops in one go; the two lines below are equivalent to
    # train_op = tf.group(train_step, variables_averages_op).
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Cast the boolean matches to floats and average them: the result is the
    # accuracy of the averaged model on the fed data.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Create the session and start training.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        # Validation data: used during training to judge roughly when to stop
        # and how well training is going.
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels,
        }
        # Test data: in real use it is never seen during training and serves
        # only as the final measure of model quality.
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # Iteratively train the network.
        for step in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 steps.
            if step % 1000 == 0:
                # MNIST is small, so the whole validation set fits in one
                # batch. With a more complex model or a larger validation
                # set, an overly large batch could take too long or run out
                # of memory.
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training steps, validation accuracy "
                      "using average model is %g" % (step, validate_acc))

            # Fetch this step's training batch and run one training update.
            batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: batch_x, y_: batch_y})

        # After training, measure the final accuracy on the test data.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training steps, test accuracy "
              "using average model is %g" % (TRAINING_STEPS, test_acc))
def train(self, config):
    """Train DCGAN"""
    # Load either the MNIST arrays or two lists of image paths: `data`
    # (from config.dataset) and `data2` (from config.dataset2).
    if config.dataset == 'mnist':
        data_X, data_y = self.load_mnist()
    else:
        data = glob(
            os.path.join("./data", config.dataset, self.input_fname_pattern))
        data2 = glob(
            os.path.join("./data", config.dataset2, self.input_fname_pattern))
    # np.random.shuffle(data)

    # NOTE(review): these Adam train ops are built and then immediately
    # replaced by the RMSProp ops below; only the RMSProp ops are ever run.
    d_optim = tf.train.AdamOptimizer(config.learning_rate,
                                     beta1=config.beta1).minimize(
                                         self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(config.learning_rate,
                                     beta1=config.beta1).minimize(
                                         self.g_loss, var_list=self.g_vars)

    # Wasserstein-GAN
    # Both ops minimize the NEGATED losses.
    d_optim = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        -self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.RMSPropOptimizer(learning_rate=5e-5).minimize(
        -self.g_loss, var_list=self.g_vars)
    # Ops that clip every discriminator variable into [-0.01, 0.01].
    clip_d = [
        tf.assign(var, tf.clip_by_value(var, -0.01, 0.01))
        for var in self.d_vars
    ]

    # NOTE(review): the bare except also catches KeyboardInterrupt and genuine
    # initialization failures, not only a missing API.
    try:
        tf.global_variables_initializer().run()
    except:  # Fit for different APIs of Tensorflow
        tf.initialize_all_variables().run()

    self.g_sum = merge_summary([
        self.z_sum, self.d__sum, self.G_sum, self.d_loss_fake_sum,
        self.g_loss_sum
    ])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = SummaryWriter("./logs", self.sess.graph)
    '''
    To Do:
    Option 1: add a function that maps images to [?, self.z_dim] vector (Done, not good)
    Option 2: modify z and related parameters
    Option 3: add a conv layer that maps images to z
    Option 4: add a CS layer that maps images to ?
    '''
    #sample_z = np.random.uniform(-1, 1, size=(self.sample_num, self.z_dim))
    '''need modify'''
    # Random sample_z is the default; the non-MNIST branch below replaces it
    # with flattened images taken from data2.
    sample_z = np.random.uniform(-1, 1, size=(self.sample_num, self.z_dim))

    if config.dataset == 'mnist':
        sample_inputs = data_X[0:self.sample_num]
        sample_labels = data_y[0:self.sample_num]
    else:
        '''Need to prepare additional sample inputs for outputs'''
        sample_files = data[0:self.sample_num]
        sample = [
            get_image(sample_file,
                      input_height=self.input_height,
                      input_width=self.input_width,
                      resize_height=self.output_height,
                      resize_width=self.output_width,
                      is_crop=self.is_crop,
                      is_grayscale=self.is_grayscale)
            for sample_file in sample_files
        ]
        sample_zs = data2[0:self.sample_num]
        sample_2 = [
            get_image(sample_file,
                      input_height=self.input_height,
                      input_width=self.input_width,
                      resize_height=self.output_height,
                      resize_width=self.output_width,
                      is_crop=self.is_crop,
                      is_grayscale=self.is_grayscale)
            for sample_file in sample_zs
        ]
        # Keep an un-flattened copy of the data2 images for saving later.
        sample_blurred = copy.deepcopy(sample_2)
        sample_2 = [s.reshape([self.z_dim]) for s in sample_2]
        if (self.is_grayscale):
            sample_inputs = np.array(sample).astype(np.float32)[:, :, :, None]
        else:
            sample_inputs = np.array(sample).astype(np.float32)
        sample_z = np.array(sample_2).astype(np.float32)

    counter = 1
    start_time = time.time()
    # Resume the step counter from a checkpoint when one can be loaded.
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        counter = checkpoint_counter
        print(" [***] Load SUCCESS")
    else:
        print(" [!!!] Load failed...")

    for epoch in xrange(config.epoch):
        if config.dataset == 'mnist':
            batch_idxs = min(len(data_X),
                             config.train_size) // config.batch_size
        else:
            # The file lists are re-read at the start of every epoch.
            data = glob(
                os.path.join("./data", config.dataset,
                             self.input_fname_pattern))
            data2 = glob(
                os.path.join("./data", config.dataset2,
                             self.input_fname_pattern))
            batch_idxs = min(len(data),
                             config.train_size) // config.batch_size

        for idx in xrange(0, batch_idxs):
            '''need modify'''
            batch_z = np.random.uniform(
                -1, 1, [config.batch_size, self.z_dim]).astype(np.float32)
            if config.dataset == 'mnist':
                batch_images = data_X[idx * config.batch_size:(idx + 1) *
                                      config.batch_size]
                batch_labels = data_y[idx * config.batch_size:(idx + 1) *
                                      config.batch_size]
            else:
                '''Need to prepare additional batch inputs for training, namely, z'''
                batch_files = data[idx * config.batch_size:(idx + 1) *
                                   config.batch_size]
                batch = [
                    get_image(batch_file,
                              input_height=self.input_height,
                              input_width=self.input_width,
                              resize_height=self.output_height,
                              resize_width=self.output_width,
                              is_crop=self.is_crop,
                              is_grayscale=self.is_grayscale)
                    for batch_file in batch_files
                ]
                # NOTE(review): this always takes the first sample_num files
                # of data2 rather than the slice matching idx — confirm that
                # is intended.
                batch_zs = data2[0:self.sample_num]
                batch_2 = [
                    get_image(batch_file,
                              input_height=self.input_height,
                              input_width=self.input_width,
                              resize_height=self.output_height,
                              resize_width=self.output_width,
                              is_crop=self.is_crop,
                              is_grayscale=self.is_grayscale).reshape(
                                  [self.z_dim]) for batch_file in batch_zs
                ]
                if (self.is_grayscale):
                    batch_images = np.array(batch).astype(
                        np.float32)[:, :, :, None]
                else:
                    batch_images = np.array(batch).astype(np.float32)
                # Replaces the random batch_z drawn above.
                batch_z = np.array(batch_2).astype(np.float32)

            if config.dataset == 'mnist':
                # Update D network
                _, summary_str = self.sess.run(
                    [d_optim, self.d_sum],
                    feed_dict={
                        self.inputs: batch_images,
                        self.z: batch_z,
                        self.y: batch_labels,
                    })
                self.writer.add_summary(summary_str, counter)

                # Update G network
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={
                                                   self.z: batch_z,
                                                   self.y: batch_labels,
                                               })
                self.writer.add_summary(summary_str, counter)

                # Run g_optim twice to make sure that d_loss does not go to
                # zero (different from paper)
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={
                                                   self.z: batch_z,
                                                   self.y: batch_labels
                                               })
                self.writer.add_summary(summary_str, counter)

                errD_fake = self.d_loss_fake.eval({
                    self.z: batch_z,
                    self.y: batch_labels
                })
                errD_real = self.d_loss_real.eval({
                    self.inputs: batch_images,
                    self.y: batch_labels
                })
                errG = self.g_loss.eval({
                    self.z: batch_z,
                    self.y: batch_labels
                })
            else:
                ###################################################################################
                # NOTE(review): this first group runs the same RMSProp ops
                # (without clipping) before the Wasserstein-GAN group below
                # runs them again with clipping.
                # Update D network
                _, summary_str = self.sess.run([d_optim, self.d_sum],
                                               feed_dict={
                                                   self.inputs: batch_images,
                                                   self.z: batch_z
                                               })
                self.writer.add_summary(summary_str, counter)

                # Update G network
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={self.z: batch_z})
                self.writer.add_summary(summary_str, counter)

                # Run g_optim twice to make sure that d_loss does not go to
                # zero (different from paper)
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={self.z: batch_z})
                self.writer.add_summary(summary_str, counter)
                ###################################################################################
                # Wasserstein-GAN
                # D update, summary and weight-clipping ops fetched in one run.
                _, summary_str, _ = self.sess.run(
                    [d_optim, self.d_sum, clip_d],
                    feed_dict={
                        self.inputs: batch_images,
                        self.z: batch_z
                    })
                self.writer.add_summary(summary_str, counter)

                # Update G network
                # Only on every fifth batch.
                if idx % 5 == 0:
                    _, summary_str = self.sess.run(
                        [g_optim, self.g_sum], feed_dict={self.z: batch_z})
                    self.writer.add_summary(summary_str, counter)
                ###################################################################################

                errD_fake = self.d_loss_fake.eval({self.z: batch_z})
                errD_real = self.d_loss_real.eval(
                    {self.inputs: batch_images})
                errG = self.g_loss.eval({self.z: batch_z})

            counter += 1
            print(
                "Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f"
                % (epoch, idx, batch_idxs, time.time() - start_time,
                   errD_fake + errD_real, errG))  # outputs

            # Every 100 steps: render the fixed samples and save them to disk.
            if np.mod(counter, 100) == 1:
                if config.dataset == 'mnist':
                    samples, d_loss, g_loss = self.sess.run(
                        [self.sampler, self.d_loss, self.g_loss],
                        feed_dict={
                            self.z: sample_z,
                            self.inputs: sample_inputs,
                            self.y: sample_labels,
                        })
                    manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
                    manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
                    save_images(
                        samples, [manifold_h, manifold_w],
                        './{}/train_{:02d}_{:04d}.png'.format(
                            config.sample_dir, epoch, idx))
                    print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                          (d_loss, g_loss))
                else:
                    # NOTE(review): the bare except hides every failure here,
                    # including KeyboardInterrupt, behind one generic message.
                    try:
                        samples, d_loss, g_loss = self.sess.run(
                            [self.sampler, self.d_loss, self.g_loss],
                            feed_dict={
                                self.z: sample_z,
                                self.inputs: sample_inputs,
                            },
                        )
                        manifold_h = int(np.ceil(np.sqrt(
                            samples.shape[0])))
                        manifold_w = int(
                            np.floor(np.sqrt(samples.shape[0])))
                        save_images(
                            samples, [manifold_h, manifold_w],
                            './{}/train_{:02d}_{:04d}.png'.format(
                                config.sample_dir, epoch, idx))
                        sample_blurred = np.asarray(sample_blurred)
                        save_images(
                            sample_blurred, [manifold_h, manifold_w],
                            './{}/blurred_{:02d}_{:04d}.png'.format(
                                config.sample_dir, epoch, idx))
                        print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                              (d_loss, g_loss))
                    except:
                        print("one pic error!...")

            # Every 500 steps: write a checkpoint.
            if np.mod(counter, 500) == 2:
                self.save(config.checkpoint_dir, counter)
def train(outdir):
    """Train a CFR network on the data at ``FLAGS.datapath`` and save the results.

    Builds the model with ``cfr.cfr_net``, minimises ``CFR.tot_loss`` with
    RMSProp on random mini-batches for ``FLAGS.iterations`` steps, logs the
    losses every ``FLAGS.output_delay`` iterations and finally writes the
    predictions, the loss history and (optionally) the learned weights and
    representations to disk.

    Args:
        outdir: String prefix for every output file. It is concatenated
            directly with the file names ('result', 'reps', 'y_pred', 'loss',
            'log.txt', 'config.txt'), so it should end with a path separator.
    """
    # Set up paths and start log
    npzfile = outdir + 'result'
    repfile = outdir + 'reps'
    outform = outdir + 'y_pred'
    lossform = outdir + 'loss'
    logfile = outdir + 'log.txt'
    # Create (or truncate) the log file before anything is appended to it.
    with open(logfile, 'w'):
        pass

    # Set random seeds
    random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    # Save parameters
    save_config(outdir + 'config.txt')

    log(logfile,
        'Training with hyperparameters: alpha=%.2e, lambda=%.2e'
        % (FLAGS.p_alpha, FLAGS.p_lambda))

    # Load data
    log(logfile, 'Loading data for dimensions... ' + FLAGS.datapath)
    x_all, t_all, y_f_all, y_cf_all = load_data(FLAGS.datapath)
    # Counterfactual outcomes are optional; without them the counterfactual
    # error is reported as NaN.
    have_truth = y_cf_all is not None

    dim = x_all.shape[1]
    n = x_all.shape[0]

    log(logfile, 'Loaded data with shape [%d,%d]' % (n, dim))

    # Start session
    log(logfile, 'Starting session...')
    sess = tf.Session()

    # Initialize input placeholders
    x_ = tf.placeholder("float", shape=[None, dim], name='x_')  # Features
    t_ = tf.placeholder("float", shape=[None, 1], name='t_')  # Treatment
    y_ = tf.placeholder("float", shape=[None, 1], name='y_')  # Outcome

    # Parameter placeholders
    alpha_ = tf.placeholder("float", name='alpha_')
    lambda_ = tf.placeholder("float", name='lambda_')
    do_in = tf.placeholder("float", name='dropout_in')
    do_out = tf.placeholder("float", name='dropout_out')
    p = tf.placeholder("float", name='p_treated')

    # Define model graph
    log(logfile, 'Defining graph...')
    dims = [dim, FLAGS.dim_in, FLAGS.dim_out]
    CFR = cfr.cfr_net(x_, t_, y_, p, FLAGS, alpha_, lambda_, do_in, do_out,
                      dims)

    if FLAGS.varsel:
        # Assign op used to write the projected selection weights back.
        w_proj = tf.placeholder("float", shape=[dim], name='w_proj')
        projection = CFR.weights_in[0].assign(w_proj)

    # Set up optimizer
    log(logfile, 'Training...')
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(FLAGS.lrate, global_step,
                                    NUM_ITERATIONS_PER_DECAY,
                                    FLAGS.lrate_decay, staircase=True)
    train_step = tf.train.RMSPropOptimizer(lr, FLAGS.decay).minimize(
        CFR.tot_loss, global_step=global_step)

    # Compute treatment probability
    t_cf_all = 1 - t_all
    if FLAGS.use_p_correction:
        p_treated = np.mean(t_all)
    else:
        p_treated = 0.5

    # Set up loss feed_dicts (dropout placeholders fed with 1.0)
    dict_factual = {x_: x_all, t_: t_all, y_: y_f_all,
                    do_in: 1.0, do_out: 1.0, alpha_: FLAGS.p_alpha,
                    lambda_: FLAGS.p_lambda, p: p_treated}

    if have_truth:
        dict_cfactual = {x_: x_all, t_: t_cf_all, y_: y_cf_all,
                         do_in: 1.0, do_out: 1.0}

    # Initialize tensorflow variables
    sess.run(tf.initialize_all_variables())

    # Compute losses before training
    losses = []
    obj_loss, f_error, imb_err = sess.run(
        [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss], feed_dict=dict_factual)

    cf_error = np.nan
    if have_truth:
        cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

    losses.append([obj_loss, f_error, cf_error, imb_err])

    log(logfile, 'Objective Factual CFactual Imbalance')
    log(logfile, str(losses[0]))

    # Train for m iterations
    for i in range(FLAGS.iterations):

        # Fetch sample (indices drawn without replacement)
        I = random.sample(range(0, n), FLAGS.batch_size)
        x_batch = x_all[I, :]
        t_batch = t_all[I]
        y_batch = y_f_all[I]

        batch_feed = {x_: x_batch, t_: t_batch, y_: y_batch,
                      do_in: FLAGS.dropout_in, do_out: FLAGS.dropout_out,
                      alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda,
                      p: p_treated}

        # Do one step of gradient descent
        sess.run(train_step, feed_dict=batch_feed)

        # Project variable selection weights
        if FLAGS.varsel:
            wip = cfr.simplex_project(sess.run(CFR.weights_in[0]), 1)
            sess.run(projection, feed_dict={w_proj: wip})

        # Compute loss every N iterations
        if i % FLAGS.output_delay == 0:
            obj_loss, f_error, imb_err = sess.run(
                [CFR.tot_loss, CFR.pred_loss, CFR.imb_loss],
                feed_dict=dict_factual)

            y_pred = sess.run(CFR.output, feed_dict=batch_feed)

            cf_error = np.nan
            if have_truth:
                cf_error = sess.run(CFR.pred_loss, feed_dict=dict_cfactual)

            losses.append([obj_loss, f_error, cf_error, imb_err])
            loss_str = str(i) + '\tObj: %.4g,\tF: %.4g,\tCf: %.4g,\tImb: %.4g' % (
                obj_loss, f_error, cf_error, imb_err)

            if FLAGS.loss == 'log':
                # Threshold the outputs at 0.5 to report batch accuracy.
                y_pred = 1.0 * (y_pred > 0.5)
                acc = 100 * (1 - np.mean(np.abs(y_batch - y_pred)))
                loss_str += ',\tAcc: %.2f%%' % acc

            log(logfile, loss_str)

    log(logfile, 'Ending learning rate: %.2g' % sess.run(lr))

    # Predict response and store (factual and flipped-treatment predictions)
    ypred_f = sess.run(CFR.output, feed_dict={
        x_: x_all, t_: t_all, do_in: 1.0, do_out: 1.0,
        alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})
    ypred_c = sess.run(CFR.output, feed_dict={
        x_: x_all, t_: t_cf_all, do_in: 1.0, do_out: 1.0,
        alpha_: FLAGS.p_alpha, lambda_: FLAGS.p_lambda})
    ypred = np.concatenate((ypred_f, ypred_c), axis=1)

    log(logfile, 'Saving to %s...' % outform)
    if FLAGS.output_csv:
        np.savetxt('%s.csv' % (outform), ypred, delimiter=',')
        np.savetxt('%s.csv' % (lossform), losses, delimiter=',')

    # Save results and predictions
    if FLAGS.varsel:
        # The previous code stacked these onto `all_weights` / `all_beta`
        # before either name was assigned, which raised NameError. There is
        # only one training run here, so the fetched values are stored as-is.
        all_weights = sess.run(CFR.weights_in[0])
        all_beta = sess.run(CFR.weights_pred)
        np.savez(npzfile, pred=ypred, loss=losses, w=all_weights,
                 beta=all_beta)
    else:
        np.savez(npzfile, pred=ypred, loss=losses)

    # Save representations
    if FLAGS.save_rep:
        reps = sess.run([CFR.h_rep], feed_dict={
            x_: x_all,
            do_in: 1.0,
            do_out: 0.0
        })
        np.savez(repfile, rep=reps)
def train(self, config):
    """Run the adversarial training loop for the discriminator and generator.

    Each step updates the discriminator once and the generator twice, writes
    the summaries to ``./logs``, prints the current losses, saves a grid of
    generated samples whenever ``counter % 100 == 1`` and a checkpoint
    whenever ``counter % 500 == 2``.

    Args:
        config: Object providing ``learning_rate``, ``beta1``, ``epoch``,
            ``data_dir``, ``dataset``, ``train_size``, ``batch_size``,
            ``sample_dir`` and ``checkpoint_dir``.
    """
    adam_d = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1)
    d_optim = adam_d.minimize(self.d_loss, var_list=self.d_vars)
    adam_g = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1)
    g_optim = adam_g.minimize(self.g_loss, var_list=self.g_vars)

    try:
        tf.global_variables_initializer().run()
    except AttributeError:
        # Older TensorFlow releases only provide the legacy initializer.
        tf.initialize_all_variables().run()

    self.g_sum = merge_summary([self.z_sum, self.d__sum, self.G_sum,
                                self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    self.writer = SummaryWriter("./logs", self.sess.graph)

    # Fixed noise and fixed real images, reused for every sample grid.
    sample_z = np.random.uniform(-1, 1, size=(self.sample_num, self.z_dim))

    sample_files = self.data[0:self.sample_num]
    sample = [
        get_image(sample_file,
                  input_height=self.input_height,
                  input_width=self.input_width,
                  resize_height=self.output_height,
                  resize_width=self.output_width,
                  crop=self.crop,
                  grayscale=self.grayscale)
        for sample_file in sample_files]
    if self.grayscale:
        # Append a trailing channel axis to the grayscale images.
        sample_inputs = np.array(sample).astype(np.float32)[:, :, :, None]
    else:
        sample_inputs = np.array(sample).astype(np.float32)

    counter = 1
    start_time = time.time()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        # Resume the step counter from the restored checkpoint.
        counter = checkpoint_counter
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    for epoch in xrange(config.epoch):
        # The file list is re-read at the start of every epoch.
        self.data = glob(os.path.join(
            config.data_dir, config.dataset, self.input_fname_pattern))
        batch_idxs = min(len(self.data), config.train_size) // config.batch_size

        for idx in xrange(0, batch_idxs):
            batch_files = self.data[idx * config.batch_size:
                                    (idx + 1) * config.batch_size]
            batch = [
                get_image(batch_file,
                          input_height=self.input_height,
                          input_width=self.input_width,
                          resize_height=self.output_height,
                          resize_width=self.output_width,
                          crop=self.crop,
                          grayscale=self.grayscale)
                for batch_file in batch_files]
            # Remove None values
            batch = [x for x in batch if x is not None]
            if self.grayscale:
                batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
            else:
                batch_images = np.array(batch).astype(np.float32)

            batch_z = np.random.uniform(
                -1, 1, [config.batch_size, self.z_dim]).astype(np.float32)

            # Update D network
            _, summary_str = self.sess.run(
                [d_optim, self.d_sum],
                feed_dict={self.inputs: batch_images, self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            # Update G network
            _, summary_str = self.sess.run(
                [g_optim, self.g_sum], feed_dict={self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            # Run g_optim twice to make sure that d_loss does not go to zero
            # (different from paper)
            _, summary_str = self.sess.run(
                [g_optim, self.g_sum], feed_dict={self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            errD_fake = self.d_loss_fake.eval({self.z: batch_z})
            errD_real = self.d_loss_real.eval({self.inputs: batch_images})
            errG = self.g_loss.eval({self.z: batch_z})

            counter += 1
            print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f"
                  % (epoch, config.epoch, idx, batch_idxs,
                     time.time() - start_time, errD_fake + errD_real, errG))

            if np.mod(counter, 100) == 1:
                # Sampling is best-effort: a failure is reported and training
                # continues. Only Exception is caught, so Ctrl-C and
                # SystemExit still stop the run.
                try:
                    samples, d_loss, g_loss = self.sess.run(
                        [self.sampler, self.d_loss, self.g_loss],
                        feed_dict={
                            self.z: sample_z,
                            self.inputs: sample_inputs,
                        },
                    )
                    save_images(samples, image_manifold_size(samples.shape[0]),
                                './{}/train_{:02d}_{:04d}.png'.format(
                                    config.sample_dir, epoch, idx))
                    print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))
                except Exception:
                    print("one pic error!...")

            if np.mod(counter, 500) == 2:
                self.save(config.checkpoint_dir, counter)
def train(self, lr=1e-3, epochs=100):
    """Compile ``self.model`` with Adam and fit it on the training split.

    Args:
        lr: Learning rate for Adam; the optimizer's ``decay`` is ``lr / 10``.
        epochs: Number of training epochs. This argument used to be ignored
            in favour of a hard-coded 100, which is still the default.
    """
    optimizer = Adam(lr=lr, decay=lr / 10)
    self.model.compile(loss="sparse_categorical_crossentropy",
                       optimizer=optimizer,
                       metrics=['accuracy'])
    # Runs the TF initializer in the Keras backend session before fitting.
    keras.backend.get_session().run(tf.initialize_all_variables())
    self.model.fit(self.X_train, self.y_train_vect,
                   batch_size=50,
                   nb_epoch=epochs,
                   verbose=1,
                   validation_data=(self.X_val, self.y_val_vect))
def main(argv=None):
    """Run one task of the distributed MNIST convnet job.

    Starts a ``tf.train.Server`` for the task described by ``FLAGS.job_name``
    and ``FLAGS.task_index``. A "ps" task blocks in ``server.join()``; a
    "worker" task builds the two-layer convnet, trains it under a
    ``tf.train.Supervisor`` and prints minibatch, validation and test errors.

    Args:
        argv: Unused.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.self_test:
        print('Running self-test.')
        train_data, train_labels = fake_data(256)
        validation_data, validation_labels = fake_data(EVAL_BATCH_SIZE)
        test_data, test_labels = fake_data(EVAL_BATCH_SIZE)
        num_epochs = 1
    else:
        train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
        train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
        test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
        test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

        train_data = extract_data(train_data_filename, 60000)
        train_labels = extract_labels(train_labels_filename, 60000)
        test_data = extract_data(test_data_filename, 10000)
        test_labels = extract_labels(test_labels_filename, 10000)

        # The first VALIDATION_SIZE training examples become the validation set.
        validation_data = train_data[:VALIDATION_SIZE, ...]
        validation_labels = train_labels[:VALIDATION_SIZE]
        train_data = train_data[VALIDATION_SIZE:, ...]
        train_labels = train_labels[VALIDATION_SIZE:]
        num_epochs = NUM_EPOCHS

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            train_size = train_labels.shape[0]

            train_data_node = tf.placeholder(
                tf.float32,
                shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
            train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE, ))
            eval_data = tf.placeholder(
                tf.float32,
                shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            conv1_weights = tf.Variable(
                tf.truncated_normal([5, 5, NUM_CHANNELS, 32],
                                    stddev=0.1, seed=SEED))
            conv1_biases = tf.Variable(tf.zeros([32]))
            conv2_weights = tf.Variable(
                tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED))
            conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
            fc1_weights = tf.Variable(
                tf.truncated_normal(
                    [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
                    stddev=0.1, seed=SEED))
            fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
            fc2_weights = tf.Variable(
                tf.truncated_normal([512, NUM_LABELS], stddev=0.1, seed=SEED))
            fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

            def model(data, train=False):
                """Return the logits of the convnet for `data`; dropout only if `train`."""
                conv = tf.nn.conv2d(data, conv1_weights,
                                    strides=[1, 1, 1, 1], padding='SAME')
                relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
                pool = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                                      strides=[1, 2, 2, 1], padding='SAME')
                conv = tf.nn.conv2d(pool, conv2_weights,
                                    strides=[1, 1, 1, 1], padding='SAME')
                relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
                pool = tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                                      strides=[1, 2, 2, 1], padding='SAME')
                # Flatten everything but the batch axis for the dense layers.
                pool_shape = pool.get_shape().as_list()
                reshape = tf.reshape(pool, [
                    pool_shape[0],
                    pool_shape[1] * pool_shape[2] * pool_shape[3]
                ])
                hidden = tf.nn.relu(
                    tf.matmul(reshape, fc1_weights) + fc1_biases)
                if train:
                    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
                return tf.matmul(hidden, fc2_weights) + fc2_biases

            logits = model(train_data_node, True)
            # Keyword arguments: newer TensorFlow releases reject positional
            # use of this op, and the names are the same in older releases.
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=train_labels_node))

            # L2 penalty on the fully connected parameters only.
            regularizers = (tf.nn.l2_loss(fc1_weights) +
                            tf.nn.l2_loss(fc1_biases) +
                            tf.nn.l2_loss(fc2_weights) +
                            tf.nn.l2_loss(fc2_biases))
            loss += 5e-4 * regularizers

            # `batch` counts optimizer steps and drives the learning-rate decay.
            batch = tf.Variable(0)
            learning_rate = tf.train.exponential_decay(0.01,
                                                       batch * BATCH_SIZE,
                                                       train_size,
                                                       0.95,
                                                       staircase=True)
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, 0.9).minimize(loss, global_step=batch)

            train_prediction = tf.nn.softmax(logits)
            eval_prediction = tf.nn.softmax(model(eval_data))

            def eval_in_batches(data, sess):
                """Return predictions for `data`, computed EVAL_BATCH_SIZE rows at a time."""
                size = data.shape[0]
                if size < EVAL_BATCH_SIZE:
                    raise ValueError(
                        "batch size for evals larger than dataset: %d" % size)
                predictions = numpy.ndarray(shape=(size, NUM_LABELS),
                                            dtype=numpy.float32)
                for begin in xrange(0, size, EVAL_BATCH_SIZE):
                    end = begin + EVAL_BATCH_SIZE
                    if end <= size:
                        predictions[begin:end, :] = sess.run(
                            eval_prediction,
                            feed_dict={eval_data: data[begin:end, ...]})
                    else:
                        # Final partial batch: evaluate the last full window
                        # and keep only the rows not yet filled in.
                        batch_predictions = sess.run(
                            eval_prediction,
                            feed_dict={
                                eval_data: data[-EVAL_BATCH_SIZE:, ...]
                            })
                        predictions[begin:, :] = batch_predictions[begin - size:, :]
                return predictions

            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()
            print('Initialized!')

        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 global_step=batch)

        start_time = time.time()
        with sv.prepare_or_wait_for_session(server.target, config=None) as sess:
            for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
                offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
                batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
                feed_dict = {
                    train_data_node: batch_data,
                    train_labels_node: batch_labels
                }
                _, l, lr, predictions = sess.run(
                    [optimizer, loss, learning_rate, train_prediction],
                    feed_dict=feed_dict)
                if step % EVAL_FREQUENCY == 0:
                    elapsed_time = time.time() - start_time
                    start_time = time.time()
                    print('Step %d (epoch %.2f), %.1f ms' %
                          (step, float(step) * BATCH_SIZE / train_size,
                           1000 * elapsed_time / EVAL_FREQUENCY))
                    print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
                    print('Minibatch error: %.1f%%' %
                          error_rate(predictions, batch_labels))
                    print('Validation error: %.1f%%' %
                          error_rate(eval_in_batches(validation_data, sess),
                                     validation_labels))
                    sys.stdout.flush()

            test_error = error_rate(eval_in_batches(test_data, sess),
                                    test_labels)
            print('Test error: %.1f%%' % test_error)
            if FLAGS.self_test:
                print('test_error', test_error)
                assert test_error == 0.0, 'expected 0.0 test_error, got %.2f' % (
                    test_error, )
        sv.stop()
def __init__(self,
             phase,
             visualize,
             output_dir,
             batch_size,
             initial_learning_rate,
             steps_per_checkpoint,
             model_dir,
             target_embedding_size,
             attn_num_hidden,
             attn_num_layers,
             clip_gradients,
             max_gradient_norm,
             session,
             load_model,
             gpu_id,
             custom_cnn,
             use_gru,
             use_distance=True,
             max_image_width=160,
             max_image_height=60,
             max_prediction_length=18,
             channels=1,
             reg_val=0):
    """Build the CNN + attention-decoder graph and restore or initialise it.

    Logs the configuration, writes it to a file under ``history/``, builds the
    image pipeline, the CNN encoder and the ``Seq2SeqModel`` decoder, defines
    the ``prediction`` / ``probability`` outputs and, when ``phase`` is
    'train', the Adadelta update op. Finally it either restores the latest
    checkpoint from ``model_dir`` (when one exists and ``load_model`` is
    truthy) or initialises all variables.

    Args:
        phase: 'train' enables the training ops; any other value builds a
            forward-only graph. 'test' also forces ``batch_size`` to 1.
        visualize: Stored on the instance and logged.
        output_dir: Stored on the instance and logged.
        batch_size: Stored on the instance (overridden to 1 for 'test').
        initial_learning_rate: Learning rate passed to Adadelta.
        steps_per_checkpoint: Stored on the instance and logged.
        model_dir: Checkpoint directory; created if it does not exist.
        target_embedding_size: Forwarded to ``Seq2SeqModel``.
        attn_num_hidden: Forwarded to ``Seq2SeqModel``.
        attn_num_layers: Forwarded to ``Seq2SeqModel``.
        clip_gradients: When truthy, gradients are clipped by global norm.
        max_gradient_norm: Clipping threshold used with ``clip_gradients``.
        session: TensorFlow session used to restore or initialise variables.
        load_model: Whether an existing checkpoint should be restored.
        gpu_id: Non-negative selects '/gpu:<id>', otherwise '/cpu:0'.
        custom_cnn: Selects ``CNN_cust`` instead of ``CNN`` as the encoder.
        use_gru: Forwarded to ``Seq2SeqModel``.
        use_distance: Stored on the instance.
        max_image_width: With ``max_image_height``, determines the resized
            width and from it the encoder size.
        max_image_height: See ``max_image_width``.
        max_prediction_length: Decoder size is this value plus 2.
        channels: Stored on the instance and logged.
        reg_val: When greater than 0, weight applied to the summed
            regularization losses added to the training loss.
    """
    self.use_distance = use_distance
    self.start_time = str(int(time.time()))

    # We need resized width, not the actual width
    max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

    self.max_original_width = max_image_width
    self.max_width = int(math.ceil(max_resized_width))

    self.encoder_size = int(math.ceil(1. * self.max_width / 4))
    self.decoder_size = max_prediction_length + 2
    self.buckets = [(self.encoder_size, self.decoder_size)]

    if gpu_id >= 0:
        device_id = '/gpu:' + str(gpu_id)
    else:
        device_id = '/cpu:0'
    self.device_id = device_id

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if phase == 'test':
        batch_size = 1

    logging.info('phase: %s', phase)
    logging.info('model_dir: %s', model_dir)
    logging.info('load_model: %s', load_model)
    logging.info('output_dir: %s', output_dir)
    logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
    logging.info('batch_size: %d', batch_size)
    logging.info('learning_rate: %f', initial_learning_rate)
    logging.info('reg_val: %d', reg_val)
    logging.info('max_gradient_norm: %f', max_gradient_norm)
    logging.info('clip_gradients: %s', clip_gradients)
    logging.info('max_image_width %f', max_image_width)
    logging.info('max_prediction_length %f', max_prediction_length)
    logging.info('channels: %d', channels)
    logging.info('target_embedding_size: %f', target_embedding_size)
    logging.info('attn_num_hidden: %d', attn_num_hidden)
    logging.info('attn_num_layers: %d', attn_num_layers)
    logging.info('visualize: %s', visualize)

    if use_gru:
        logging.info('using GRU in the decoder.')

    self.reg_val = reg_val
    self.sess = session
    self.steps_per_checkpoint = steps_per_checkpoint
    self.model_dir = model_dir
    self.output_dir = output_dir
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)
    self.phase = phase
    self.visualize = visualize
    self.learning_rate = initial_learning_rate
    self.clip_gradients = clip_gradients
    self.channels = channels

    train_config_file = "history/training_config" + self.start_time + ".txt"
    # Create the target directory first; opening the file used to fail when
    # "history/" did not exist.
    history_dir = os.path.dirname(train_config_file)
    if not os.path.exists(history_dir):
        os.makedirs(history_dir)

    config_entries = [
        ('model_dir: ', model_dir),
        ('phase: ', phase),
        ('load_model: ', load_model),
        ('output_dir: ', output_dir),
        ('steps_per_checkpoint: ', steps_per_checkpoint),
        ('batch_size: ', batch_size),
        ('learning_rate: ', initial_learning_rate),
        ('reg_val: ', reg_val),
        ('max_gradient_norm: ', max_gradient_norm),
        ('clip_gradients: ', clip_gradients),
        ('max_image_width ', max_image_width),
        ('max_prediction_length ', max_prediction_length),
        ('channels: ', channels),
        ('target_embedding_size: ', target_embedding_size),
        ('attn_num_hidden: ', attn_num_hidden),
        ('attn_num_layers: ', attn_num_layers),
        ('visualize: ', visualize),
    ]
    with open(train_config_file, "w") as f:
        for label, value in config_entries:
            print(label, value, file=f)

    self.forward_only = phase != 'train'

    with tf.device(device_id):

        self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
        self.height_float = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.float64)

        self.img_pl = tf.placeholder(tf.string, name='input_image_as_bytes')
        # A rank-0 input (single image) is expanded to a batch of one.
        self.img_data = tf.cond(tf.less(tf.rank(self.img_pl), 1),
                                lambda: tf.expand_dims(self.img_pl, 0),
                                lambda: self.img_pl)
        self.img_data = tf.map_fn(self._prepare_image, self.img_data,
                                  dtype=tf.float32)
        num_images = tf.shape(self.img_data)[0]

        # TODO: create a mask depending on the image/batch size
        self.encoder_masks = []
        for i in xrange(self.encoder_size + 1):
            self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(self.decoder_size + 1):
            self.decoder_inputs.append(tf.tile([1], [num_images]))
            # Only the extra, last position gets a zero target weight.
            if i < self.decoder_size:
                self.target_weights.append(tf.tile([1.], [num_images]))
            else:
                self.target_weights.append(tf.tile([0.], [num_images]))

        if custom_cnn:
            cnn_model = CNN_cust(self.img_data, not self.forward_only)
        else:
            cnn_model = CNN(self.img_data, not self.forward_only)
        self.conv_output = cnn_model.tf_output()
        self.perm_conv_output = tf.transpose(self.conv_output, perm=[1, 0, 2])

        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            target_vocab_size=len(DataGen.CHARMAP),
            buckets=self.buckets,
            target_embedding_size=target_embedding_size,
            attn_num_layers=attn_num_layers,
            attn_num_hidden=attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=use_gru)

        # In-graph lookup table from character id to character.
        table = tf.contrib.lookup.MutableHashTable(
            key_dtype=tf.int64,
            value_dtype=tf.string,
            default_value="",
            checkpoint=True,
        )

        insert = table.insert(
            tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
            tf.constant(DataGen.CHARMAP),
        )

        with tf.control_dependencies([insert]):
            num_feed = []
            prb_feed = []

            for line in xrange(len(self.attention_decoder_model.output)):
                guess = tf.argmax(
                    self.attention_decoder_model.output[line], axis=1)
                proba = tf.reduce_max(
                    tf.nn.softmax(self.attention_decoder_model.output[line]),
                    axis=1)
                num_feed.append(guess)
                prb_feed.append(proba)

            # Join the predictions into a single output string.
            trans_output = tf.transpose(num_feed)
            trans_output = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.cond(
                        tf.equal(x, DataGen.EOS_ID),
                        lambda: '',
                        lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                    ),
                    m,
                    initializer=''),
                trans_output,
                dtype=tf.string)

            # Calculate the total probability of the output string.
            trans_outprb = tf.transpose(prb_feed)
            trans_outprb = tf.gather(trans_outprb,
                                     tf.range(tf.size(trans_output)))
            trans_outprb = tf.map_fn(
                lambda m: tf.foldr(
                    lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                    m,
                    initializer=tf.cast(1, tf.float64)),
                trans_outprb,
                dtype=tf.float64)

            # A batch of one yields its single element instead of a
            # one-element batch.
            self.prediction = tf.cond(
                tf.equal(tf.shape(trans_output)[0], 1),
                lambda: trans_output[0],
                lambda: trans_output,
            )
            self.probability = tf.cond(
                tf.equal(tf.shape(trans_outprb)[0], 1),
                lambda: trans_outprb[0],
                lambda: trans_outprb,
            )

            self.prediction = tf.identity(self.prediction, name='prediction')
            self.probability = tf.identity(self.probability, name='probability')

        if not self.forward_only:  # train
            self.updates = []
            self.summaries_by_bucket = []

            params = tf.trainable_variables()
            opt = tf.train.AdadeltaOptimizer(
                learning_rate=initial_learning_rate)
            loss_op = self.attention_decoder_model.loss

            if self.reg_val > 0:
                reg_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                logging.info('Adding %s regularization losses', len(reg_losses))
                logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                loss_op = self.reg_val * tf.reduce_sum(reg_losses) + loss_op

            gradients, params = list(
                zip(*opt.compute_gradients(loss_op, params)))
            if self.clip_gradients:
                gradients, _ = tf.clip_by_global_norm(gradients,
                                                      max_gradient_norm)

            # Summaries for loss, variables, gradients, gradient norms and
            # total gradient norm.
            summaries = [
                tf.summary.scalar("loss", loss_op),
                tf.summary.scalar("total_gradient_norm",
                                  tf.global_norm(gradients))
            ]
            all_summaries = tf.summary.merge(summaries)
            self.summaries_by_bucket.append(all_summaries)

            # update op - apply gradients
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.updates.append(
                    opt.apply_gradients(list(zip(gradients, params)),
                                        global_step=self.global_step))

    # tf.global_variables / tf.global_variables_initializer replace the
    # deprecated tf.all_variables / tf.initialize_all_variables.
    self.saver_all = tf.train.Saver(tf.global_variables())
    self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and load_model:
        # pylint: disable=no-member
        logging.info("Reading model parameters from %s",
                     ckpt.model_checkpoint_path)
        self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Created model with fresh parameters.")
        self.sess.run(tf.global_variables_initializer())
def train(x_tr, y_tr, x_va, y_va, x_te, BATCH_SIZE, TEST_SIZE, EPOCH,
          SAMPLE_NUM, NUM_LABELS, REGULARIZER_RATE, LEARNING_RATE,
          MODEL_SAVE_PATH, MODEL_NAME, TIMEPNG_NAME, ACCPNG_NAME, LOG_NAME,
          PREOUT_NAME):
    """Train the network from `inference` with capped-gradient SGD.

    After every epoch the first TEST_SIZE validation samples are scored one
    at a time, the mean accuracy is appended to LOG_NAME and the accuracy
    curve is plotted to ACCPNG_NAME. After training, `sub_pred` is evaluated
    for every training sample and the stacked result is saved to PREOUT_NAME.

    Args:
        x_tr: 4-D np.array of training inputs, [samples, height, width, channel].
        y_tr: 2-D array of training labels, [samples, labels]; the class is
            taken as the argmax over the label axis.
        x_va: Validation inputs, indexed like x_tr.
        y_va: Validation labels, indexed like y_tr.
        x_te: Unused.
        BATCH_SIZE: int, training batch size.
        TEST_SIZE: int, number of validation samples scored per epoch.
        EPOCH: int, last epoch index; epochs 0..EPOCH inclusive are run.
        SAMPLE_NUM: int, number of training samples (unused).
        NUM_LABELS: int, number of classes.
        REGULARIZER_RATE: float, L2 regularization coefficient.
        LEARNING_RATE: float, gradient-descent learning rate.
        MODEL_SAVE_PATH: Directory for model files (unused, nothing is saved).
        MODEL_NAME: Model name (unused).
        TIMEPNG_NAME: Unused.
        ACCPNG_NAME: Output path handed to `plot_report1`.
        LOG_NAME: Path of the text log that is appended to.
        PREOUT_NAME: Path handed to `np.save` for the stacked `sub_pred` values.
    """
    accuracy_report = []
    num_train = x_tr.shape[0]

    g2 = tf.Graph()
    with g2.as_default():
        # NUM_CHANNELS is not a parameter; it is read from module scope.
        x_holder = tf.placeholder(
            tf.float32, [None, x_tr.shape[1], x_tr.shape[2], NUM_CHANNELS],
            name='x-input')
        y_holder = tf.placeholder(tf.float32, [None, NUM_LABELS],
                                  name='y-input')
        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZER_RATE)
        sub_pred, pred = inference(NUM_LABELS, x_holder, regularizer, False,
                                   True)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred, labels=tf.argmax(y_holder, 1))
        cross_entropy_sum = tf.reduce_sum(cross_entropy)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # Total loss = mean cross entropy + everything in the 'losses' collection.
        loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

        opt = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        grads_and_vars = opt.compute_gradients(loss, tf.trainable_variables())
        # Every gradient goes through MyCapper before being applied.
        capped_grads_and_vars = [(MyCapper(grad), var)
                                 for grad, var in grads_and_vars]
        train_op = opt.apply_gradients(capped_grads_and_vars)

        correction_prediction = tf.equal(tf.argmax(y_holder, 1),
                                         tf.argmax(pred, 1))
        accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))
        out = tf.argmax(pred, 1)
        init = tf.initialize_all_variables()
        g2.finalize()

    with tf.Session(graph=g2) as sess2:
        sess2.run(init)
        epoch = 0
        max_batch = num_train // BATCH_SIZE
        while epoch <= EPOCH:
            batch = 0
            # Strict '<': with '<=' an empty batch was fed to train_op
            # whenever the sample count was an exact multiple of BATCH_SIZE.
            while batch * BATCH_SIZE < num_train:
                start = batch * BATCH_SIZE
                stop = start + BATCH_SIZE
                if stop > num_train:
                    # Final, partial batch of this epoch.
                    stop = num_train
                    print('will break')
                xs = x_tr[start:stop, :]
                ys = y_tr[start:stop, :]
                batch += 1

                start_time = time.time()
                _, cem_value, loss_value = sess2.run(
                    [train_op, cross_entropy_mean, loss],
                    feed_dict={
                        x_holder: xs,
                        y_holder: ys
                    })
                end_time = time.time()

                # Logging frequency: every 100 batches.
                if batch % 100 == 0:
                    log_line = 'epoch:%d/%d batch:%d/%d lr:%s cem:%f ce+l2:%f time:%f' % (
                        epoch, EPOCH, batch, max_batch, LEARNING_RATE,
                        cem_value, loss_value, end_time - start_time)
                    print(log_line)
                    with open(LOG_NAME, 'a') as f:
                        f.write(log_line + '\n')
            epoch += 1

            # NOTE(review): validation is assumed to run once per epoch; the
            # collapsed source does not show its nesting level -- confirm.
            accuracy_i_list = []
            for i in range(TEST_SIZE):
                if i % 500 == 0:
                    print('accuracy_calu:%d/%d' % (i, TEST_SIZE))
                # Score one validation sample at a time (batch of one).
                x_va_i = np.reshape(
                    x_va[i, :, :, :],
                    [1, x_va.shape[1], x_va.shape[2], x_va.shape[3]])
                y_va_i = np.reshape(y_va[i, :], [1, y_va.shape[1]])
                accuracy_i = sess2.run(accuracy,
                                       feed_dict={
                                           x_holder: x_va_i,
                                           y_holder: y_va_i
                                       })
                accuracy_i_list.append(accuracy_i)
            accuracy_score = np.mean(np.array(accuracy_i_list))
            print(accuracy_score)
            with open(LOG_NAME, 'a') as f:
                f.write(str(accuracy_score) + '\n')
            accuracy_report.append(accuracy_score)
            plot_report1(accuracy_report, ACCPNG_NAME)

        # Evaluate sub_pred for every training sample, one at a time.
        sub_list = []
        for i in range(num_train):
            if i % 500 == 0:
                print('o_list:%d/%d' % (i, num_train))
            o_i_x = np.reshape(
                x_tr[i, :, :, :],
                [1, x_tr.shape[1], x_tr.shape[2], x_tr.shape[3]])
            o_i_y = np.reshape(y_tr[i, :], [1, y_tr.shape[1]])
            sub_i = sess2.run(sub_pred,
                              feed_dict={
                                  x_holder: o_i_x,
                                  y_holder: o_i_y
                              })
            sub_list.append(sub_i)
        sub_array = np.vstack(sub_list)
        np.save(PREOUT_NAME, sub_array)
def train(self, config):
    """Train DCGAN

    Only the generator is optimised here. Validation images from
    ``./data/<dataset>/valid`` provide a fixed sample batch whose
    reconstructions are written to ``./samples`` whenever
    ``counter % 100 == 1``; a checkpoint is saved whenever
    ``counter % 500 == 2``.

    Args:
        config: Object providing ``dataset``, ``learning_rate``, ``beta1``,
            ``epoch``, ``train_size``, ``batch_size`` and ``checkpoint_dir``.
    """
    # first setup validation data
    data = sorted(glob(os.path.join("./data", config.dataset, "valid", "*.jpg")))

    adam = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1)
    g_optim = adam.minimize(self.g_loss, var_list=self.g_vars)
    # Replaces the deprecated tf.initialize_all_variables(); the tf.summary
    # API used below ships in the same TensorFlow releases.
    tf.global_variables_initializer().run()

    self.saver = tf.train.Saver()
    self.g_sum = tf.summary.merge([self.G_sum, self.g_loss_sum])
    self.writer = tf.summary.FileWriter("./logs", self.sess.graph)

    sample_files = data[0:self.sample_size]
    sample = [get_image(sample_file, self.image_size, is_crop=self.is_crop)
              for sample_file in sample_files]
    # Network inputs are the samples resized to input_size x input_size.
    sample_inputs = [doresize(xx, [self.input_size, ] * 2) for xx in sample]
    sample_images = np.array(sample).astype(np.float32)
    sample_input_images = np.array(sample_inputs).astype(np.float32)

    save_images(sample_input_images, [8, 8], './samples/inputs_small.png')
    save_images(sample_images, [8, 8], './samples/reference.png')

    counter = 1
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    # we only save the validation inputs once
    have_saved_inputs = False

    for epoch in xrange(config.epoch):
        # The training file list is re-read at the start of every epoch.
        data = sorted(glob(os.path.join("./data", config.dataset, "train", "*.jpg")))
        batch_idxs = min(len(data), config.train_size) // config.batch_size

        for idx in xrange(0, batch_idxs):
            batch_files = data[idx * config.batch_size:(idx + 1) * config.batch_size]
            batch = [get_image(batch_file, self.image_size, is_crop=self.is_crop)
                     for batch_file in batch_files]
            input_batch = [doresize(xx, [self.input_size, ] * 2) for xx in batch]
            batch_images = np.array(batch).astype(np.float32)
            batch_inputs = np.array(input_batch).astype(np.float32)

            # Update G network
            _, summary_str, errG = self.sess.run(
                [g_optim, self.g_sum, self.g_loss],
                feed_dict={self.inputs: batch_inputs,
                           self.images: batch_images})
            self.writer.add_summary(summary_str, counter)

            counter += 1
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f"
                  % (epoch, idx, batch_idxs, time.time() - start_time, errG))

            if np.mod(counter, 100) == 1:
                samples, g_loss, up_inputs = self.sess.run(
                    [self.G, self.g_loss, self.up_inputs],
                    feed_dict={self.inputs: sample_input_images,
                               self.images: sample_images})
                if not have_saved_inputs:
                    save_images(up_inputs, [8, 8], './samples/inputs.png')
                    have_saved_inputs = True
                save_images(samples, [8, 8],
                            './samples/valid_%s_%s.png' % (epoch, idx))
                print("[Sample] g_loss: %.8f" % (g_loss))

            if np.mod(counter, 500) == 2:
                self.save(config.checkpoint_dir, counter)
def initialize(self):
    """Unroll the network over ``self.serie_length`` steps and start a session.

    For every time step this creates three placeholders (price derivative,
    cost, features), chains the Merge/Block/Lstm layers, and records the
    decision output and its reward. The summed reward is maximized with
    RMSProp (by minimizing its negative).

    Sets ``self.optimizer``, ``self.tot_reward``, ``self.out``, ``self.Z``,
    ``self.C``, ``self.F`` and ``self.session`` (already initialized).
    """
    n_series = self.n_series
    features = self.features
    serie_length = self.serie_length

    # The last action performed (default is 0 - Neutral).
    previous_action = 0.
    # Content of the LSTM cells at time -1.
    cell_state = np.zeros((1, self.nLSTMCells)).astype(np.float32)
    # Output of the LSTM at time -1.
    cell_output = np.zeros((1, self.nLSTMCells)).astype(np.float32)

    # Stock price derivative, one placeholder per time step.
    Z = [tf.placeholder("float", [None, n_series]) for _ in range(serie_length)]
    # Stock cost, one placeholder per time step.
    C = [tf.placeholder("float", [None, n_series]) for _ in range(serie_length)]
    # Features, one placeholder per time step.
    F = [tf.placeholder("float", [None, features]) for _ in range(serie_length)]

    # Output of the network (decision) through time.
    decisions = []
    # Reward through time.
    rewards = []

    # Unfold through time.
    # The dataset shape is described as (n_row, length, n_series, n_features);
    # each Z should be a vertical vector (n_row, 1, n_series, n_features).
    # NOTE(review): every self.*_var list below is re-created on each time
    # step, so after the loop they only hold what the last step stored in
    # them - confirm this is intended.
    for t in xrange(serie_length):
        print("%s %s %s %s" % ("Unfold: ", t + 1, "out of", serie_length))

        shared_depth = self.sharedBoxShape[0]
        shared_width = self.sharedBoxShape[1]
        input_width = n_series * 2 + features

        # Merge of the input.
        self.inputShared1_var = []
        merged_inputs = Merge(
            [self.norm_prices(Z[t]), self.norm_costs(C[t]), self.norm_features(F[t])],
            [n_series, n_series, features],
            input_width,
            tf.tanh,
            self.inputShared1_var)

        # Shared block 1: elaboration of the input.
        self.sharedBlock1_var = []
        shared_1 = Block(merged_inputs, input_width,
                         [shared_width] * shared_depth,
                         tf.tanh, self.sharedBlock1_var, dropout=self.dropout)

        # Features given by shared block 1 and the previous LSTM output.
        self.inputShared2_var = []
        merged_state = Merge([shared_1, cell_output],
                             [shared_width, self.nLSTMCells],
                             shared_width,
                             tf.tanh, self.inputShared2_var)

        self.sharedBlock2_var = []
        shared_2 = Block(merged_state, shared_width,
                         [shared_width] * shared_depth,
                         tf.tanh, self.sharedBlock2_var, dropout=self.dropout)

        # Each block represents a gate for the LSTM cells.
        self.block1_var = []
        self.block2_var = []
        self.block3_var = []
        self.block4_var = []

        def build_gate(var_list):
            # All four gates share the same input and layer layout.
            return Block(shared_2, shared_width,
                         [self.blocksShape[1]] * self.blocksShape[0] + [self.nLSTMCells],
                         tf.tanh, var_list, dropout=self.dropout)

        gate_1 = build_gate(self.block1_var)
        gate_2 = build_gate(self.block2_var)
        gate_3 = build_gate(self.block3_var)
        gate_4 = build_gate(self.block4_var)

        # LSTM cells.
        cell_state, cell_output = Lstm(gate_1, gate_2, gate_3, gate_4, cell_state)

        self.outerBlock_var = []
        decision = Block(cell_output, self.nLSTMCells,
                         [self.decisionBlockShape[1]] * self.decisionBlockShape[0] + [n_series],
                         tf.tanh, self.outerBlock_var, dropout=self.dropout)
        decisions.append(decision)

        step_reward = d(previous_action, decision,
                        self.denorm_prices(Z[t]), self.denorm_costs(C[t]))
        rewards.append(tf.reduce_sum(step_reward))
        previous_action = decision

    total_reward = 0.
    for step_reward in rewards:
        total_reward = total_reward + tf.reduce_sum(step_reward)

    # We should maximize the reward, which is the same as minimizing -reward.
    self.optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(-total_reward)
    self.tot_reward = total_reward
    self.out = decisions
    self.Z = Z
    self.C = C
    self.F = F

    init_op = tf.initialize_all_variables()
    self.session = tf.Session()
    self.session.run(init_op)
def main(_):
    """Train and evaluate one convolutional classifier per dataset.

    For every entry of the module-level ``_datasets`` list this loads the
    dataset, builds a bank of convolutions with kernel lengths 3, 12, ..., 93
    followed by global max-pooling and a linear layer, trains it for 7001
    steps (reporting validation ROC AUC every 1000 steps), and appends the
    final test ROC AUC to a timestamped CSV file.

    Relies on the module-level ``sess`` as the default session.
    NOTE(review): a new model is added to the same graph for each dataset;
    confirm whether the graph should be reset between datasets.

    Args:
        _: unused (argument supplied by the application runner).
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Fed during training but not consumed by any op: dropout on the
    # convolution outputs is currently disabled.
    dropout_on = tf.placeholder(tf.float32)

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # create/clear the file, write the header
        f.write('dataset_num,dataset_name,roc_auc\n')

    n_conv1 = 768  # number of filters per kernel length

    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONS
        convs_list_flat = []
        # Try conv kernels up to some fraction of the DNA sequence length 101.
        for L_conv1 in range(3, 101, 9):
            conv1 = convolution2d(x_image, n_conv1, [L_conv1, 4],
                                  padding='VALID', normalizer_fn=None)
            conv1_drop_len = int(101 - L_conv1 + 1)
            # Global max-pooling over the whole VALID-convolution output.
            conv1_pool = max_pool2d(conv1, [conv1_drop_len, 1],
                                    [conv1_drop_len, 1])
            convs_list_flat.append(flatten(conv1_pool))

        # LINEAR FC LAYER
        conv_flat = tf.concat(1, convs_list_flat)
        y_conv = fully_connected(conv_flat, 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                t0 = time.time()
                pred_batches = []
                true_batches = []
                epochs_before = _validation_epochs_completed
                # Run exactly one pass over the validation set, in mini
                # batches because a single GTX970 has insufficient memory to
                # test everything at once.
                while _validation_epochs_completed - epochs_before == 0:
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    pred_batches.append(y_conv_softmax.eval(feed_dict={
                        x: validation_batch[0],
                        y_: validation_batch[1]
                    }))
                    true_batches.append(validation_batch[1])
                pred_validation_labels = numpy.vstack(pred_batches)
                true_validation_labels = numpy.vstack(true_batches)

                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                # Relative AUC change; reported only (early stopping on it is
                # currently disabled, training always runs to step 7000).
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()

            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_batches = []
        true_batches = []
        # Do testing in mini batches because a single GTX970 has insufficient
        # memory to test everything at once.
        while _test_epochs_completed == 0:
            test_batch = get_next_batch(2, 64)
            pred_batches.append(y_conv_softmax.eval(feed_dict={
                x: test_batch[0],
                y_: test_batch[1]
            }))
            true_batches.append(test_batch[1])
        pred_test_labels = numpy.vstack(pred_batches)
        true_test_labels = numpy.vstack(true_batches)

        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        with open(file_name, 'a') as f:
            f.write(
                str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
                str(roc_auc) + '\n')
def train(mnist):
    """Build the MNIST training graph and run ``TRAINING_STEPS`` steps.

    The forward pass comes from ``mnist_inference.inference``; this function
    adds the loss, the decaying learning rate, the moving-average update and
    the training loop. Every 1000 steps it prints the loss on the current
    training batch and saves a checkpoint. Validation and test accuracy are
    not computed here.

    Args:
        mnist: dataset object; ``mnist.train.num_examples`` and
            ``mnist.train.next_batch(BATCH_SIZE)`` are used.
    """
    # x: input image signal.
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE],
                       name="x-input")
    # y_: target classification signal.
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE],
                        name="y-input")

    # L2 regularization.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Reuse the forward pass defined in mnist_inference.py.
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Everything from here on defines the backward pass: loss, learning
    # rate, moving averages and the training op.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Softmax cross entropy against the index of the largest entry of each
    # label row (tf.argmax(y_, 1)); the logits are the forward-pass output.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y)
    # Collapse the per-example values into a single scalar.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # Total loss = mean cross entropy + everything in the "losses"
    # collection (presumably the L2 terms added by mnist_inference - verify).
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    # decayed_learning_rate =
    #     learning_rate * decay_rate ^ (global_step / decay_steps)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate)\
        .minimize(loss, global_step=global_step)
    # Running train_op runs both the gradient step and the average update.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name="train")

    # Checkpoint writer.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # `tf.initialize_all_variables` is deprecated in the TF 1.x API this
        # block already uses.
        tf.global_variables_initializer().run()

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step],
                                           feed_dict={x: xs, y_: ys})
            # Report and save the model every 1000 steps.
            if i % 1000 == 0:
                # Only the loss on the current training batch is reported.
                # The message is a single literal: a backslash continuation
                # inside the string would embed source indentation in it.
                print("After %d training step(s), "
                      "loss on training batch is %g." % (step, loss_value))
                # Passing global_step appends the step count to the file
                # name, e.g. "model.ckpt-1000".
                saver.save(sess,
                           os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
def train():
    """Train CIFAR-10 for a number of steps.

    Builds the input, inference, loss and training ops in a fresh graph,
    then runs ``FLAGS.max_steps`` training steps, logging every 10 steps,
    writing summaries every 100 steps and checkpointing every 1000 steps
    (and on the final step).
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs(False)

        # Logits predictions from the inference model.
        logits = cifar10.inference(images)

        # Loss.
        loss = cifar10.loss(logits, labels)

        # Op that trains the model on one batch of examples and updates the
        # model parameters.
        train_op = cifar10.train(loss, global_step)

        # Saver over all variables.
        saver = tf.train.Saver(tf.all_variables())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.merge_all_summaries()

        # Initialization op, run right after the session is created.
        init_op = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init_op)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            tick = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - tick

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_size / duration
                sec_per_batch = float(duration)
                print(('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                       'sec/batch)') % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Save the model checkpoint periodically and on the last step.
            is_last_step = (step + 1) == FLAGS.max_steps
            if step % 1000 == 0 or is_last_step:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=step)
def _create_initialization(self):
    """Create the variable-initialization op and store it as ``self.initialize``."""
    init_op = tf.initialize_all_variables()
    self.initialize = init_op
def trainNetwork(s, readout, sess):
    """Train the artificial agent using Q-learning to play the pong game.

    Runs forever: picks an action epsilon-greedily, repeats it K times while
    storing transitions in a replay memory, and, once more than OBSERVE
    iterations have passed, trains on a random minibatch per iteration.
    A checkpoint is saved every 10000 iterations.

    Args:
        s: the current state formed by 4 frames of the playground.
        readout: the Q value for each possible action in the current state.
        sess: session
    """
    # Placeholders for the action and for the target Q value.
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])

    # Loss and training operation.
    cost = compute_cost(y, a, readout)
    train_step = tf.train.AdamOptimizer(Lr).minimize(cost)

    # Game state used to communicate with the emulator.
    game_state = game.GameState()

    # Replay memory.
    D = deque()

    # "Do nothing" action vector: only index 0 is set.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1

    # Initial game state: the first frame, resized to 80x80, converted to
    # grayscale, thresholded to binary and stacked four times.
    x_t, _, _ = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    # Save and load model checkpoints.
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks_q_learning")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    # Exploration rate and iteration counter.
    epsilon = INITIAL_EPSILON
    t = 0

    while True:
        # Choose an action epsilon-greedily.
        readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        action_index = get_action_index(readout_t, epsilon, t)
        a_t = np.zeros([ACTIONS])
        a_t[action_index] = 1

        # Scale down epsilon.
        epsilon = scale_down_epsilon(epsilon, t)

        # Repeat the selected action K times, recording every transition.
        for _ in range(0, K):
            # Run the selected action and observe next state and reward.
            s_t1, r_t, terminal = run_selected_action(a_t, s_t, game_state)

            # Store the transition, dropping the oldest one when full.
            D.append((s_t, a_t, r_t, s_t1, terminal))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

        # Start training once the observation phase is over.
        if t > OBSERVE:
            # Sample a minibatch and split it into per-field batches.
            minibatch = random.sample(D, BATCH)
            s_j_batch = [state_j for state_j, _, _, _, _ in minibatch]
            a_batch = [action_j for _, action_j, _, _, _ in minibatch]
            r_batch = [reward_j for _, _, reward_j, _, _ in minibatch]
            s_j1_batch = [state_j1 for _, _, _, state_j1, _ in minibatch]
            terminal_batch = [done_j for _, _, _, _, done_j in minibatch]

            # Target Q-value computed from the next-state readouts.
            readout_j1_batch = readout.eval(feed_dict={s: s_j1_batch})
            target_q_batch = compute_target_q(r_batch, readout_j1_batch,
                                              terminal_batch)

            # Gradient step.
            train_step.run(feed_dict={
                y: target_q_batch,
                a: a_batch,
                s: s_j_batch
            })

        # Advance the state and the iteration counter.
        s_t = s_t1
        t += 1

        # Save a checkpoint every 10000 iterations.
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks_q_learning/' + GAME + '-dqn',
                       global_step=t)

        # Print info.
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        print("TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon,
              "/ ACTION", action_index, "/ REWARD", r_t,
              "/ Q_MAX %e" % np.max(readout_t))
def train():
    """Train a variational auto-encoder and dump reconstructions.

    Builds the encoder (``ved.recognition``), a sampled latent code and the
    decoder (``ved.generation``) in a fresh graph, then runs
    ``FLAGS.max_steps`` Adam steps. One evaluation batch is kept for
    visualization: its first 64 images are saved as ``results/base.jpg``
    and their reconstructions as ``results/<step>.jpg`` every 20 steps.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        ########################
        # Get images and labels
        images_tr, labels_tr = mnistip.distorted_inputs(randFlip=False)
        images_ev, labels_ev = mnistip.inputs(eval_data=True)

        ########################
        # VAE ZONE
        images = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, imHeight, imWidth, numCh])

        # Encoder.
        z_mean, z_stddev, t_num_dim = ved.recognition(images, CODE_LEN)

        # Draw a new sample: z = mean + stddev * N(0, 1).
        samples = tf.random_normal([FLAGS.batch_size, CODE_LEN], 0, 1,
                                   dtype=tf.float32)
        guessed_z = z_mean + (z_stddev * samples)

        # Decoder.
        im_gen = ved.generation(guessed_z, t_num_dim)

        # Loss values; 1e-8 keeps the logarithms away from log(0).
        positive_term = images * tf.log(1e-8 + im_gen)
        negative_term = (1 - images) * tf.log(1e-8 + 1 - im_gen)
        generation_loss = -tf.reduce_sum(positive_term + negative_term,
                                         [1, 2, 3])
        latent_terms = (tf.square(z_mean) + tf.square(z_stddev)
                        - tf.log(tf.square(z_stddev)) - 1)
        latent_loss = 0.5 * tf.reduce_sum(latent_terms, 1)
        total_loss = tf.reduce_mean(generation_loss + latent_loss)

        # Optimizer step.
        train_op = tf.train.AdamOptimizer(0.001).minimize(total_loss)

        #####################
        # Initialization op, run right after the session is created.
        init_op = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init_op)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        #########################
        # Fixed evaluation batch used for every visualization.
        visualization, _ = sess.run([images_ev, labels_ev])
        reshaped_vis = np.squeeze(visualization)
        ims("results/base.jpg", merge(reshaped_vis[:64], [8, 8]))

        for step in xrange(FLAGS.max_steps):
            batch_images, _ = sess.run([images_tr, labels_tr])
            _, lossGen, lossLat = sess.run(
                [train_op, generation_loss, latent_loss],
                feed_dict={images: batch_images})

            if step % 20 == 0:
                format_str = ('%s: Step %d, GEN-loss = %.2f, LAT-loss = %.2f\n')
                print (format_str % (datetime.now(), step,
                                     np.mean(lossGen), np.mean(lossLat)))

                # Save intermediate results.
                generated_test = np.squeeze(
                    sess.run(im_gen, feed_dict={images: visualization}))
                ims("results/" + str(step) + ".jpg",
                    merge(generated_test[:64], [8, 8]))
def run(self):
    """Build the loss/training graph, then train and evaluate the model.

    The cost is the mean of ``-y * log(prob)`` plus an L2 penalty on the
    'softmax', 'w_1' and 'u_1' parameters (scaled by ``self.l2_reg``) plus a
    label-relationship penalty on the columns of the softmax weights
    (scaled by ``self.rela_reg``).

    When ``self.training_iter`` is 0 a fixed checkpoint is restored and
    evaluated once. Otherwise the model is trained for ``training_iter``
    rounds; every ``display_step`` rounds it is scored on the test split and,
    when the test accuracy improves, checkpointed and scored on the
    evaluation split.
    """
    inputs = tf.nn.embedding_lookup(self.word_embedding, self.x)
    prob = self.model(inputs)  # modified

    with tf.name_scope('loss'):
        cost = - tf.reduce_mean(self.y * tf.log(prob))
        reg = tf.nn.l2_loss(self.weights['softmax']) + \
            tf.nn.l2_loss(self.biases['softmax'])
        reg += tf.nn.l2_loss(self.weights['w_1']) + \
            tf.nn.l2_loss(self.biases['w_1'])
        reg += tf.nn.l2_loss(self.weights['u_1'])
        cost += reg * self.l2_reg

        # Relationship term: weighted squared distance between every pair of
        # softmax weight columns; omiga[i, j] is the weight of pair (i, j).
        relation = 0.
        tmp = self.weights['softmax']
        omiga = np.array([[1, -0.5, 0.5, -1, 0, -0.5, 0, 0.5],
                          [-0.5, 1, -1, 0.5, -0.5, 0, 0.5, 0],
                          [0.5, -1, 1, -0.5, 0.5, 0, -0.5, 0],
                          [-1, 0.5, -0.5, 1, 0, 0.5, 0, -0.5],
                          [0, -0.5, 0.5, 0, 1, 0.5, -1, -0.5],
                          [-0.5, 0, 0, 0.5, 0.5, 1, -0.5, -1],
                          [0, 0.5, -0.5, 0, -1, -0.5, 1, 0.5],
                          [0.5, 0, 0, -0.5, -0.5, -1, 0.5, 1]
                          ])
        for i in range(8):
            for j in range(8):
                relation += tf.nn.l2_loss(tmp[:, i] - tmp[:, j]) * omiga[i, j]
        cost += relation * self.rela_reg

    with tf.name_scope('train'):
        global_step = tf.Variable(
            0, name="tr_global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(cost, global_step=global_step)

    with tf.name_scope('predict'):
        correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        correct_num = tf.reduce_sum(tf.cast(correct_pred, tf.int32))

    with tf.name_scope('summary'):
        localtime = time.strftime("%X %Y-%m-%d", time.localtime())
        Summary_dir = 'Summary/' + localtime

        info = 'batch-{}, lr-{}, kb-{}, l2_reg-{}'.format(
            self.batch_size, self.learning_rate, self.Keep_Prob, self.l2_reg)
        info = info + '\ntrain_file_path:' + self.train_file_path + '\ntest_index:' + str(self.test_index) + '\nembedding_type:' + str(self.embedding_type) + '\nMethod: Emotion_GRU'
        summary_acc = tf.scalar_summary('ACC ' + info, accuracy)
        summary_loss = tf.scalar_summary('LOSS ' + info, cost)
        summary_op = tf.merge_summary([summary_loss, summary_acc])

        # Test-split metrics are computed in Python and fed back through
        # placeholders so they can be written as summaries.
        test_acc = tf.placeholder(tf.float32)
        test_loss = tf.placeholder(tf.float32)
        summary_test_acc = tf.scalar_summary('ACC ' + info, test_acc)
        summary_test_loss = tf.scalar_summary('LOSS ' + info, test_loss)
        summary_test = tf.merge_summary(
            [summary_test_loss, summary_test_acc])

        train_summary_writer = tf.train.SummaryWriter(
            Summary_dir + '/train')
        test_summary_writer = tf.train.SummaryWriter(Summary_dir + '/test')

    with tf.name_scope('saveModel'):
        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
        save_dir = 'Models/' + localtime + '/'
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    with tf.name_scope('readData'):
        print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))
        tr_x, tr_y, tr_doc_len, te_x, te_y, te_doc_len, ev_x, ev_y, ev_doc_len = load_data_for_Emotion_CNN(
            self.train_file_path,
            self.word_id_mapping,
            self.max_doc_len,
            self.test_index,
            self.n_class
        )
        print('train docs: {} test docs: {}'.format(len(tr_y), len(te_y)))
        print('training_iter: %s' % (self.training_iter,))
        print(info)
        print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # The config must be handed to the session, otherwise allow_growth has
    # no effect.
    with tf.Session(config=config) as sess:
        sess.run(tf.initialize_all_variables())
        max_acc, bestIter = 0., 0

        # Loaded before anything can call evaluate_eval_split(), which reads
        # it; loading it later raises NameError on the restore-only path.
        For_calibrated_B = np.loadtxt('For_calibrated_B' + str(self.test_index) + '.txt', delimiter=',')

        def evaluate_test_split():
            """Return (mean loss, accuracy) over the test split."""
            acc, loss, cnt = 0., 0., 0
            for feed, num in self.get_batch_data(te_x, te_y, te_doc_len, 20, keep_prob=1.0):
                _loss, _acc = sess.run([cost, correct_num], feed_dict=feed)
                acc += _acc
                loss += _loss * num
                cnt += num
            loss = loss / cnt
            acc = acc / cnt
            return loss, acc

        def evaluate_eval_split():
            """Score the evaluation split with calibrated label ranking."""
            feed_dict = {
                self.x: ev_x,
                self.doc_len: ev_doc_len,
                self.keep_prob: 1.0,
            }
            y_true = ev_y
            y_pred_p = sess.run(prob, feed_dict=feed_dict)
            y_pred = calibrated_label_ranking(y_pred_p, For_calibrated_B)
            Emotion_eval(y_true, y_pred, y_pred_p)

        if self.training_iter == 0:
            # Evaluation-only mode: restore a fixed checkpoint.
            saver.restore(sess, 'Models/10:01:44 2017-03-11/-856')
            loss, acc = evaluate_test_split()
            print('%s %s' % (loss, acc))
            evaluate_eval_split()

        for i in xrange(self.training_iter):
            for train, _ in self.get_batch_data(tr_x, tr_y, tr_doc_len, self.batch_size, self.Keep_Prob):
                _, step, summary, loss, acc = sess.run(
                    [optimizer, global_step, summary_op, cost, accuracy], feed_dict=train)
                train_summary_writer.add_summary(summary, step)
                print('Iter {}: mini-batch loss={:.6f}, acc={:.6f}'.format(step, loss, acc))

            if i % self.display_step == 0:
                loss, acc = evaluate_test_split()

                # Checkpoint only when the test accuracy improves.
                if acc > max_acc:
                    max_acc = acc
                    bestIter = step
                    saver.save(sess, save_dir, global_step=step)
                    evaluate_eval_split()

                summary = sess.run(summary_test, feed_dict={
                    test_loss: loss, test_acc: acc})
                test_summary_writer.add_summary(summary, step)
                print('----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime())))
                print('Iter {}: test loss={:.6f}, test acc={:.6f}'.format(step, loss, acc))
                print('round {}: max_acc={} BestIter={}\n'.format(i, max_acc, bestIter))

        print('Optimization Finished!')
def train(self, config):
    """Train DCGAN.

    Builds Adam steps for the discriminator and generator losses,
    initializes all variables, tries to restore a checkpoint, and then for
    every mini-batch runs one discriminator update followed by two
    generator updates.

    Args:
        config: settings object. This method reads ``dataset``,
            ``learning_rate``, ``beta1``, ``epoch``, ``train_size``,
            ``batch_size`` and ``checkpoint_dir`` from it.

    Note:
        The variable initializer and the ``.eval()`` calls rely on a default
        TensorFlow session being active.
    """
    # File order is whatever glob returns; no shuffling is applied.
    data = glob(os.path.join("./data", config.dataset, "*.jpg"))

    d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
        .minimize(self.d_loss, var_list=self.d_vars)
    g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
        .minimize(self.g_loss, var_list=self.g_vars)
    # `tf.initialize_all_variables` is deprecated; this block already relies
    # on the `tf.summary` API, which shipped together with the replacement.
    tf.global_variables_initializer().run()

    self.saver = tf.train.Saver()
    self.g_sum = tf.summary.merge([self.z_sum, self.d__sum,
                                   self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = tf.summary.merge([self.z_sum, self.d_sum,
                                   self.d_loss_real_sum, self.d_loss_sum])
    # Pass the Graph itself: handing a GraphDef to the writer's `graph`
    # argument is deprecated.
    self.writer = tf.summary.FileWriter("./logs", self.sess.graph)

    # Fixed noise and images used for the periodic sample renders.
    sample_z = np.random.uniform(-1, 1, size=(self.sample_size, self.z_dim))
    sample_files = data[0:self.sample_size]
    sample = [get_image(sample_file, self.image_size, is_crop=self.is_crop)
              for sample_file in sample_files]
    sample_images = np.array(sample).astype(np.float32)

    counter = 1
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    for epoch in range(config.epoch):
        # The file list is re-read at the start of every epoch.
        data = glob(os.path.join("./data", config.dataset, "*.jpg"))
        batch_idxs = min(len(data), config.train_size) // config.batch_size

        for idx in range(0, batch_idxs):
            batch_files = data[idx * config.batch_size:(idx + 1) * config.batch_size]
            batch = [get_image(batch_file, self.image_size, is_crop=self.is_crop)
                     for batch_file in batch_files]
            batch_images = np.array(batch).astype(np.float32)

            batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
                .astype(np.float32)

            # Update D network
            _, summary_str = self.sess.run(
                [d_optim, self.d_sum],
                feed_dict={self.images: batch_images, self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            # Update G network
            _, summary_str = self.sess.run(
                [g_optim, self.g_sum],
                feed_dict={self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            # Run g_optim twice to make sure that d_loss does not go to
            # zero (different from paper)
            _, summary_str = self.sess.run(
                [g_optim, self.g_sum],
                feed_dict={self.z: batch_z})
            self.writer.add_summary(summary_str, counter)

            errD_fake = self.d_loss_fake.eval({self.z: batch_z})
            errD_real = self.d_loss_real.eval({self.images: batch_images})
            errG = self.g_loss.eval({self.z: batch_z})

            counter += 1
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                  % (epoch, idx, batch_idxs,
                     time.time() - start_time, errD_fake + errD_real, errG))

            # Render samples every 100 updates.
            if np.mod(counter, 100) == 1:
                samples, d_loss, g_loss = self.sess.run(
                    [self.sampler, self.d_loss, self.g_loss],
                    feed_dict={self.z: sample_z, self.images: sample_images})
                save_images(samples, [8, 8],
                            './samples/train_%s_%s.png' % (epoch, idx))
                print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))

            # Checkpoint every 500 updates.
            if np.mod(counter, 500) == 2:
                self.save(config.checkpoint_dir, counter)