def train(mnist):
    """Build the MNIST training graph and run the training loop.

    The graph combines the network from ``mnist_inference.inference`` with an
    L2 regulariser, an exponential moving average over the trainable
    variables and an exponentially decayed learning rate.  Every 1000
    iterations the batch loss is printed and a checkpoint is written.

    Args:
        mnist: dataset object exposing ``train.num_examples`` and
            ``train.next_batch(batch_size)``.
    """
    # Input placeholders: a batch of inputs and the matching label vectors.
    x = tf.placeholder(
        tf.float32, [None, mnist_inference.INPUT_NODE], name="x-input")
    y_ = tf.placeholder(
        tf.float32, [None, mnist_inference.OUTPUT_NODE], name="y-input")

    # Forward pass; the regulariser is handed to the inference function.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Moving averages of every trainable variable.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Total loss = mean cross entropy + everything in the "losses" collection.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples/BATCH_SIZE,
        LEARNING_RATE_DECAY)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # A single op that triggers both the gradient step and the EMA update.
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            fetches = [train_op, loss, global_step]
            # The fetched global step value is not used below.
            _, loss_value, _step = sess.run(fetches, feed_dict={x: xs, y_: ys})

            if i % 1000 != 0:
                continue
            print("%d 训练后,损失值为 %g" % (i, loss_value))
            saver.save(sess, MODEL_SAVE_PATH, global_step=global_step)
def testSplitApplyMerge(self):
    """Train the split/apply/merge toy model for 10 SGD steps and check it.

    After training, the route selection must be ``[0, 0, 1, 1]`` and both the
    routing loss and the final loss must be (close to) zero.
    """
    # Fix the graph seed for repeatability: SGD has a tendency to jump
    # around, even here.
    tf.set_random_seed(1)

    with self.test_session() as sess:
        # Use sampling to train REINFORCE.
        with st.value_type(st.SampleAndReshapeValue(n=1)):
            model_outputs = build_split_apply_merge_model()
            (route_selection, routing_loss, final_loss) = model_outputs

        optimizer = tf.train.GradientDescentOptimizer(1.0)
        sgd = optimizer.minimize(final_loss)

        tf.global_variables_initializer().run()

        for i in range(10):
            # Run loss and inference step.  This toy problem converges VERY
            # quickly.
            fetched = sess.run([
                routing_loss,
                final_loss,
                tf.identity(route_selection),
                sgd,
            ])
            (routing_loss_v, final_loss_v, route_selection_v, _) = fetched
            print(
                "Iteration %d, routing loss: %s, final_loss: %s, "
                "route selection: %s"
                % (i, routing_loss_v, final_loss_v, route_selection_v))

        # Values from the last iteration are the ones being verified.
        self.assertAllEqual([0, 0, 1, 1], route_selection_v)
        self.assertAllClose([0.0, 0.0, 0.0, 0.0], routing_loss_v)
        self.assertAllClose(0.0, final_loss_v)
def testConv2DReflect(self):
    """Compare ``blocks_std.Conv2D`` with 'REFLECT' padding to a NumPy reference."""
    np.random.seed(768798)

    x_shape = [4, 10, 12, 6]
    f_shape = [3, 4, 6, 5]
    strides = [1, 2, 2, 1]
    padding = 'REFLECT'

    # The block takes only the spatial parts of the filter shape and strides.
    conv = blocks_std.Conv2D(
        depth=f_shape[-1],
        filter_size=f_shape[0:2],
        strides=strides[1:3],
        padding=padding,
        act=None,
        bias=None)

    x_value = np.random.normal(size=x_shape)
    x = tf.convert_to_tensor(x_value, dtype=tf.float32)
    y = conv(x)

    with self.test_session():
        tf.global_variables_initializer().run()
        # Read back the kernel the block created so the reference
        # convolution uses exactly the same filter values.
        f_value = conv._kernel.eval()
        y_value = y.eval()

    y_expected = _NumpyConv2D(
        x_value, f_value, strides=strides, padding=padding)
    self.assertAllClose(y_expected, y_value)
def testAdagradDAWithL1(self):
    """Run one AdagradDA step with L1 regularisation for float64 and float32."""
    for dtype in [tf.float64, tf.float32]:
        with self.test_session() as sess:
            global_step = tf.Variable(0, dtype=tf.int64)
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([4.0, 3.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.2], dtype=dtype)
            grads1 = tf.constant([0.01, 0.02], dtype=dtype)

            opt = tf.train.AdagradDAOptimizer(
                3.0,
                global_step,
                initial_gradient_squared_accumulator_value=0.1,
                l1_regularization_strength=0.001,
                l2_regularization_strength=0.0)
            grads_and_vars = zip([grads0, grads1], [var0, var1])
            update = opt.apply_gradients(
                grads_and_vars, global_step=global_step)
            tf.global_variables_initializer().run()

            # Before any update the variables hold their initial values.
            v0_val, v1_val = sess.run([var0, var1])
            self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
            self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

            # Run a step of AdagradDA.
            update.run()

            v0_val, v1_val = sess.run([var0, var1])
            expected_v0 = np.array([-0.895489, -1.59555])
            expected_v1 = np.array([-0.085339, -0.17989])
            self.assertAllCloseAccordingToType(expected_v0, v0_val)
            self.assertAllCloseAccordingToType(expected_v1, v1_val)
def init_training_graph(self):
    """Add the output, loss and prediction ops, then initialise variables.

    Reads ``self.last``, ``self.logits_weight``, ``self.logits_biases`` and
    ``self.train_labels_node``.  Sets ``self.logits2``, ``self.logits``,
    ``self.loss``, ``self.predictions``, ``self.train_prediction`` and
    ``self.test_prediction``, and registers a "mean_squared_error" scalar
    summary.

    NOTE(review): the final initializer is run without an explicit session,
    so this presumably has to be called while a default session is active --
    confirm against the caller.
    """
    with tf.name_scope('Evaluation'):
        # self.logits = self.conv_layer_f(self.last, self.logits_weight, strides=[1,1,1,1], scope_name="logits/")
        with tf.name_scope("logits/"):
            # Convolution of the last feature map followed by a bias add.
            self.logits2 = tf.nn.conv2d(self.last, self.logits_weight, strides=[1,1,1,1], padding="VALID")
            self.logits = tf.nn.bias_add(self.logits2, self.logits_biases)
        # The raw logits are used directly as predictions (no softmax).
        self.predictions = self.logits
        #self.predictions = tf.squeeze(self.logits, [3])
        #softmax = tf.nn.softmax(self.logits)
        #print softmax.get_shape()
        #self.predictions = tf.slice(softmax, [0, 0, 0, 0], [-1, -1, -1, 1])
        with tf.name_scope('Loss'):
            # Mean squared error between the logits and the training labels.
            self.loss = tf.reduce_mean(tf.losses.mean_squared_error(self.logits, self.train_labels_node))
            #self.loss = tf.reduce_mean(tf.losses.mean_squared_error(self.predictions, self.train_labels_node))
            tf.summary.scalar("mean_squared_error", self.loss)
        # Drop axis 3 of the predictions (assumed to be a size-1 channel
        # axis -- TODO confirm).
        self.predictions = tf.squeeze(self.predictions, [3])
        self.train_prediction = self.predictions
    # Train and test predictions share the same tensor.
    self.test_prediction = self.predictions
    tf.global_variables_initializer().run()
    print('Computational graph initialised')
def testBasic(self):
    """Check one StochasticGradientLangevinDynamics step on constant gradients."""
    decay_rate = 0.53

    def scaled_grad(grad):
        # Expected preconditioned gradient for a constant gradient `grad`.
        return (0.5 * grad / np.sqrt(
            decay_rate + (1. - decay_rate) * grad**2 + 1e-8))

    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.test_session():
            var0 = tf.Variable([1.1, 2.1], dtype=dtype)
            var1 = tf.Variable([3., 4.], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)

            sgd_optimizer = tfp.optimizer.StochasticGradientLangevinDynamics(
                3., preconditioner_decay_rate=decay_rate)
            sgd_op = sgd_optimizer.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            tf.global_variables_initializer().run()

            # Fetch params to validate initial values.
            self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval())
            self.assertAllCloseAccordingToType([3., 4.], var1.eval())

            # Run 1 step of sgd.
            sgd_op.run()

            # Validate updated params.  Note that
            # `tfp.math.diag_jacobian(xs=var, ys=grad)` returns zero tensor.
            grads_scaled = scaled_grad(0.1)
            self.assertAllCloseAccordingToType(
                [1.1 - 3. * grads_scaled, 2.1 - 3. * grads_scaled],
                var0.eval())

            grads_scaled = scaled_grad(0.01)
            self.assertAllCloseAccordingToType(
                [3. - 3. * grads_scaled, 4. - 3. * grads_scaled],
                var1.eval())

            # Exactly one update has been counted by the optimizer.
            self.assertAllCloseAccordingToType(
                1, sgd_optimizer._counter.eval())
def testYesShuffle(self):
    """Read 500 shuffled work units and check that none is seen twice."""
    id_source = rs.ReaderSource(
        reader_cls=tf.IdentityReader,
        work_units=self.work_units,
        batch_size=1,
        shuffle=True,
        num_threads=10,
        seed=1234)
    index_column, value_column = id_source()

    # Both columns are built against the same cache.
    cache = {}
    index_tensor = index_column.build(cache)
    value_tensor = value_column.build(cache)
    self.assertEqual([1], index_tensor.get_shape().as_list())
    self.assertEqual([1], value_tensor.get_shape().as_list())

    seen = set()
    with self.test_session() as sess:
        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for _ in range(500):
            index, value = sess.run([index_tensor, value_tensor])
            # With the identity reader the key and the value must match.
            self.assertEqual(index, value)
            unit_id = int(value[0])
            self.assertNotIn(unit_id, seen)
            seen.add(unit_id)

        coord.request_stop()
        coord.join(threads)
def testDepthOrderKernel(self):
    """Check the output of a strictly depth-ordered 1x1 masked convolution.

    With an all-ones kernel and an all-ones input, output channel ``k`` is
    expected to equal ``k``.
    """
    kernel_size = 1
    input_depth = 7
    output_depth = input_depth

    kernel_shape = [kernel_size, kernel_size, input_depth, output_depth]
    kernel_feed = np.ones(kernel_shape)

    x_shape = [5] * 3 + [input_depth]
    x_feed = np.ones(x_shape)

    # Every spatial position gets the channel ramp 0..output_depth-1.
    y_expected = np.zeros(x_shape[0:3] + [output_depth])
    y_expected[:, :, :] = np.arange(output_depth)

    def init_kernel(s, t):
        # Initializer returning the constant all-ones kernel.
        return tf.constant(kernel_feed, dtype=t, shape=s)

    masked_conv2d = blocks_masked_conv2d.DepthOrderConv2D(
        output_depth,
        [kernel_size] * 2,
        [1] * 2,
        'SAME',
        strict_order=True,
        initializer=init_kernel)

    x = tf.placeholder(dtype=tf.float32, shape=x_shape)
    y = masked_conv2d(x)

    with self.test_session():
        tf.global_variables_initializer().run()
        y_value = y.eval(feed_dict={x: x_feed})

    self.assertAllEqual(y_expected, y_value)
def basic_operation():
    """Demonstrate adding two variables and two constants in a TF session.

    Prints the symbolic sum tensors and their Python types, then evaluates
    the sums inside a session.  The session is opened with a ``with`` block
    so that it is closed (and its resources released) when the demo ends;
    previously it was never closed.
    """
    v1 = tf.Variable(10)
    v2 = tf.Variable(5)
    addv = v1 + v2
    print(addv)
    print(type(addv))
    print(type(v1))

    c1 = tf.constant(10)
    c2 = tf.constant(5)
    addc = c1 + c2
    print(addc)
    print(type(addc))
    print(type(c1))

    # The object/instance used to run the computation graph:
    # a session is a runtime.
    with tf.Session() as sess:
        # Variable -> initialization -> Tensor holding a value.
        tf.global_variables_initializer().run(session=sess)
        print('变量是需要初始化的')
        # Two equivalent ways of evaluating a tensor in a session.
        print('加法(v1, v2) = ', addv.eval(session=sess))
        print('加法(v1, v2) = ', sess.run(addv))
        print('加法(c1, c2) = ', addc.eval(session=sess))
def testSameVariablesClear(self):
    """Check that resetting a server target aborts sessions and clears variables."""
    server = tf.train.Server.create_local_server()
    target = server.target

    # Creates a graph with 2 variables.
    v0 = tf.Variable([[2, 1]], name="v0")
    v1 = tf.Variable([[1], [2]], name="v1")
    v2 = tf.matmul(v0, v1)

    # Verifies that both sessions connecting to the same target return
    # the same results.
    sess_1 = tf.Session(target)
    sess_2 = tf.Session(target)
    sess_1.run(tf.global_variables_initializer())
    for session in (sess_1, sess_2):
        self.assertAllEqual([[4]], session.run(v2))

    # Resets target. sessions abort. Use sess_2 to verify.
    tf.Session.reset(target)
    with self.assertRaises(tf.errors.AbortedError):
        self.assertAllEqual([[4]], sess_2.run(v2))

    # Connects to the same target. Device memory for the variables would have
    # been released, so they will be uninitialized.
    sess_2 = tf.Session(target)
    with self.assertRaises(tf.errors.FailedPreconditionError):
        sess_2.run(v2)

    # Reinitializes the variables.
    sess_2.run(tf.global_variables_initializer())
    self.assertAllEqual([[4]], sess_2.run(v2))
    sess_2.close()
def testRasterScanKernel(self):
    """Check which kernel entries ``RasterScanConv2D`` zeroes out.

    A 5x5 kernel filled with 1..25 is expected to keep its first twelve
    entries in row-major order and have all remaining entries set to zero.
    """
    kernel_size = 5
    input_depth = 1
    output_depth = 1
    kernel_shape = [kernel_size, kernel_size, input_depth, output_depth]

    # pylint: disable=bad-whitespace
    kernel_feed = np.reshape(
        [[ 1.0,  2.0,  3.0,  4.0,  5.0],
         [ 6.0,  7.0,  8.0,  9.0, 10.0],
         [11.0, 12.0, 13.0, 14.0, 15.0],
         [16.0, 17.0, 18.0, 19.0, 20.0],
         [21.0, 22.0, 23.0, 24.0, 25.0]],
        kernel_shape)
    kernel_expected = np.reshape(
        [[ 1.0,  2.0,  3.0,  4.0,  5.0],
         [ 6.0,  7.0,  8.0,  9.0, 10.0],
         [11.0, 12.0,  0.0,  0.0,  0.0],
         [ 0.0,  0.0,  0.0,  0.0,  0.0],
         [ 0.0,  0.0,  0.0,  0.0,  0.0]],
        kernel_shape)
    # pylint: enable=bad-whitespace

    def init_kernel(s, t):
        # Initializer returning the constant 1..25 kernel.
        return tf.constant(kernel_feed, dtype=t, shape=s)

    masked_conv2d = blocks_masked_conv2d.RasterScanConv2D(
        output_depth,
        [kernel_size] * 2,
        [1] * 2,
        'SAME',
        initializer=init_kernel)

    # Applying the block is what creates its kernel; the output is unused.
    x = tf.placeholder(dtype=tf.float32, shape=[10] * 3 + [input_depth])
    _ = masked_conv2d(x)

    with self.test_session():
        tf.global_variables_initializer().run()
        kernel_value = masked_conv2d._kernel.eval()

    self.assertAllEqual(kernel_expected, kernel_value)
def benchmark_create_1000_partitions_with_100_parameter_servers(self):
    """Benchmark reading partitioned variables spread over 100 ps tasks.

    One partitioned variable is built per partition size; each is converted
    to a single tensor and the read of that tensor is benchmarked.
    """
    workers, _ = create_local_cluster(num_workers=1, num_ps=100)
    worker_sessions = [tf.Session(w.target) for w in workers]
    worker = worker_sessions[0]

    partition_sizes = (1, 512, 1024*32, 1024*128)
    partitioned = []

    for partition_size in partition_sizes:
        # max_shard_bytes is 4 * partition_size and the shape is
        # 1000 * partition_size float32s, which should partition into 1000
        # shards, each containing partition_size float32s.
        print("Building partitioned variable with %d floats per partition"
              % partition_size)
        # Each partition to have exactly N float32s.
        partitioner = tf.variable_axis_size_partitioner(
            max_shard_bytes=4 * partition_size)
        with tf.device(tf.train.replica_device_setter(ps_tasks=100)):
            partitioned_ix = tf.get_variable(
                "partitioned_%d" % partition_size,
                shape=[1000 * partition_size],
                dtype=tf.float32,
                partitioner=partitioner)
            # Concatenates along axis 0.
            partitioned.append(tf.convert_to_tensor(partitioned_ix))

    tf.global_variables_initializer().run(session=worker)

    for partition_size, concatenated in zip(partition_sizes, partitioned):
        print("Running benchmark having partitions with %d floats"
              % partition_size)
        self.run_op_benchmark(
            worker,
            concatenated,
            name=("read_concat_1000_partitions_from_"
                  "100_parameter_servers_partsize_%d_floats" % partition_size))
def train(data_dir, checkpoint_path, config):
    """Train a ``NamignizerModel`` on the name data and checkpoint each epoch.

    Args:
        data_dir: path to the data for the model (see data_utils for the
            data format).
        checkpoint_path: path under which the model checkpoints are saved.
        config: configuration object specifying the model and how it should
            be run and trained.

    Returns:
        None
    """
    # Prepare Name data.
    print("Reading Name data in %s" % data_dir)
    names, counts = data_utils.read_names(data_dir)

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(
            -config.init_scale, config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = NamignizerModel(is_training=True, config=config)

        tf.global_variables_initializer().run()

        for i in range(config.max_max_epoch):
            epoch_number = i + 1

            # The exponent stays 0 until `i` exceeds config.max_epoch, so the
            # learning rate only starts decaying after that point.
            decay_exponent = max(i - config.max_epoch, 0.0)
            lr_decay = config.lr_decay ** decay_exponent
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f"
                  % (epoch_number, session.run(m.lr)))

            train_perplexity = run_epoch(
                session, m, names, counts, config.epoch_size, m.train_op,
                verbose=True)
            print("Epoch: %d Train Perplexity: %.3f"
                  % (epoch_number, train_perplexity))

            # The checkpoint is tagged with the zero-based epoch index.
            m.saver.save(session, checkpoint_path, global_step=i)
def testOneThread(self):
    """Dequeue every batch from a single-threaded prefetch queue in order.

    Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
    which was removed from ``unittest`` in Python 3.12.
    """
    with self.test_session() as sess:
        batch_size = 10
        image_size = 32
        num_batches = 5

        # Counter that raises OutOfRangeError once every example was produced.
        zero64 = tf.constant(0, dtype=tf.int64)
        examples = tf.Variable(zero64)
        counter = examples.count_up_to(num_batches * batch_size)

        image = tf.random_normal([image_size, image_size, 3],
                                 dtype=tf.float32,
                                 name="images")
        label = tf.random_uniform([1], 0, 10,
                                  dtype=tf.int32,
                                  name="labels")

        batches = tf.train.batch([counter, image, label],
                                 batch_size=batch_size,
                                 num_threads=1)
        batches = slim.prefetch_queue.prefetch_queue(batches).dequeue()

        tf.global_variables_initializer().run()
        threads = tf.train.start_queue_runners()

        for i in range(num_batches):
            results = sess.run(batches)
            # With a single thread the counter values arrive in order.
            self.assertAllEqual(
                results[0],
                np.arange(i * batch_size, (i + 1) * batch_size))
            self.assertEqual(results[1].shape,
                             (batch_size, image_size, image_size, 3))
            self.assertEqual(results[2].shape, (batch_size, 1))

        # Reached the limit.
        with self.assertRaises(tf.errors.OutOfRangeError):
            sess.run(batches)

        for thread in threads:
            thread.join()
def testMultipleDequeue(self):
    """Dequeue from one prefetch queue through two separate dequeue ops.

    Uses ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
    which was removed from ``unittest`` in Python 3.12, and integer division
    instead of ``int(x / 2)``.
    """
    with self.test_session() as sess:
        batch_size = 10
        image_size = 32
        num_batches = 4

        # Counter that raises OutOfRangeError once every example was produced.
        zero64 = tf.constant(0, dtype=tf.int64)
        examples = tf.Variable(zero64)
        counter = examples.count_up_to(num_batches * batch_size)

        image = tf.random_normal([image_size, image_size, 3],
                                 dtype=tf.float32,
                                 name="images")
        label = tf.random_uniform([1], 0, 10,
                                  dtype=tf.int32,
                                  name="labels")

        batches = tf.train.batch([counter, image, label],
                                 batch_size=batch_size,
                                 num_threads=4)
        batcher = slim.prefetch_queue.prefetch_queue(batches)
        batches_list = [batcher.dequeue() for _ in range(2)]

        tf.global_variables_initializer().run()
        threads = tf.train.start_queue_runners()

        value_counter = []
        # Two dequeue ops, so each pass of the outer loop consumes 2 batches.
        for _ in range(num_batches // 2):
            for batches in batches_list:
                results = sess.run(batches)
                value_counter.append(results[0])
                self.assertEqual(results[1].shape,
                                 (batch_size, image_size, image_size, 3))
                self.assertEqual(results[2].shape, (batch_size, 1))

        # The batching threads may interleave, so only the sorted set of
        # counter values is compared.
        self.assertAllEqual(np.sort(np.concatenate(value_counter)),
                            np.arange(0, num_batches * batch_size))

        # Reached the limit.
        with self.assertRaises(tf.errors.OutOfRangeError):
            sess.run(batches)

        for thread in threads:
            thread.join()
def train(model, data, gen, params):
    """Alternate discriminator and generator updates, then visualise.

    Runs ``params.num_steps + 1`` iterations.  When ``params.anim_path`` is
    set, sample frames are collected every ``params.anim_every`` steps and
    saved as an animation; otherwise the final distributions are plotted.

    Args:
        model: object exposing the ``x``/``z`` inputs, the ``loss_d``/
            ``loss_g`` tensors and the ``opt_d``/``opt_g`` training ops.
        data: sampler for the data distribution (``sample(batch_size)``).
        gen: sampler for the generator input (``sample(batch_size)``,
            ``range``).
        params: run settings (``num_steps``, ``batch_size``, ``log_every``,
            ``anim_path``, ``anim_every``).
    """
    anim_frames = []
    column_shape = (params.batch_size, 1)

    with tf.Session() as session:
        tf.local_variables_initializer().run()
        tf.global_variables_initializer().run()

        for step in range(params.num_steps + 1):
            # update discriminator
            x = data.sample(params.batch_size)
            z = gen.sample(params.batch_size)
            discriminator_feed = {
                model.x: np.reshape(x, column_shape),
                model.z: np.reshape(z, column_shape),
            }
            loss_d, _, = session.run(
                [model.loss_d, model.opt_d], discriminator_feed)

            # update generator (with a fresh noise sample)
            z = gen.sample(params.batch_size)
            generator_feed = {model.z: np.reshape(z, column_shape)}
            loss_g, _ = session.run(
                [model.loss_g, model.opt_g], generator_feed)

            if step % params.log_every == 0:
                print('{}: {:.4f}\t{:.4f}'.format(step, loss_d, loss_g))

            if params.anim_path and (step % params.anim_every == 0):
                frame = samples(
                    model, session, data, gen.range, params.batch_size)
                anim_frames.append(frame)

        # Final visualisation still needs the open session for sampling.
        if params.anim_path:
            save_animation(anim_frames, params.anim_path, gen.range)
        else:
            samps = samples(
                model, session, data, gen.range, params.batch_size)
            plot_distributions(samps, gen.range)
def testMultiLabelWithCenteredBias(self):
    """Check variables, summaries and metrics of a centered-bias multi-label head."""
    n_classes = 3
    head = head_lib._multi_label_head(
        n_classes=n_classes,
        enable_centered_bias=True,
        metric_class_ids=range(n_classes))

    with tf.Graph().as_default(), tf.Session():
        logits = tf.constant([[1., 0., 0.]])
        labels = tf.constant([[0, 0, 1]])
        model_fn_ops = head.head_ops(
            {},
            labels,
            tf.contrib.learn.ModeKeys.TRAIN,
            _noop_train_op,
            logits=logits)

        # Variables the head is expected to have created.
        expected_global = (
            "centered_bias_weight:0",
            "centered_bias_weight/Adagrad:0",
        )
        expected_trainable = (
            "centered_bias_weight:0",
        )
        _assert_variables(
            self,
            expected_global=expected_global,
            expected_trainable=expected_trainable)

        tf.global_variables_initializer().run()

        _assert_summary_tags(self, [
            "loss",
            "centered_bias/bias_0",
            "centered_bias/bias_1",
            "centered_bias/bias_2",
        ])

        expected_loss = .89985204
        _assert_metrics(
            self,
            expected_loss,
            self._expected_eval_metrics(expected_loss),
            model_fn_ops)
def testDifferingKeyHeadSizes(self, gate_style):
    """Checks that a key size different from the head size is supported.

    Args:
        gate_style: gate style forwarded to ``RelationalMemory``.
    """
    mem_slots = 2
    head_size = 32
    num_heads = 2
    key_size = 128
    batch_size = 5
    input_shape = (batch_size, 3, 3)

    mem = relational_memory.RelationalMemory(
        mem_slots,
        head_size,
        num_heads,
        gate_style=gate_style,
        key_size=key_size)
    # The test only makes sense when the two sizes really differ.
    self.assertNotEqual(key_size, mem._head_size)

    inputs = tf.placeholder(tf.float32, input_shape)
    memory_0 = mem.initial_state(batch_size)
    _, memory_1 = mem(inputs, memory_0)

    fetches = {"memory_1": memory_1, "memory_0": memory_0}
    with self.test_session() as session:
        tf.global_variables_initializer().run()
        results = session.run(
            fetches, feed_dict={inputs: np.ones(input_shape)})

    # One step on all-ones input must change the memory somewhere.
    memory_changed = np.not_equal(results["memory_0"], results["memory_1"])
    self.assertTrue(np.any(memory_changed))
def pretrain(self):
    """Pre-train the model on SVHN, logging summaries and saving checkpoints.

    Every 10 steps the training loss/accuracy and the accuracy on a random
    test batch are printed and a summary is written; every 1000 steps a
    checkpoint is saved under ``self.model_save_path``.

    The summary writer is closed in a ``finally`` clause so buffered events
    are flushed to disk even if training is interrupted; previously the
    writer was never closed.
    """
    # load svhn dataset
    train_images, train_labels = self.load_svhn(self.svhn_dir, split='train')
    test_images, test_labels = self.load_svhn(self.svhn_dir, split='test')

    # build a graph
    model = self.model
    model.build_model()

    # Number of whole batches in the training set; loop-invariant.
    batches_per_epoch = int(train_images.shape[0] / self.batch_size)

    with tf.Session(config=self.config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter(
            logdir=self.log_dir, graph=tf.get_default_graph())

        try:
            for step in range(self.pretrain_iter+1):
                # Cycle through the training set one batch at a time.
                i = step % batches_per_epoch
                start = i*self.batch_size
                stop = (i+1)*self.batch_size
                batch_images = train_images[start:stop]
                batch_labels = train_labels[start:stop]
                feed_dict = {model.images: batch_images,
                             model.labels: batch_labels}
                sess.run(model.train_op, feed_dict)

                if (step+1) % 10 == 0:
                    summary, l, acc = sess.run(
                        [model.summary_op, model.loss, model.accuracy],
                        feed_dict)
                    # Evaluate on a random test batch of the same size.
                    rand_idxs = np.random.permutation(
                        test_images.shape[0])[:self.batch_size]
                    test_acc, _ = sess.run(
                        fetches=[model.accuracy, model.loss],
                        feed_dict={model.images: test_images[rand_idxs],
                                   model.labels: test_labels[rand_idxs]})
                    summary_writer.add_summary(summary, step)
                    print ('Step: [%d/%d] loss: [%.6f] train acc: [%.2f] test acc [%.2f]' \
                           %(step+1, self.pretrain_iter, l, acc, test_acc))

                if (step+1) % 1000 == 0:
                    saver.save(
                        sess,
                        os.path.join(self.model_save_path, 'svhn_model'),
                        global_step=step+1)
                    print ('svhn_model-%d saved..!' %(step+1))
        finally:
            # Flush pending summaries and release the event file.
            summary_writer.close()
def testVariableCopy(self):
    """Copy a variable from graph1 into graph2 twice and compare the values."""
    with graph1.as_default():
        # Define a variable in graph1, open a session on that graph and
        # initialize the variable in it.
        some_var = tf.Variable(2)
        sess1 = tf.Session()
        tf.global_variables_initializer().run(session=sess1)

    copy_variable = tf.contrib.copy_graph.copy_variable_to_graph

    # Make a copy of some_var in the default scope in graph2.
    copy1 = copy_variable(some_var, graph2)

    # Make another copy with a different scope.
    copy2 = copy_variable(some_var, graph2, "test_scope")

    # Initialize both copies in a session bound to graph2.
    with graph2.as_default():
        sess2 = tf.Session()
        tf.global_variables_initializer().run(session=sess2)

    # Ensure values in all three variables are the same.
    v1 = some_var.eval(session=sess1)
    v2 = copy1.eval(session=sess2)
    v3 = copy2.eval(session=sess2)

    assert isinstance(copy1, tf.Variable)
    assert isinstance(copy2, tf.Variable)
    assert v1 == v2 == v3 == 2
def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd', learning_rate=1e-2, l2_weight=0,
             random_seed=None):
    """Build a sparse-input logistic-regression graph and its session.

    Args:
        input_dim: number of input features (rows of ``w``).
        output_dim: number of outputs (columns of ``w``, size of ``b``).
        init_path: forwarded to ``utils.init_var_map``.
        opt_algo: optimiser name forwarded to ``utils.get_optimizer``.
        learning_rate: learning rate forwarded to ``utils.get_optimizer``.
        l2_weight: coefficient of the L2 penalty term in the loss.
        random_seed: graph-level random seed; not set when ``None``.
    """
    Model.__init__(self)

    # (name, shape, init method, dtype) for each variable to create.
    init_vars = [
        ('w', [input_dim, output_dim], 'xavier', dtype),
        ('b', [output_dim], 'zero', dtype),
    ]

    self.graph = tf.Graph()
    with self.graph.as_default():
        if random_seed is not None:
            tf.set_random_seed(random_seed)

        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)

        # Initialise the variables w and b.
        self.vars = utils.init_var_map(init_vars, init_path)
        w = self.vars['w']
        b = self.vars['b']

        # Linear model on the sparse input, flattened to one logit per row.
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        # NOTE(review): the L2 term is computed on `xw`, not on `w` --
        # confirm this is intended.
        log_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.y, logits=logits))
        l2_penalty = l2_weight * tf.nn.l2_loss(xw)
        self.loss = log_loss + l2_penalty
        self.optimizer = utils.get_optimizer(
            opt_algo, learning_rate, self.loss)

        # Let the session grow its GPU memory allocation on demand.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=self.sess)
def testGradientsAsVariables(self):
    """Apply SGD using gradients that were first copied into variables."""
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.test_session() as sess:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            cost = 5 * var0 + 3 * var1
            global_step = tf.Variable(
                tf.zeros([], tf.int64), name='global_step')

            sgd_op = tf.train.GradientDescentOptimizer(3.0)
            grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1])

            # Convert gradients to tf.Variables: one zero-initialised
            # variable per gradient, plus an assign op that fills it.
            converted_grads = []
            for _ in grads_and_vars:
                converted_grads.append(tf.Variable(tf.zeros([2], dtype)))
            convert_ops = []
            for i, gv in enumerate(grads_and_vars):
                convert_ops.append(tf.assign(converted_grads[i], gv[0]))

            converted_grads_and_vars = list(
                zip(converted_grads, [var0, var1]))
            opt_op = sgd_op.apply_gradients(
                converted_grads_and_vars, global_step)

            tf.global_variables_initializer().run()
            # Run convert_ops to copy the gradient values into the variables.
            sess.run(convert_ops)

            # Fetch params to validate initial values.
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())

            # Run 1 step of sgd through optimizer.
            opt_op.run()

            # Validate updated params.
            self.assertAllClose([-14., -13.], var0.eval())
            self.assertAllClose([-6., -5.], var1.eval())
def testDenseFeaturesSeparableWithinMargins(self):
    """Hinge-loss SDCA on separable dense points that lie within the margins."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0, 0.5], [1.0, -0.5]]],
            weights=[1.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            'symmetric_l2_regularization': 1.0,
            'symmetric_l1_regularization': 0,
            'loss_type': 'hinge_loss',
        }
        model = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)

        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        model.update_weights(train_op).run()

        # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the
        # datapoints are within the margins so there is unregularized loss
        # (1/2 per example).  For these datapoints, optimal weights are
        # w_1~=0.0 and w_2~=1.0 which gives an L2 loss of ~0.25.
        self.assertAllClose([0.5, -0.5], predictions.eval(), rtol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.5, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.75, regularized_loss.eval(), atol=0.02)
def testDenseFeaturesWithArbitraryWeights(self):
    """Squared-loss SDCA on dense features with non-uniform example weights."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0, 0.0], [0.0, 1.0]]],
            weights=[20.0, 10.0],
            labels=[10.0, -5.0])
        options = {
            'symmetric_l2_regularization': 5.0,
            'symmetric_l1_regularization': 0,
            'loss_type': 'squared_loss',
        }
        lr = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        predictions = lr.predictions(examples)

        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        lr.update_weights(train_op).run()

        # The loss function for these particular features is given by:
        # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
        # \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example weights.
        # It turns out that the optimal (variable) weights are given by:
        # w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and
        # w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3.
        # In this case the (unnormalized regularized) loss will be:
        # s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9.
        # The actual loss should be further normalized by the sum of example
        # weights.
        self.assertAllClose([8.0, -10.0/3], predictions.eval(), rtol=0.01)

        loss = lr.regularized_loss(examples)
        self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
def testDenseFeaturesPerfectlySeparable(self):
    """Hinge-loss SDCA on two perfectly separable dense points."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[1.0, 1.0], [1.0, -1.0]],
            weights=[1.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            'symmetric_l2_regularization': 1.0,
            'symmetric_l1_regularization': 0,
            'loss_type': 'hinge_loss',
        }
        model = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)

        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        model.update_weights(train_op).run()

        self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        # (1.0, 1.0) and (1.0, -1.0) are perfectly separable by x-axis (that
        # is, the SVM's functional margin >=1), so the unregularized loss is
        # ~0.0.  There is only loss due to l2-regularization.  For these
        # datapoints, it turns out that w_1~=0.0 and w_2~=1.0 which means
        # that l2 loss is ~0.25.
        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.0, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.25, regularized_loss.eval(), atol=0.02)
def testL1Regularization(self):
    """Squared-loss SDCA with both L1 and L2 regularisation on sparse examples."""
    # Setup test data.
    example_protos = [
        make_example_proto({'age': [0], 'gender': [0]}, -10.0),
        make_example_proto({'age': [1], 'gender': [1]}, 14.0),
    ]
    example_weights = [1.0, 1.0]

    with self._single_threaded_test_session():
        examples = make_example_dict(example_protos, example_weights)
        variables = make_variable_dict(1, 1)
        options = {
            'symmetric_l2_regularization': 1.0,
            'symmetric_l1_regularization': 4.0,
            'loss_type': 'squared_loss',
        }
        lr = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        prediction = lr.predictions(examples)
        loss = lr.regularized_loss(examples)

        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        lr.update_weights(train_op).run()

        # Predictions should be -4.0, 48/5 due to minimizing regularized
        # loss:
        #   (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
        # NOTE(review): the second value asserted below is 20/3 (with an 8%
        # relative tolerance), not the 48/5 quoted above -- confirm which
        # one is intended.
        self.assertAllClose([-4.0, 20.0 / 3.0], prediction.eval(), rtol=0.08)

        # Loss should be the sum of the regularized loss value from above per
        # example after plugging in the optimal weights.
        self.assertAllClose(308.0 / 6.0, loss.eval(), atol=0.01)
def testDenseFeaturesWithDefaultWeights(self):
    """Squared-loss SDCA on dense features with unit example weights."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0], [0.0]], [0.0, 1.0]],
            weights=[1.0, 1.0],
            labels=[10.0, -5.0])
        options = {
            'symmetric_l2_regularization': 1.0,
            'symmetric_l1_regularization': 0,
            'loss_type': 'squared_loss',
        }
        lr = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        predictions = lr.predictions(examples)

        train_op = lr.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        lr.update_weights(train_op).run()

        # The loss function for these particular features is given by:
        # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2).
        # So, differentiating wrt to w_1, w_2 yields the following optimal
        # values:
        # w_1* = label_1/(\lambda + 1)= 10/2,
        # w_2* =label_2/(\lambda + 1)= -5/2.
        # In this case the (unnormalized regularized) loss will be:
        # 1/2(10-5)^2 + 1/2(5-5/2)^2 + 1/2(5^2 + (5/2)^2) = 125.0/4.  The
        # actual loss should be further normalized by the sum of example
        # weights.
        self.assertAllClose([5.0, -2.5], predictions.eval(), rtol=0.01)

        loss = lr.regularized_loss(examples)
        self.assertAllClose(125.0 / 8.0, loss.eval(), atol=0.01)
def testDenseFeaturesWeightedExamples(self):
    """Hinge-loss SDCA on dense features where one example weighs 3x more."""
    with self._single_threaded_test_session():
        examples, variables = make_dense_examples_and_variables_dicts(
            dense_features_values=[[[1.0], [1.0]], [[0.5], [-0.5]]],
            weights=[3.0, 1.0],
            labels=[1.0, 0.0])
        options = {
            'symmetric_l2_regularization': 1.0,
            'symmetric_l1_regularization': 0,
            'loss_type': 'hinge_loss',
        }
        model = SdcaModel(examples, variables, options)
        tf.global_variables_initializer().run()

        predictions = model.predictions(examples)
        binary_predictions = get_binary_predictions_for_hinge(predictions)

        train_op = model.minimize()
        for _ in range(_MAX_ITERATIONS):
            train_op.run()
        model.update_weights(train_op).run()

        # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model
        # will try to increase the margin from (1.0, 0.5).  Due to
        # regularization, (1.0, -0.5) will be within the margin.  For these
        # points and example weights, the optimal weights are w_1~=0.4 and
        # w_2~=1.2 which give an L2 loss of 0.5 * 0.25 * 0.25 * 1.6 = 0.2.
        # The binary predictions will be correct, but the boundary will be
        # much closer to the 2nd point than the first one.
        self.assertAllClose([1.0, -0.2], predictions.eval(), atol=0.05)
        self.assertAllEqual([1, 0], binary_predictions.eval())

        unregularized_loss = model.unregularized_loss(examples)
        regularized_loss = model.regularized_loss(examples)
        self.assertAllClose(0.2, unregularized_loss.eval(), atol=0.02)
        self.assertAllClose(0.4, regularized_loss.eval(), atol=0.02)
def testFractionalExampleLabel(self):
    """Logistic-loss SDCA must reject a label that is neither 0.0 nor 1.0."""
    # Setup test data with 1 positive, and 1 mostly-negative example.
    example_protos = [
        make_example_proto({'age': [0], 'gender': [0]}, 0.1),
        make_example_proto({'age': [1], 'gender': [1]}, 1),
    ]
    example_weights = [1.0, 1.0]
    expected_error = 'Only labels of 0.0 or 1.0 are supported right now.'

    for num_shards in _SHARD_NUMBERS:
        with self._single_threaded_test_session():
            examples = make_example_dict(example_protos, example_weights)
            variables = make_variable_dict(1, 1)
            options = {
                'symmetric_l2_regularization': 1,
                'symmetric_l1_regularization': 0,
                'num_table_shards': num_shards,
                'loss_type': 'logistic_loss',
            }
            lr = SdcaModel(examples, variables, options)
            tf.global_variables_initializer().run()

            # The error is raised when the training op runs.
            with self.assertRaisesOpError(expected_error):
                lr.minimize().run()
def testBasicLSTMCell(self):
    """Smoke-test ``BasicLSTMCell`` stacked twice and with input_size != num_units."""
    with self.test_session() as sess:
        # Two stacked cells sharing one concatenated (non-tuple) state.
        with tf.variable_scope(
                "root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 8])
            stacked_cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2, state_is_tuple=False)] * 2,
                state_is_tuple=False)
            g, out_m = stacked_cell(x, m)
            sess.run([tf.global_variables_initializer()])

            # The zero tensors are overridden by feeding them by name.
            feed = {
                x.name: np.array([[1., 1.]]),
                m.name: 0.1 * np.ones([1, 8]),
            }
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)

            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem = np.array([[
                0.68967271, 0.68967271,
                0.44848421, 0.44848421,
                0.39897051, 0.39897051,
                0.24024698, 0.24024698,
            ]])
            self.assertAllClose(res[1], expected_mem)

        with tf.variable_scope(
                "other", initializer=tf.constant_initializer(0.5)):
            # Test BasicLSTMCell with input_size != num_units.
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 4])
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(
                2, state_is_tuple=False)
            g, out_m = single_cell(x, m)
            sess.run([tf.global_variables_initializer()])

            feed = {
                x.name: np.array([[1., 1., 1.]]),
                m.name: 0.1 * np.ones([1, 4]),
            }
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)
def run(args):
    """Train AnomalyNet and write instance- and block-level evaluation files.

    The last ``2 * anomaly_num`` instances of the loaded data set are held out
    (and shuffled with ``config.seed``) for testing; everything before them is
    used for training.  During training the generator and discriminator are
    updated in alternating runs of 50 steps.

    Args:
        args: parsed command-line arguments; ``input``, ``instance_output``
            and ``block_output`` are read here, the rest is consumed by
            ``Config``.
    """
    # load configuration
    config = Config(args)

    # load data
    dataset = ld.LoadData(args.input)
    data = dataset.data
    label = dataset.label
    anomaly_num = dataset.anomaly_num
    feature_index = dataset.feature_index
    # number of unique item ids in dataset instance
    feature_item_num = dataset.feature_item_num
    instance_num = len(data)
    split_at = instance_num - 2 * anomaly_num

    # for training
    training_data = data[:split_at]
    training_data = ld.get_shaped_data(
        training_data, config.batch_size, config.block_size, len(data[0]))
    print("----------finish shaping training data!-----------")
    instance_dim = len(training_data[0][0][0])

    # for testing
    testing_data = data[split_at:]
    testing_label = label[split_at:]
    # Re-seeding before each shuffle applies the same permutation to the data
    # and to the labels.
    randnum = config.seed
    random.seed(randnum)
    random.shuffle(testing_data)
    random.seed(randnum)
    random.shuffle(testing_label)
    testing_data = ld.get_shaped_data(
        testing_data, config.batch_size, config.block_size, len(data[0]))
    print("----------finish shaping testing data!-----------")
    # Drop the labels of the trailing instances that do not fill a whole batch.
    testing_data_num = len(testing_label) - len(testing_label) % (config.block_size * config.batch_size)
    testing_label = testing_label[:testing_data_num]  # testing data instance level ground truth

    print("training data", training_data.shape, instance_dim)
    print("testing data", testing_data.shape, testing_data_num, testing_data[0].shape)
    print("anomaly_num", anomaly_num)
    print("number of normal data in testing data:", np.sum(testing_label), len(testing_label))
    print("feature_item_num", feature_item_num)

    with tf.Graph().as_default(), tf.Session() as sess:
        # graph settings
        model = AnomalyNet(feature_index,
                           config.FM_weight_dim,
                           feature_item_num,
                           config.batch_size,
                           config.block_size,
                           instance_dim,
                           config.attention_dim,
                           config.autoencoder_hidden_dim,
                           config.lstm_dropout_keep_prob,
                           config.lstm_layer_num,
                           config.lstm_hidden_size,
                           config.is_training,
                           config.gan_hidden_dim,
                           config.alpha,
                           config.beta,
                           config.noise,
                           config.learning_rate)
        # Saver for checkpoints (saving itself is currently disabled).
        saver = tf.train.Saver(max_to_keep=10)
        sess.run(tf.global_variables_initializer())

        flag = 0
        # NOTE(review): evaluation is assumed to run once per epoch because
        # every output path is suffixed with the epoch index - confirm against
        # the original indentation.
        for epoch in range(config.epoch):
            # training
            for batch_idx, curr_batch in enumerate(training_data):
                flag = flag + 1
                pointer = flag % 100
                feed_dict = {model.data: curr_batch}
                # First half of every 100 steps trains the generator, the
                # second half the discriminator.
                if pointer < 50:
                    sess.run(model.G_train, feed_dict=feed_dict)
                else:
                    sess.run(model.D_train, feed_dict=feed_dict)
                if batch_idx % 50 == 0:
                    result = sess.run((model.generator_loss, model.discriminator_loss), feed_dict=feed_dict)
                    print("current epoch %d, in batch %d, current flag is %d, generator average loss %.4f, discriminator average loss %.4f" % (epoch, batch_idx, pointer, result[0], result[1]))

            # testing
            instance_loss_list = []
            block_loss_list = []
            for curr_batch in testing_data:
                feed_dict = {model.data: curr_batch}
                instance_loss, block_loss = sess.run(
                    (model.instance_total_loss, model.block_total_loss), feed_dict=feed_dict)
                instance_loss_list.extend(instance_loss)
                block_loss_list.extend(block_loss)

            # instance output
            instance_path = args.instance_output + '_%d' % (epoch)
            with open(instance_path, 'w') as bw:
                bw.write("true pred\n")
                for idx, loss_value in enumerate(instance_loss_list):
                    bw.write(str(testing_label[idx]) + " " + str(loss_value) + "\n")

            # block output
            testing_block_num = testing_data_num // config.block_size
            block_true = []
            for block_idx in range(testing_block_num):
                begin = block_idx * config.block_size
                true_sum = np.sum(testing_label[begin:begin + config.block_size])
                # generate ground truth
                if true_sum < config.block_size * config.block_ratio:
                    block_true.append(0)
                else:
                    block_true.append(1)

            block_path = args.block_output + '_%d' % (epoch)
            with open(block_path, 'w') as bw:
                bw.write("true pred\n")
                for block_idx in range(testing_block_num):
                    bw.write(str(block_true[block_idx]) + " " + str(block_loss_list[block_idx]) + "\n")

            instance_auc, _, _, _ = newmetrics.roc(
                testing_label, instance_loss_list, pos_label=0, output_path=instance_path)
            block_auc, _, _, _ = newmetrics.roc(
                block_true, block_loss_list, pos_label=0, output_path=block_path)
            print('epoch:', epoch, " instance level auc: ", instance_auc)
            print('epoch:', epoch, " block level auc: ", block_auc)
def _load_province_image_set(dir_pattern):
    """Load the images of every class directory as binarised pixel rows.

    Args:
        dir_pattern: ``%``-format string that yields the directory of one
            class when formatted with the class index.

    Returns:
        ``(images, labels, count)``: a ``count x SIZE`` array of 0/1 pixels,
        the matching ``count x NUM_CLASSES_P`` one-hot labels and the number
        of images found.
    """
    # First pass: collect every file so the arrays can be sized up front.
    samples = []
    for class_idx in range(0, NUM_CLASSES_P):
        class_dir = dir_pattern % class_idx
        for root, _dirs, files in os.walk(class_dir):
            for filename in files:
                # Join with the directory os.walk actually found the file in,
                # so files inside sub-directories resolve correctly.
                samples.append((os.path.join(root, filename), class_idx))

    count = len(samples)
    images = np.zeros((count, SIZE), dtype=int)
    labels = np.zeros((count, NUM_CLASSES_P), dtype=int)

    # Second pass: fill in pixel data and labels.
    for index, (path, class_idx) in enumerate(samples):
        with Image.open(path) as img:
            width, height = img.size
            for h in range(0, height):
                for w in range(0, width):
                    # Thresholding like this thins the strokes, which helps
                    # recognition accuracy.
                    if img.getpixel((w, h)) > 230:
                        images[index][w + h * width] = 0
                    else:
                        images[index][w + h * width] = 1
        labels[index][class_idx] = 1
    return images, labels, count


def train_province():
    """Train the province-character CNN and save the resulting checkpoint.

    Reads the training and validation images from ``./train_images``, builds
    a two-convolution network on the module-level placeholders, trains it for
    up to ``iterations_P`` passes (stopping early once validation accuracy
    reaches 99.5% after at least 150 passes) and saves the variables below
    ``SAVER_DIR_P``.  Updates the module-level ``time_begin``.
    """
    global iterations_P
    global time_begin

    # The class index is substituted into the path; adapt to your own layout.
    input_images, input_labels, input_count = _load_province_image_set(
        './train_images/training-set/chinese-characters/%s/')
    val_images, val_labels, _ = _load_province_image_set(
        './train_images/validation-set/chinese-characters/%s/')

    with tf.Session() as sess:
        # First convolutional layer
        W_conv1 = tf.Variable(tf.truncated_normal([8, 8, 1, 16], stddev=0.1), name="W_conv1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[16]), name="b_conv1")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 2, 2, 1]
        pool_strides = [1, 2, 2, 1]
        L1_pool = conv_layer(x_image, W_conv1, b_conv1, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Second convolutional layer
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 16, 32], stddev=0.1), name="W_conv2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[32]), name="b_conv2")
        conv_strides = [1, 1, 1, 1]
        kernel_size = [1, 1, 1, 1]
        pool_strides = [1, 1, 1, 1]
        L2_pool = conv_layer(L1_pool, W_conv2, b_conv2, conv_strides, kernel_size, pool_strides, padding='SAME')

        # Fully connected layer
        W_fc1 = tf.Variable(tf.truncated_normal([16 * 20 * 32, 512], stddev=0.1), name="W_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[512]), name="b_fc1")
        h_pool2_flat = tf.reshape(L2_pool, [-1, 16 * 20 * 32])
        h_fc1 = full_connect(h_pool2_flat, W_fc1, b_fc1)

        # dropout
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        # Readout layer
        W_fc2 = tf.Variable(tf.truncated_normal([512, NUM_CLASSES_P], stddev=0.1), name="W_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[NUM_CLASSES_P]), name="b_fc2")

        # Optimiser and training op
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_P, logits=y_conv))
        train_step = tf.train.AdamOptimizer((1e-4)).minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_P, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create the saver
        saver = tf.train.Saver()

        sess.run(tf.global_variables_initializer())

        time_elapsed = time.time() - time_begin
        print("读取图片文件耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        print("一共读取了 %s 个训练图像, %s 个标签" % (input_count, input_count))

        # Split the data into full batches plus one remainder batch so any
        # number of images is supported, e.g. 150 images with a batch size of
        # 60 give two batches of 60 and a final batch of 30.
        batch_size = 60
        batches_count, remainder = divmod(input_count, batch_size)
        print("训练数据集分成 %s 批, 前面每批 %s 个数据,最后一批 %s 个数据" % (batches_count + 1, batch_size, remainder))

        # Training passes
        for it in range(iterations_P):
            for n in range(batches_count):
                train_step.run(
                    feed_dict={
                        x: input_images[n * batch_size:(n + 1) * batch_size],
                        y_P: input_labels[n * batch_size:(n + 1) * batch_size],
                        keep_prob: 0.5
                    })
            if remainder > 0:
                start_index = batches_count * batch_size
                # Slice ends are exclusive, so go up to input_count to include
                # the last sample.
                train_step.run(
                    feed_dict={
                        x: input_images[start_index:input_count],
                        y_P: input_labels[start_index:input_count],
                        keep_prob: 0.5
                    })

            # Every fifth pass, measure validation accuracy and stop early
            # once it is high enough.
            iterate_accuracy = 0
            if it % 5 == 0:
                iterate_accuracy = accuracy.eval(feed_dict={
                    x: val_images,
                    y_P: val_labels,
                    keep_prob: 1.0
                })
                print('第 %d 次训练迭代: 准确率 %0.5f%%' % (it, iterate_accuracy * 100))
                if iterate_accuracy >= 0.995 and it >= 150:
                    break

        print('完成训练!')
        time_elapsed = time.time() - time_begin
        print("训练耗费时间:%d秒" % time_elapsed)
        time_begin = time.time()

        # Save the training result
        if not os.path.exists(SAVER_DIR_P):
            print('不存在训练数据保存目录,现在创建保存目录')
            os.makedirs(SAVER_DIR_P)
        saver.save(sess, "%smodel.ckpt" % (SAVER_DIR_P))
def _fit_cdr(self):
    """Fit the model with TensorFlow and store the learned ``U`` and ``V``.

    Item bag-of-words features are min-max normalised, a ``Model`` is built
    with a symmetric auto-encoder layout, and for ``max_iter`` iterations
    every (user, item, item) batch first runs ``opt1`` and then ``opt2``.
    The default graph is reset afterwards.
    """
    import tensorflow as tf
    from .model import Model

    train_set = self.train_set
    n_users = train_set.num_users
    n_items = train_set.num_items

    # Bag-of-words features of all items, min-max normalised.
    bow = train_set.item_text.batch_bow(np.arange(n_items))
    bow = (bow - bow.min()) / (bow.max() - bow.min())

    # Encoder sizes, the k-dimensional bottleneck, then decoder sizes.
    layer_sizes = (
        [self.vocab_size]
        + self.autoencoder_structure
        + [self.k]
        + self.autoencoder_structure
        + [self.vocab_size]
    )

    tf.set_random_seed(self.seed)
    model = Model(
        n_users=n_users,
        n_items=n_items,
        n_vocab=self.vocab_size,
        k=self.k,
        layers=layer_sizes,
        lambda_u=self.lambda_u,
        lambda_v=self.lambda_v,
        lambda_w=self.lambda_w,
        lambda_n=self.lambda_n,
        lr=self.learning_rate,
        dropout_rate=self.dropout_rate,
        U=self.U,
        V=self.V,
        act_fn=self.act_fn,
        seed=self.seed,
    )

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())

        progress = trange(self.max_iter, disable=not self.verbose)
        for _ in progress:
            # A fresh corruption mask is drawn for every iteration.
            mask = self.rng.binomial(
                1, 1 - self.corruption_rate, (n_items, self.vocab_size)
            )
            loss_total = 0
            n_seen = 0
            n_batches = 0
            batches = self.train_set.uij_iter(
                batch_size=self.batch_size, shuffle=True
            )
            for batch_u, batch_i, batch_j in batches:
                feeds = {
                    model.mask_input: mask[batch_i, :],
                    model.text_input: bow[batch_i, :],
                    model.batch_u: batch_u,
                    model.batch_i: batch_i,
                    model.batch_j: batch_j,
                }
                sess.run(model.opt1, feeds)  # train U, V
                _, batch_loss = sess.run([model.opt2, model.loss], feeds)  # train SDAE

                loss_total += batch_loss
                n_seen += len(batch_u)
                n_batches += 1
                if n_batches % 10 == 0:
                    progress.set_postfix(loss=(loss_total / n_seen))

        self.U, self.V = sess.run([model.U, model.V])

    tf.reset_default_graph()

    if self.verbose:
        print("\nLearning completed")
def train(self, trainX=None, trainy=None, valX=None, valy=None):
    """Train the DNN-based malware detector.

    Every 50 steps the model is evaluated on the validation set, a summary is
    written, and a checkpoint is saved when the validation accuracy improves
    on the best value seen so far.

    Args:
        trainX: training features; when both ``trainX`` and ``trainy`` are
            ``None`` the train/validation splits are loaded from the paths
            configured for ``self.feature_tp``.
        trainy: training labels.
        valX: validation features.
        valy: validation labels.
    """
    if trainX is None and trainy is None:
        trainX, valX, _ = utils.read_joblib(
            config.get('feature.' + self.feature_tp, 'dataX'))
        trainy, valy, _ = utils.read_joblib(
            config.get('feature.' + self.feature_tp, 'datay'))

    train_input_supervised = utils.DataProducer(
        trainX, trainy, self.hp_params.batch_size,
        n_epochs=self.hp_params.n_epochs)
    val_input = utils.DataProducer(valX, valy, self.hp_params.batch_size,
                                   name='test')

    global_train_step = tf.train.get_or_create_global_step()
    saver = tf.train.Saver()
    tf.summary.scalar('accuracy', self.accuracy)
    tf.summary.scalar('loss', self.cross_entropy)
    merged_summaries = tf.summary.merge_all()

    # optimizer
    with tf.control_dependencies(tf.get_collection(
            tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer(
            self.hp_params.learning_rate).minimize(
                self.cross_entropy, global_step=global_train_step)

    tf_cfg = tf.ConfigProto(log_device_placement=True,
                            allow_soft_placement=True)
    tf_cfg.gpu_options.allow_growth = True
    tf_cfg.gpu_options.per_process_gpu_memory_fraction = 1.
    sess = tf.Session(config=tf_cfg)

    # try/finally guarantees the session is released even if training fails.
    try:
        with sess.as_default():
            summary_writer = tf.summary.FileWriter(self.save_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            training_time = 0.0
            train_input_supervised.reset_cursor()
            output_steps = 50
            best_val_acc = 0.
            for step_idx, X_batch, y_batch in train_input_supervised.next_batch():
                train_dict = {
                    self.x_input: X_batch,
                    self.y_input: y_batch,
                    self.is_training: True
                }

                if (step_idx + 1) % output_steps == 0:
                    print('Step {}/{}:{}'.format(step_idx + 1,
                                                 train_input_supervised.steps,
                                                 datetime.now()))
                    val_input.reset_cursor()
                    val_accs = [
                        sess.run(self.accuracy,
                                 feed_dict={self.x_input: valX_batch,
                                            self.y_input: valy_batch,
                                            self.is_training: False})
                        for [_, valX_batch, valy_batch] in val_input.next_batch()
                    ]
                    _acc = np.mean(val_accs)
                    print(' validation accuracy {:.5}%'.format(_acc * 100))
                    if step_idx != 0:
                        print(' {} samples per second'.format(
                            output_steps * self.hp_params.batch_size / training_time))
                        training_time = 0.

                    summary = sess.run(merged_summaries, feed_dict=train_dict)
                    summary_writer.add_summary(summary,
                                               global_train_step.eval(sess))

                    if best_val_acc < _acc:
                        # Remember the new best so that later, worse models
                        # do not overwrite this checkpoint.
                        best_val_acc = _acc
                        if not os.path.exists(self.save_dir):
                            os.makedirs(self.save_dir)
                        saver.save(sess,
                                   os.path.join(self.save_dir, 'checkpoint'),
                                   global_step=global_train_step)

                # Only the optimisation step itself is timed.
                start = default_timer()
                sess.run(optimizer, feed_dict=train_dict)
                end = default_timer()
                training_time = training_time + end - start
    finally:
        sess.close()
# fc1 activation followed by dropout.
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## fc2 layer ##
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
fc2_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
prediction = tf.nn.softmax(fc2_logits)

# The error between prediction and real data: cross-entropy summed over axis 1
# and averaged over the remaining axis.
per_example_loss = -tf.reduce_sum(ys * tf.log(prediction),
                                  reduction_indices=[1])
cross_entropy = tf.reduce_mean(per_example_loss)  # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

sess = tf.Session()

# Important step: tf.initialize_all_variables() is no longer valid from
# 2017-03-02 when using tensorflow >= 0.12, so choose the initializer by
# version.
tf_version_parts = tf.__version__.split('.')
uses_legacy_initializer = (int(tf_version_parts[1]) < 12
                           and int(tf_version_parts[0]) < 1)
if uses_legacy_initializer:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()
sess.run(init)

for step in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_feed = {xs: batch_xs, ys: batch_ys, keep_prob: 0.5}
    sess.run(train_step, feed_dict=train_feed)
    # Report test accuracy every 50 steps.
    if step % 50 == 0:
        print(compute_accuracy(
            mnist.test.images, mnist.test.labels))
import tensorflow as tf

# Demo of explicit device placement: two constants are created under the CPU
# device scope and their sum is requested under the '/gpu:1' device scope.
with tf.device('/cpu:0'):
    a = tf.constant([1.0,2.0,3.0],shape=[3],name='a')
    b = tf.constant([1.0,2.0,3.0],shape=[3],name='b')
with tf.device('/gpu:1'):
    c = a+b
# Note: allow_soft_placement=True lets TensorFlow choose the compute device
# itself; without this option an error is raised.
# Not every operation can be placed on a GPU, and forcing an operation that
# cannot run on a GPU onto one raises an error.
# log_device_placement=True is also set on the session config.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,log_device_placement=True))
#sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
sess.run(tf.global_variables_initializer())
print(sess.run(c))
def train():
    """Build the point-cloud model graph and train it for ``MAX_EPOCH`` epochs.

    Each epoch runs ``train_one_epoch`` followed by ``eval_one_epoch``; the
    variables are saved to ``LOG_DIR/model.ckpt`` every tenth epoch.

    Raises:
        ValueError: if ``OPTIMIZER`` is neither ``'momentum'`` nor ``'adam'``.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch'
            # parameter for you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred = get_model(pointclouds_pl, is_training_pl, bn_decay=bn_decay)
            loss = get_loss(pred, labels_pl)
            tf.summary.scalar('loss', loss)

            correct = tf.equal(tf.argmax(pred, 2), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE*NUM_POINT)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of an UnboundLocalError on
                # the next line.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'"
                    % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = True

        with tf.Session(config=config) as sess:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))

            try:
                # Init variables
                init = tf.global_variables_initializer()
                sess.run(init, {is_training_pl: True})

                ops = {'pointclouds_pl': pointclouds_pl,
                       'labels_pl': labels_pl,
                       'is_training_pl': is_training_pl,
                       'pred': pred,
                       'loss': loss,
                       'train_op': train_op,
                       'merged': merged,
                       'step': batch}

                for epoch in range(MAX_EPOCH):
                    log_string('**** EPOCH %03d ****' % (epoch))
                    sys.stdout.flush()

                    train_one_epoch(sess, ops, train_writer)
                    eval_one_epoch(sess, ops, test_writer)

                    # Save the variables to disk.
                    if epoch % 10 == 0:
                        save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                        log_string("Model saved in file: %s" % save_path)
            finally:
                # Flush and release the event files even if training fails.
                train_writer.close()
                test_writer.close()
def __init__(self, files, img_length, num_colors, d_sizes, g_sizes):
    """Build the GAN graph, its costs and optimizers, and start a session.

    Args:
        files: image file names fed to the input queue.
        img_length: passed to ``preprocessing`` together with ``num_colors``.
        num_colors: passed to ``preprocessing``.
        d_sizes: discriminator layer specification.
        g_sizes: generator layer specification; ``g_sizes['z']`` is the
            latent dimensionality.
    """
    # save for later
    self.N = len(files)
    self.img_length = img_length
    self.num_colors = num_colors
    self.latent_dims = g_sizes['z']

    # define the input data
    self.Z = tf.placeholder(
        tf.float32,
        shape=(None, self.latent_dims),
        name='Z'
    )

    # note: by making batch_sz a placeholder, we can specify a variable
    # number of samples in the FS-conv operation where we are required
    # to pass in output_shape
    # we need only pass in the batch size via feed_dict
    self.batch_sz = tf.placeholder(tf.int32, shape=(), name='batch_sz')

    # Real images are read from the files through a shuffling input queue.
    filename_queue = tf.train.string_input_producer(files)
    reader = tf.WholeFileReader()
    key, value = reader.read(filename_queue)
    images = preprocessing(value, self.num_colors, self.img_length)
    self.X = tf.train.shuffle_batch([images],
                                    batch_size=self.batch_sz,
                                    capacity=2000,
                                    allow_smaller_final_batch=True,
                                    min_after_dequeue=50)

    # build the discriminator
    logits = self.build_discriminator(self.X, d_sizes)

    # build generator
    self.sample_images = self.build_generator(self.Z, g_sizes)

    # get sample logits
    with tf.variable_scope("discriminator") as scope:
        scope.reuse_variables()
        sample_logits = self.d_forward(self.sample_images, True)

    # get sample images for test time (batch norm is different)
    with tf.variable_scope("generator") as scope:
        scope.reuse_variables()
        self.sample_images_test = self.g_forward(
            self.Z, reuse=True, is_training=False
        )

    # build costs
    self.d_cost_real = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits,
        labels=tf.ones_like(logits)
    )
    self.d_cost_fake = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=sample_logits,
        labels=tf.zeros_like(sample_logits)
    )
    self.d_cost = tf.reduce_mean(self.d_cost_real) + tf.reduce_mean(self.d_cost_fake)
    self.g_cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=sample_logits,
            labels=tf.ones_like(sample_logits)
        )
    )

    # Discriminator accuracy: real images should get positive logits,
    # generated ones negative logits.
    real_predictions = tf.cast(logits > 0, tf.float32)
    fake_predictions = tf.cast(sample_logits < 0, tf.float32)
    # Count the predictions actually made instead of assuming a fixed batch
    # size: the real batch size is fed at run time through ``batch_sz`` and
    # the number of samples follows the fed ``Z``.
    num_predictions = tf.cast(
        tf.size(real_predictions) + tf.size(fake_predictions), tf.float32)
    num_correct = tf.reduce_sum(real_predictions) + tf.reduce_sum(fake_predictions)
    self.d_accuracy = num_correct / num_predictions

    # optimizers: variables are assigned to a network by the first letter of
    # their name ('d' for discriminator, 'g' for generator).
    self.d_params = [t for t in tf.trainable_variables() if t.name.startswith('d')]
    self.g_params = [t for t in tf.trainable_variables() if t.name.startswith('g')]

    self.d_train_op = tf.train.AdamOptimizer(
        LEARNING_RATE, beta1=BETA1
    ).minimize(
        self.d_cost, var_list=self.d_params
    )
    self.g_train_op = tf.train.AdamOptimizer(
        LEARNING_RATE, beta1=BETA1
    ).minimize(
        self.g_cost, var_list=self.g_params
    )

    # set up session and variables for later
    self.init_op = tf.global_variables_initializer()
    self.sess = tf.InteractiveSession()
    self.sess.run(self.init_op)
def _cpm_heatmap_to_rgb(heatmap):
    """Collapse the per-joint heatmaps of one sample into a 3-channel image.

    The first ``FLAGS.num_of_joints`` channels are resized to the network
    input size, reduced with a per-pixel maximum over joints and repeated
    three times along the channel axis.
    """
    heatmap = heatmap[:, :, 0:FLAGS.num_of_joints].reshape(
        (FLAGS.heatmap_size, FLAGS.heatmap_size, FLAGS.num_of_joints))
    heatmap = cv2.resize(heatmap, (FLAGS.input_size, FLAGS.input_size))
    heatmap = np.amax(heatmap, axis=2)
    heatmap = np.reshape(heatmap, (FLAGS.input_size, FLAGS.input_size, 1))
    return np.repeat(heatmap, 3, axis=2)


def train():
    """Train the CPM model described by ``FLAGS``.

    Creates the weight and log directories, builds the data generators and the
    network, optionally restores pretrained weights, then runs the training
    loop with periodic preview, validation and checkpointing.

    Raises:
        ValueError: if ``FLAGS.color_channel`` is neither ``'GRAY'`` nor
            ``'RGB'`` when a preview is drawn, or if no checkpoint is found
            under ``FLAGS.pretrained_model``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id

    # Step 1: Create dirs for saving models and logs
    model_path_suffix = os.path.join(
        FLAGS.network_def,
        'input_{}_output_{}'.format(FLAGS.input_size, FLAGS.heatmap_size),
        'joints_{}'.format(FLAGS.num_of_joints),
        'stages_{}'.format(FLAGS.cpm_stages),
        'init_{}_rate_{}_step_{}'.format(FLAGS.init_lr, FLAGS.lr_decay_rate,
                                         FLAGS.lr_decay_step))
    model_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                  'models', 'weights', model_path_suffix)
    train_log_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                      'models', 'logs', model_path_suffix, 'train')
    test_log_save_dir = os.path.join('logs_and_weights_add_occlusion_and_fpn',
                                     'models', 'logs', model_path_suffix, 'test')
    # Create the directories without going through a shell.
    for directory in (model_save_dir, train_log_save_dir, test_log_save_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    # Step 2: Create dataset and data generator
    print('--Parsing Config File')
    params = process_config(datagenerator_config_file)
    print('--Creating Dataset')
    dataset = DataGenerator(
        params['total_joints_list'], params['blouse_joints_list'],
        params['dress_joints_list'], params['outwear_joints_list'],
        params['skirt_joints_list'], params['trousers_joints_list'],
        params['blouse_index'], params['dress_index'],
        params['outwear_index'], params['skirt_index'],
        params['trousers_index'], params['img_directory'],
        params['training_data_file'])
    dataset.generate_set(rand=True, validationRate=0.15)
    generator = dataset.generator(batchSize=FLAGS.batch_size, norm=False,
                                  sample='train')
    generator_eval = dataset.generator(batchSize=FLAGS.batch_size, norm=False,
                                       sample='valid')

    # Step 3: Build network graph
    model = cpm_model.CPM_Model(total_num=FLAGS.total_num,
                                input_size=FLAGS.input_size,
                                heatmap_size=FLAGS.heatmap_size,
                                batch_size=FLAGS.batch_size,
                                stages=FLAGS.cpm_stages,
                                num_joints=FLAGS.num_of_joints,
                                img_type=FLAGS.color_channel,
                                is_training=True)
    # The decayed-learning-rate variant model.build_loss(...) is disabled in
    # favour of build_loss3.
    model.build_loss3(optimizer='Adam')
    print('=====Model Build=====\n')

    merged_summary = tf.summary.merge_all()

    # Step 4: Training
    device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
    checkpoint_prefix = model_save_dir + '/' + FLAGS.network_def.split('.py')[0]
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # Create tensorboard
        train_writer = tf.summary.FileWriter(train_log_save_dir, sess.graph)
        test_writer = tf.summary.FileWriter(test_log_save_dir, sess.graph)

        # Create model saver
        saver = tf.train.Saver(max_to_keep=None)

        # Init all vars
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Restore pretrained weights
        if FLAGS.pretrained_model != '':
            if FLAGS.pretrained_model.endswith('.pkl'):
                model.load_weights_from_file(FLAGS.pretrained_model, sess,
                                             finetune=True)
            else:
                # Fetch the most recently saved checkpoint.
                checkpoint = tf.train.get_checkpoint_state(FLAGS.pretrained_model)
                if checkpoint is None:
                    raise ValueError('No checkpoint found in {!r}'.format(
                        FLAGS.pretrained_model))
                saver.restore(sess, checkpoint.model_checkpoint_path)

            # Check weights: print the mean of every trainable variable.
            for variable in tf.trainable_variables():
                with tf.variable_scope('', reuse=True):
                    var = tf.get_variable(variable.name.split(':0')[0])
                    print(variable.name, np.mean(sess.run(var)))

        for training_itr in range(FLAGS.training_iters):
            t1 = time.time()

            # Key modification: this is where the DataGenerator is used.
            # Read one batch data
            batch_x_np, batch_gt_heatmap_np, batch_centermap, batch_weight_np = next(
                generator)

            if FLAGS.normalize_img:
                # Normalize images
                batch_x_np = batch_x_np / 255.0 - 0.5
            else:
                batch_x_np -= 128.0

            # Forward and update weights
            stage_losses_np, total_loss_np, _, summaries, current_lr, \
            stage_heatmap_np, global_step = sess.run(
                [model.stage_loss,
                 model.total_loss,
                 model.train_op,
                 merged_summary,
                 model.lr,
                 model.stage_heatmap,
                 model.global_step],
                feed_dict={model.input_images: batch_x_np,
                           model.cmap_placeholder: batch_centermap,
                           model.gt_hmap_placeholder: batch_gt_heatmap_np,
                           model.train_weights_placeholder: batch_weight_np})

            # Show training info
            print_current_training_stats(global_step, current_lr,
                                         stage_losses_np, total_loss_np,
                                         time.time() - t1)

            # Write logs
            train_writer.add_summary(summaries, global_step)

            # Draw intermediate results
            if FLAGS.if_show and (global_step + 1) % FLAGS.img_show_iters == 0:
                if FLAGS.color_channel == 'GRAY':
                    demo_img = np.repeat(batch_x_np[0], 3, axis=2)
                elif FLAGS.color_channel == 'RGB':
                    demo_img = batch_x_np[0]
                else:
                    raise ValueError('Non support image type.')
                # Undo the input normalisation.  Building a new array here
                # also fixes the RGB / non-normalised case, which previously
                # updated demo_img in place before it was assigned.
                if FLAGS.normalize_img:
                    demo_img = demo_img + 0.5
                else:
                    demo_img = (demo_img + 128.0) / 255.0

                demo_stage_heatmaps = [
                    _cpm_heatmap_to_rgb(stage_heatmap_np[stage][0])
                    for stage in range(FLAGS.cpm_stages)
                ]
                demo_gt_heatmap = _cpm_heatmap_to_rgb(batch_gt_heatmap_np[0])

                # NOTE(review): in the non-normalised case demo_img has
                # already been divided by 255 above, so it is scaled twice
                # here - kept as in the original, confirm whether intended.
                if FLAGS.normalize_img:
                    blend_img = 0.5 * demo_img + 0.5 * demo_gt_heatmap
                else:
                    blend_img = 0.5 * demo_img / 255.0 + 0.5 * demo_gt_heatmap

                last_row = np.concatenate(
                    (demo_stage_heatmaps[FLAGS.cpm_stages - 1],
                     demo_gt_heatmap, blend_img),
                    axis=1)
                if FLAGS.cpm_stages >= 4:
                    # Show the first three stages above the final row.
                    upper_img = np.concatenate(
                        (demo_stage_heatmaps[0], demo_stage_heatmaps[1],
                         demo_stage_heatmaps[2]),
                        axis=1)
                    demo_img = np.concatenate((upper_img, last_row), axis=0)
                    cv2.imshow('current heatmap',
                               (demo_img * 255).astype(np.uint8))
                    cv2.waitKey(1000)
                else:
                    cv2.imshow('current heatmap',
                               (last_row * 255).astype(np.uint8))
                    cv2.waitKey(1000)

            if (global_step + 1) % FLAGS.validation_iters == 0:
                mean_val_loss = 0
                cnt = 0
                while cnt < 10:
                    batch_x_np, batch_gt_heatmap_np, batch_centermap, batch_weight_np = next(
                        generator_eval)
                    # Normalize images
                    # NOTE(review): validation always uses the /255 - 0.5
                    # scaling, even when FLAGS.normalize_img is False and
                    # training subtracts 128 instead - confirm.
                    batch_x_np = batch_x_np / 255.0 - 0.5

                    total_loss_np, summaries = sess.run(
                        [model.total_loss, merged_summary],
                        feed_dict={
                            model.input_images: batch_x_np,
                            model.cmap_placeholder: batch_centermap,
                            model.gt_hmap_placeholder: batch_gt_heatmap_np,
                            model.train_weights_placeholder: batch_weight_np
                        })
                    mean_val_loss += total_loss_np
                    cnt += 1

                print('\nValidation loss: {:>7.2f}\n'.format(mean_val_loss / cnt))
                # Only the summary of the last validation batch is logged.
                test_writer.add_summary(summaries, global_step)

            # Save models
            if (global_step + 1) % FLAGS.model_save_iters == 0:
                saver.save(sess=sess, save_path=checkpoint_prefix,
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')

            # Finish training
            if global_step == FLAGS.training_iters:
                saver.save(sess=sess, save_path=checkpoint_prefix,
                           global_step=(global_step + 1))
                print('\nModel checkpoint saved...\n')
                break

        print('Training done.')
def run_inference_graph(model, trained_checkpoint_prefix, dataset, num_images,
                        ignore_label, input_shape, pad_to_shape,
                        label_color_map, output_directory, num_classes,
                        eval_dir, min_dir, dist_dir, hist_dir, dump_dir):
    """Evaluate out-of-distribution (OOD) detection over ``dataset``.

    Builds the segmentation inference graph, derives a per-pixel OOD score
    (``1 - max softmax`` when ``FLAGS.max_softmax`` is set, otherwise the
    output of ``process_logits`` / ``pred_to_ood``), perturbs each input by
    ``FLAGS.epsilon`` along the sign of the score gradient, and accumulates
    streaming mIoU, ROC and PR metrics while iterating over the dataset.

    Args:
        model: Segmentation model; provides the output dictionary keys.
        trained_checkpoint_prefix: Checkpoint path handed to ``Saver.restore``.
        dataset: ``tf.data`` dataset; batched here with a batch size of 1.
        num_images: Number of images to process.
        ignore_label: Label value passed to ``get_valid`` / ``get_miou``.
        input_shape: Rank-3 list; the batch dimension is prepended here.
        pad_to_shape: Forwarded to ``deploy_segmentation_inference_graph``.
        label_color_map: Colour lookup used for the colourised outputs.
        output_directory: Destination of colourised predictions.
        num_classes: Number of in-distribution classes; annotations
            ``>= num_classes`` are treated as OOD.
        eval_dir: Unused by this function.
        min_dir: Destination of the normalised min-distance images.
        dist_dir: Destination of the colourised distance-class images.
        hist_dir: Unused by this function.
        dump_dir: Prefix of the raw dump directory; ``"_<epsilon>"`` is
            appended.

    NOTE(review): ``mean``, ``var_inv``, ``depth``, ``global_cov`` and
    ``global_mean`` are not parameters; they are read as module-level names
    and are presumably defined elsewhere in this file -- confirm.
    NOTE(review): when ``FLAGS.do_ood`` is off, ``dist_pred`` (and, with
    ``FLAGS.max_softmax``, ``selected``) is never defined; the function
    appears to rely on ``do_ood`` being set -- confirm.
    """
    assert len(input_shape) == 3, "input shape must be rank 3"
    batch = 1
    do_ood = FLAGS.do_ood
    epsilon = FLAGS.epsilon
    dump_dir += "_" + str(epsilon)

    # Constants forwarded to pred_to_ood together with the threshold.
    mean_value = 508.7571
    std_value = 77.60572284853058
    if FLAGS.max_softmax:
        thresh = 0.07100591715976332  # dim dist from sun train
    else:
        thresh = 0.37583892617449666  # dim from sun train
    effective_shape = [batch] + input_shape

    dataset = dataset.batch(batch, drop_remainder=True)
    dataset = dataset.apply(tf.data.experimental.ignore_errors())
    data_iter = dataset.make_one_shot_iterator()
    input_dict = data_iter.get_next()

    input_tensor = input_dict[dataset_builder._IMAGE_FIELD]
    annot_tensor = input_dict[dataset_builder._LABEL_FIELD]
    input_name = input_dict[dataset_builder._IMAGE_NAME_FIELD]

    # The network is fed through a placeholder (not the iterator) so that the
    # adversarially perturbed image can be fed back in.
    annot_pl = tf.placeholder(tf.float32, annot_tensor.get_shape().as_list())
    outputs, placeholder_tensor = deploy_segmentation_inference_graph(
        model=model,
        input_shape=effective_shape,
        pad_to_shape=pad_to_shape,
        input_type=tf.float32)

    pred_tensor = outputs[model.main_class_predictions_key]
    final_logits = outputs[model.final_logits_key]
    unscaled_logits = outputs[model.unscaled_logits_key]

    with tf.device("gpu:1"):
        if not FLAGS.max_softmax:
            (dist_class, img_dist, full_dist, min_dist, mean_p, var_inv_p,
             vars_noload, dbg) = process_logits(
                 final_logits, mean, var_inv, depth,
                 pred_tensor.get_shape().as_list(), num_classes,
                 global_cov, global_mean)
            dist_colour = _map_to_colored_labels(dist_class, label_color_map)
            pred_colour = _map_to_colored_labels(pred_tensor, label_color_map)
            selected = min_dist

    if do_ood:
        if FLAGS.max_softmax:
            interp_logits = tf.image.resize_bilinear(
                unscaled_logits, pred_tensor.shape.as_list()[1:3])
            dist_pred = 1.0 - tf.reduce_max(
                tf.nn.softmax(interp_logits/FLAGS.t_value), -1, keepdims=True)
            dist_class = tf.to_float(dist_pred >= thresh)
            selected = dist_pred
            vars_noload = []
        else:
            dist_pred = tf.expand_dims(
                pred_to_ood(min_dist, mean_value, std_value, thresh), -1)
            dist_class = tf.to_float(dist_pred >= thresh)

        # pred is the baseline of assuming all ood
        pred_tensor = tf.ones_like(pred_tensor)

    with tf.device("gpu:1"):
        neg_validity_mask = get_valid(annot_pl, ignore_label)
        with tf.variable_scope("PredIou"):
            (pred_miou, pred_conf_mat, pred_update), _ = get_miou(
                annot_pl, pred_tensor, num_classes, ignore_label, do_ood,
                neg_validity_mask)
        with tf.variable_scope("DistIou"):
            (dist_miou, dist_conf_mat, dist_update), _ = get_miou(
                annot_pl, dist_class, num_classes, ignore_label, do_ood,
                neg_validity_mask)

        weights = tf.to_float(neg_validity_mask)
        num_thresholds = 200
        ood_label = tf.to_float(annot_pl >= num_classes)
        with tf.variable_scope("Roc"):
            RocPoints, roc_update = tf.contrib.metrics.streaming_curve_points(
                ood_label, dist_pred, weights, num_thresholds, curve='ROC')
        with tf.variable_scope("Pr"):
            PrPoints, pr_update = tf.contrib.metrics.streaming_curve_points(
                ood_label, dist_pred, weights, num_thresholds, curve='PR')

    update_op = [pred_update, dist_update, pr_update, roc_update]
    update_op = tf.group(update_op)

    if not FLAGS.max_softmax:
        # Use fresh local names: assigning to ``mean`` / ``var_inv`` here
        # would make them function locals and turn the earlier reads in the
        # process_logits call into an UnboundLocalError.
        mean_np = np.reshape(mean, mean_p.get_shape().as_list())
        var_inv_np = np.reshape(var_inv, var_inv_p.get_shape().as_list())

    input_fetch = [input_name, input_tensor, annot_tensor]

    fetch = {
        "update": update_op,
        "selected": selected,
        "ood_label": ood_label,
    }

    # Debug fetches are disabled; this replaces whatever process_logits
    # returned in ``dbg``.
    dbg = []

    if FLAGS.train_kernel:
        fetch["predictions"] = pred_tensor
        fetch["min_dist_out"] = min_dist[0]

    if FLAGS.write_img:
        fetch["prediction_colour"] = pred_colour
        fetch["dist_out"] = tf.cast(dist_colour[0], tf.uint8)
        fetch["full_dist_out"] = full_dist[0]
        fetch["min_dist_out"] = min_dist[0]

    if FLAGS.write_out:
        fetch["img_dist_out"] = img_dist[0]
        fetch["unscaled_logits_out"] = unscaled_logits[0]

    # tf.gradients returns a list, so tf.sign(grads) gains a leading axis of
    # size 1; the unperturbed branch adds the same axis so that both can be
    # unwrapped with [0] after sess.run.
    grads = tf.gradients(selected, placeholder_tensor)
    if epsilon > 0.0:
        adv_img = placeholder_tensor - epsilon*tf.sign(grads)
    else:
        adv_img = tf.expand_dims(placeholder_tensor, 0)

    num_step = num_images // batch
    print("running for", num_step, "steps")

    # Both the image branch and the raw-dump branch push work onto the queue,
    # so the writers must exist when either flag is set.
    writers = []
    if FLAGS.write_out or FLAGS.write_img:
        write_queue = Queue(30)
        num_writers = 20
        writers = [ParallelWriter(write_queue) for _ in range(num_writers)]

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:
        init_feed = {}
        if not FLAGS.max_softmax:
            init_feed = {mean_p: mean_np, var_inv_p: var_inv_np}
        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()], init_feed)

        # Variables listed in vars_noload are excluded from the restore and
        # keep the values they were initialised with above.
        vars_toload = [v for v in tf.global_variables()
                       if v not in vars_noload]
        saver = tf.train.Saver(vars_toload)
        saver.restore(sess, trained_checkpoint_prefix)

        if FLAGS.train_kernel:
            kimg_pl, kedges_pl, kloss, ktrain_step, kfilter = kernel_model(
                (1, 1024, 2048, 1))
            init = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope="kmodel")
            sess.run(tf.variables_initializer(init))

        for idx in range(num_step):
            start_time = timeit.default_timer()

            inputs = sess.run(input_fetch)
            annot_raw = inputs[2]
            img_raw = inputs[1]
            image_path = inputs[0][0].decode("utf-8")
            filename = os.path.basename(image_path)
            dump_filename = os.path.join(dump_dir, filename + ".npy")

            # First pass: compute the (optionally perturbed) input image.
            adv_img_out = sess.run(
                adv_img,
                feed_dict={placeholder_tensor: img_raw, annot_pl: annot_raw})
            adv_img_out = adv_img_out[0]

            # Second pass: evaluate on that image and update the metrics.
            res, dbg_v = sess.run(
                [fetch, dbg],
                feed_dict={placeholder_tensor: adv_img_out,
                           annot_pl: annot_raw})

            roc = sess.run(RocPoints)
            auc = -np.trapz(roc[:, 1], roc[:, 0])

            pred_miou_v, dist_miou_v = sess.run([pred_miou, dist_miou])

            if FLAGS.train_kernel:
                predictions = res["predictions"]
                min_dist_out = res["min_dist_out"]
                edges = cv2.Canny(predictions[0].astype(np.uint8), 1, 1)
                kernel_filter = train_kernel(min_dist_out, edges, sess,
                                             kimg_pl, kedges_pl, kloss,
                                             ktrain_step, kfilter)
                dilated = np.expand_dims(
                    cv2.filter2D(edges, -1, kernel_filter[..., 0, 0]),
                    -1).astype(np.float32)
                dilated = dilated/np.max(dilated)
                disp = cv2.resize(
                    np.concatenate([to_img(min_dist_out), to_img(dilated)], 1),
                    (int(1920), int(1080)))
                cv2.imshow("test", disp)
                cv2.waitKey(1)

            if FLAGS.write_img:
                prediction_colour = res["prediction_colour"]
                dist_out = res["dist_out"]
                full_dist_out = res["full_dist_out"]
                # "predictions" is only fetched for train_kernel and was never
                # used here, so it is deliberately not read in this branch.
                min_dist_out = res["min_dist_out"]

                # Rescale the min-distance map to 0..255 for writing; this
                # aliases (and modifies) min_dist_out in place.
                min_dist_v = min_dist_out
                min_dist_v[np.logical_not(np.isfinite(min_dist_v))] = \
                    np.nanmin(min_dist_out)
                min_dist_v = min_dist_v - np.min(min_dist_v)  # min now at 0
                min_dist_v = (255*min_dist_v/np.max(min_dist_v)).astype(
                    np.uint8)  # max now at 255

                save_location = os.path.join(output_directory, filename)
                dist_filename = os.path.join(dist_dir, filename)
                min_filename = os.path.join(min_dir, filename)

                prediction_colour = prediction_colour.astype(np.uint8)
                output_channels = len(label_color_map[0])
                if output_channels == 1:
                    prediction_colour = np.squeeze(prediction_colour[0], -1)
                else:
                    prediction_colour = prediction_colour[0]
                write_queue.put((idx, save_location, prediction_colour))
                write_queue.put((idx, min_filename, min_dist_v))
                write_queue.put((idx, dist_filename, dist_out))

            if FLAGS.write_out:
                img_dist_out = res["img_dist_out"]
                unscaled_logits_out = res["unscaled_logits_out"]
                write_queue.put((idx, dump_filename,
                                 {"dist": img_dist_out,
                                  "unscaled_logits": unscaled_logits_out}))

            if FLAGS.debug:
                # NOTE(review): ``res`` is a dict keyed by strings, so these
                # integer lookups look stale and will likely raise KeyError;
                # left untouched because the intended keys cannot be
                # determined from here.
                dist_out = res[2][0].astype(np.uint8)
                full_dist_out = res[4][0]
                min_dist_out = res[5][0]

                min_dist_v = np.expand_dims(np.nanmin(full_dist_out, -1), -1)
                min_dist_v[np.logical_not(np.isfinite(min_dist_v))] = \
                    np.nanmin(full_dist_out)
                min_dist_v = min_dist_v - np.min(min_dist_v)  # min now at 0
                min_dist_v = (255*min_dist_v/np.max(min_dist_v)).astype(
                    np.uint8)  # max now at 255

                final_out = res[7][0]
                annot_out = inputs[2][0]
                img_out = inputs[1][0]

                # Interactive threshold explorer: s/w move the threshold,
                # a/d change the step size, p drops into pdb, escape leaves.
                thresh = np.median(min_dist_out)
                grain = (np.max(min_dist_out) - np.min(min_dist_out))/300
                print(thresh, "  ", grain)
                while True:
                    mask = np.expand_dims(min_dist_out < thresh, -1)
                    cv2.imshow("img", (img_out*mask).astype(np.uint8))
                    key = cv2.waitKey(1)
                    if key == 27:  # escape
                        break
                    elif key == 115:  # s
                        thresh += grain
                        print(thresh, "  ", grain)
                    elif key == 119:  # w
                        thresh -= grain
                        print(thresh, "  ", grain)
                    elif key == 97:  # a
                        grain -= 5
                        print(thresh, "  ", grain)
                    elif key == 100:  # d
                        grain += 5
                        print(thresh, "  ", grain)
                    elif key == 112:  # p
                        import pdb; pdb.set_trace()

            elapsed = timeit.default_timer() - start_time
            print('{0:.4f} iter: {1}, pred iou: {2:.6f}, dist iou: {3:.6f}, auc:{4:0.6f}'.format(
                elapsed, idx+1, pred_miou_v, dist_miou_v, auc))

        if not FLAGS.write_out:
            roc = sess.run(RocPoints)
            pr = sess.run(PrPoints)
            make_plots(roc, pr, num_thresholds)

    for w in writers:
        w.close()

    # The summary line reads loop variables, which only exist if at least one
    # step ran.
    if num_step > 0:
        print('{0:.4f} iter: {1}, pred iou: {2:.6f}, dist iou: {3:.6f}'.format(
            elapsed, idx+1, pred_miou_v, dist_miou_v))
def __init__(self, params, hidden_weights=None):
    """Build the network graph, open a session and initialise variables.

    Each layer except the last combines three paths: a tanh "hidden" path
    that mixes the layer input with a persistent hidden-state vector through
    the matrix ``H``, a tanh "mirror" path computed from the layer input,
    and a "readout" of the hidden path. The layer activation is
    ``activation_function(readout * mirror)``. The last layer is a plain
    affine map followed by ``activation_function``.

    Args:
        params: Dict providing ``'network_shape'`` (layer sizes, input
            first and output last), ``'batch_size'``, ``'tensorboard_dir'``,
            ``'activation_function'`` (callable applied to tensors) and
            ``'optimizer'`` (object exposing ``minimize``).
        hidden_weights: Optional dict mapping ``"H_<layer>"`` to an array
            used to initialise that layer's ``H`` matrix. When given, the
            scalar ``H_tune`` that scales ``H`` becomes trainable.
    """
    self.params = params
    self.network_shape = self.params['network_shape']
    self.input_dim = self.network_shape[0]
    self.output_dim = self.network_shape[-1]
    self.batch_size = self.params['batch_size']
    self.hidden_weights = hidden_weights
    self.tensorboard_dir = self.params['tensorboard_dir']
    self.activation_function = self.params['activation_function']
    self.optimizer_ = self.params['optimizer']

    # model
    self.input = tf.placeholder(tf.float32, [None, self.input_dim],
                                name="input")
    self.output = tf.placeholder(tf.float32, [None, self.output_dim],
                                 name="output")
    self.activation_patterns = {}
    self.hidden_state_activation_patterns = {}
    self.activation = self.input
    self.hidden_states = {}
    self.hidden_states_update_ops = {}

    num_layers = len(self.network_shape) - 1

    # One non-trainable hidden-state vector per layer that has a hidden path.
    for i in range(1, num_layers):
        state_key = "hs_{0}".format(i)
        with tf.name_scope("layer{0}".format(i)):
            h = tf.Variable(tf.truncated_normal([self.network_shape[i]]),
                            name="hidden_state",
                            trainable=False)
            self.hidden_states[state_key] = h
            Utils.variable_summaries(self.hidden_states[state_key],
                                     state_key)

    # Scalar gain applied to every H matrix. It must be a float: an integer
    # variable cannot be combined with the float32 H in tf.scalar_mul.
    if self.hidden_weights is not None:
        H_tune = tf.Variable(1.0, trainable=True, name="H_tune")
        Utils.variable_summaries(H_tune, "H_tune")
    else:
        H_tune = tf.Variable(1.0, trainable=False, name="H_tune")

    for i in range(num_layers):
        fan_in = self.network_shape[i]
        fan_out = self.network_shape[i + 1]
        with tf.name_scope("layer{0}".format(i + 1)):
            if i < num_layers - 1:
                state_key = "hs_{0}".format(i + 1)
                with tf.name_scope("hidden"):
                    # input weight and bias
                    W = tf.Variable(tf.random_normal([fan_in, fan_out],
                                                     stddev=0.05),
                                    name="W")
                    bW = tf.Variable(tf.random_normal([fan_out],
                                                      stddev=0.05),
                                     name="bW")
                    Utils.variable_summaries(W, "W")
                    Utils.variable_summaries(bW, "bW")

                    H_name = "H_{0}".format(i + 1)
                    if (self.hidden_weights is not None
                            and H_name in self.hidden_weights):
                        H = tf.Variable(
                            self.hidden_weights[H_name].astype('float32'),
                            dtype=tf.float32,
                            trainable=False,
                            name="H")
                    else:
                        H = tf.Variable(tf.random_normal([fan_out, fan_out],
                                                         stddev=0.05),
                                        trainable=False,
                                        name="H")

                    input_for_hidden = tf.matmul(self.activation, W) + bW
                    # Repeat the single state vector batch_size times; this
                    # ties the graph to batches of exactly batch_size rows.
                    tiled_h = tf.reshape(
                        tf.tile(self.hidden_states[state_key],
                                [self.batch_size]),
                        [self.batch_size, -1])
                    hidden_update = tf.nn.tanh(
                        tf.add(input_for_hidden,
                               tf.matmul(tiled_h,
                                         tf.scalar_mul(H_tune, H))))

                with tf.name_scope("mirror"):
                    # mirror input and bias
                    M = tf.Variable(tf.random_normal([fan_in, fan_out],
                                                     stddev=0.05),
                                    name="M")
                    bM = tf.Variable(tf.random_normal([fan_out],
                                                      stddev=0.05),
                                     name="bM")
                    Utils.variable_summaries(M, "M")
                    Utils.variable_summaries(bM, "bM")
                    input_for_mirror = tf.nn.tanh(
                        tf.matmul(self.activation, M) + bM)

                with tf.name_scope("readout"):
                    # readout weights and biases
                    R = tf.Variable(tf.random_normal([fan_out, fan_out],
                                                     stddev=0.05),
                                    name="R")
                    bR = tf.Variable(tf.random_normal([fan_out],
                                                      stddev=0.05),
                                     name="bR")
                    Utils.variable_summaries(R, "R")
                    Utils.variable_summaries(bR, "bR")
                    readout = self.activation_function(
                        tf.matmul(hidden_update, R) + bR)

                with tf.name_scope("activation"):
                    self.activation = self.activation_function(
                        tf.multiply(readout, input_for_mirror))

                # Running this op stores the first row of the batch's hidden
                # update as the layer's new persistent hidden state.
                self.hidden_states_update_ops[state_key] = \
                    self.hidden_states[state_key].assign(hidden_update[0])
            else:
                W = tf.Variable(tf.random_normal([fan_in, fan_out],
                                                 stddev=0.05),
                                name="W")
                bW = tf.Variable(tf.random_normal([fan_out], stddev=0.05),
                                 name="bW")
                Utils.variable_summaries(W, "W")
                Utils.variable_summaries(bW, "bW")
                with tf.name_scope("activation"):
                    self.activation = self.activation_function(
                        tf.matmul(self.activation, W) + bW)

        act = self.activation
        if i > 0:
            self.activation_patterns['layer_{0}'.format(i)] = act

    # cost
    with tf.name_scope("cost"):
        # Named arguments are required by TF >= 1.0 and remove any ambiguity
        # about which tensor holds the logits.
        # NOTE(review): self.activation has already been passed through
        # activation_function, so it may not be raw logits -- confirm.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.activation,
                                                    labels=self.output))
        tf.summary.scalar('cost', self.cost)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(self.activation, 1),
                                      tf.argmax(self.output, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', self.accuracy)

    self.optimizer = self.optimizer_.minimize(self.cost)

    self.sess = tf.Session()
    self.merged = tf.summary.merge_all()
    self.summ_writer = tf.summary.FileWriter(self.tensorboard_dir,
                                             self.sess.graph)
    init = tf.global_variables_initializer()
    self.sess.run(init)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): # train_op = optimizer.minimize(loss[0] + l2_loss, var_list=update_vars, global_step=global_step) # apply gradient clip to avoid gradient exploding gvs = optimizer.compute_gradients(loss[0] + l2_loss, var_list=update_vars) clip_grad_var = [gv if gv[0] is None else [ tf.clip_by_norm(gv[0], 100.), gv[1]] for gv in gvs] train_op = optimizer.apply_gradients(clip_grad_var, global_step=global_step) if args.save_optimizer: print('Saving optimizer parameters to checkpoint! Remember to restore the global_step in the fine-tuning afterwards.') saver_to_save = tf.train.Saver() saver_best = tf.train.Saver() with tf.Session() as sess: sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()]) saver_to_restore.restore(sess, args.restore_path) merged = tf.summary.merge_all() writer = tf.summary.FileWriter(args.log_dir, sess.graph) print('\n----------- start to train -----------\n') best_mAP = -np.Inf for epoch in range(args.total_epoches): sess.run(train_init_op) loss_total, loss_xy, loss_wh, loss_conf, loss_class = AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter() for i in trange(args.train_batch_num): _, summary, __y_pred, __y_true, __loss, __global_step, __lr = sess.run(
def train(args, sess, model):
    """Run the three-stage training schedule and checkpoint once per epoch.

    Stage 1 (``epoch < args.Tc``) optimises the reconstruction loss of the
    completion network, stage 2 (the following ``args.Td`` epochs) optimises
    the discriminator loss, and every later epoch optimises ``loss_all``
    over the completion variables. An epoch ends once
    ``step * args.batch_size`` reaches ``model.data_count``.

    Args:
        args: Options object; reads ``learning_rate``, ``momentum``,
            ``continue_training``, ``checkpoints_path``, ``graph_path``,
            ``train_step``, ``Tc``, ``Td`` and ``batch_size``.
        sess: TensorFlow session. It must also be the default session,
            because the initialisers are executed with ``.run()``. The
            session is closed before this function returns or raises from
            the training loop.
        model: Model exposing the losses, variable lists, summaries and
            ``data_count`` used below.

    Raises:
        ValueError: If ``args.continue_training`` is set but no checkpoint
            exists under ``args.checkpoints_path``.
    """
    # Adam optimizers are used instead of AdaDelta
    d_optimizer = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.momentum,
        name="AdamOptimizer_D").minimize(model.d_loss,
                                         var_list=model.d_vars)
    c_optimizer = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.momentum,
        name="AdamOptimizer_C").minimize(model.recon_loss,
                                         var_list=model.c_vars)
    # NOTE(review): this reuses the name "AdamOptimizer_C"; it is kept so
    # that the variable names stored in existing checkpoints stay valid.
    global_optimizer = tf.train.AdamOptimizer(
        args.learning_rate, beta1=args.momentum,
        name="AdamOptimizer_C").minimize(model.loss_all,
                                         var_list=model.c_vars)

    epoch = 0
    step = 0
    global_step = 0

    # saver
    saver = tf.train.Saver()
    if args.continue_training:
        tf.local_variables_initializer().run()
        last_ckpt = tf.train.latest_checkpoint(args.checkpoints_path)
        if last_ckpt is None:
            # Fail with an actionable message instead of the error raised
            # from inside saver.restore for a None path.
            raise ValueError("No checkpoint found in " +
                             str(args.checkpoints_path))
        saver.restore(sess, last_ckpt)
        ckpt_name = str(last_ckpt)
        print("Loaded model file from " + ckpt_name)
        # Checkpoints are saved with the epoch as their step suffix.
        epoch = int(ckpt_name.split('-')[-1])
    else:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    writer = None

    try:
        # summary init
        all_summary = tf.summary.merge([
            model.recon_loss_sum, model.d_loss_sum, model.loss_all_sum,
            model.input_img_sum, model.real_img_sum, model.recon_img_sum,
            model.g_local_imgs_sum, model.r_local_imgs_sum
        ])
        writer = tf.summary.FileWriter(args.graph_path, sess.graph)

        # training starts here
        while epoch < args.train_step:
            if epoch < args.Tc:
                # Training Stage 1 (Completion Network)
                summary, c_loss, _ = sess.run(
                    [all_summary, model.recon_loss, c_optimizer])
                writer.add_summary(summary, global_step)
                print("Epoch [%d] Step [%d] C Loss: [%.4f]" %
                      (epoch, step, c_loss))
            elif epoch < args.Tc + args.Td:
                # Training Stage 2 (Discriminator Network)
                summary, d_loss, _ = sess.run(
                    [all_summary, model.d_loss, d_optimizer])
                writer.add_summary(summary, global_step)
                print("Epoch [%d] Step [%d] D Loss: [%.4f]" %
                      (epoch, step, d_loss))
            else:
                # Training Stage 3 (Completion Network)
                summary, g_loss, _ = sess.run(
                    [all_summary, model.loss_all, global_optimizer])
                writer.add_summary(summary, global_step)
                print("Epoch [%d] Step [%d] C Loss: [%.4f]" %
                      (epoch, step, g_loss))

            # Checkpoint every time an epoch is finished
            if step * args.batch_size >= model.data_count:
                saver.save(sess, args.checkpoints_path + "/model",
                           global_step=epoch)
                step = 0
                epoch += 1

            step += 1
            global_step += 1
    finally:
        # Release resources on failure as well as on success: flush pending
        # summaries, stop the queue-runner threads, then close the session.
        if writer is not None:
            writer.close()
        coord.request_stop()
        coord.join(threads)
        sess.close()

    print("Done.")
def main():
    """Train the LSTM model and record training/validation metrics.

    Reads rows from BigQuery, optionally averages, normalises and runs PCA
    on them, splits them into train and test sets, trains step by step until
    the training data runs out, and finally stores the model, the label
    scaler, a learning-curve plot and a CSV of the per-step errors.

    All settings are read from the module-level ``options`` object.
    """
    local_device_protos = device_lib.list_local_devices()
    logging.info(
        [x.name for x in local_device_protos if x.device_type == 'GPU'])

    bq = _bq.BQHandler()
    io = _io.IO(gs_bucket=options.gs_bucket)
    viz = _viz.Viz()

    starttime, endtime = io.get_dates(options)
    logging.info('Using dataset {} and time range {} - {}'.format(
        options.feature_dataset, starttime.strftime('%Y-%m-%d'),
        endtime.strftime('%Y-%m-%d')))

    all_param_names = (options.label_params + options.feature_params +
                       options.meta_params)
    aggs = io.get_aggs_from_param_names(options.feature_params)

    logging.info('Reading data...')
    bq.set_params(starttime,
                  endtime,
                  batch_size=2500000,
                  loc_col='trainstation',
                  project=options.project,
                  dataset=options.feature_dataset,
                  table=options.feature_table,
                  parameters=all_param_names,
                  only_winters=options.only_winters)

    data = bq.get_rows()

    data = io.filter_train_type(labels_df=data,
                                train_types=options.train_types,
                                sum_types=True,
                                train_type_column='train_type',
                                location_column='trainstation',
                                time_column='time',
                                sum_columns=['train_count', 'delay'],
                                aggs=aggs)

    if options.y_avg_hours is not None:
        data = io.calc_running_delay_avg(data, options.y_avg_hours)

    if options.y_avg:
        data = io.calc_delay_avg(data)

    data.sort_values(by=['time', 'trainstation'], inplace=True)

    if options.normalize:
        logging.info('Normalizing data...')
        xscaler = StandardScaler()
        yscaler = StandardScaler()

        non_scaled_data = data.loc[:, options.meta_params]
        labels = data.loc[:, options.label_params].astype(
            np.float32).values.reshape((-1, 1))

        yscaler.fit(labels)
        # The scalers return plain arrays. Re-attach the frame's own index:
        # after the sort above it is no longer 0..n-1, and concat(axis=1)
        # aligns on index labels, so a fresh RangeIndex would pair the meta
        # columns with features and labels of different rows.
        scaled_labels = pd.DataFrame(yscaler.transform(labels),
                                     columns=['delay'],
                                     index=data.index)
        scaled_features = pd.DataFrame(
            xscaler.fit_transform(
                data.loc[:, options.feature_params].astype(np.float32)),
            columns=options.feature_params,
            index=data.index)

        data = pd.concat([non_scaled_data, scaled_features, scaled_labels],
                         axis=1)

    if options.pca:
        logging.info('Doing PCA analyzis for the data...')
        ipca = IncrementalPCA(n_components=options.pca_components,
                              whiten=options.whiten,
                              copy=False)

        non_processed_data = data.loc[:, options.meta_params +
                                      options.label_params]
        processed_data = data.loc[:, options.feature_params]
        ipca.fit(processed_data)
        # NOTE(review): the transformed features are computed but the frame
        # below is rebuilt from the untransformed ``processed_data``, so the
        # PCA only feeds the explained-variance plot. Switching to
        # ``processed_features`` would also require new feature column names
        # for the code further down -- confirm the intent before changing.
        processed_features = pd.DataFrame(ipca.transform(processed_data))

        data = pd.concat([non_processed_data, processed_data], axis=1)

        fname = options.output_path + '/ipca_explained_variance.png'
        viz.explained_variance(ipca, fname)
        io._upload_to_bucket(filename=fname, ext_filename=fname)

    data_train, data_test = train_test_split(data, test_size=0.33)
    X_test, y_test = io.extract_batch(data_test,
                                      options.time_steps,
                                      batch_size=None,
                                      pad_strategy=options.pad_strategy,
                                      quantile=options.quantile,
                                      label_params=options.label_params,
                                      feature_params=options.feature_params)

    # Define model
    batch_size = io.get_batch_size(data_train,
                                   options.pad_strategy,
                                   quantile=options.quantile)
    logging.info('Batch size: {}'.format(batch_size))
    model = LSTM.LSTM(options.time_steps,
                      len(options.feature_params),
                      1,
                      options.n_hidden,
                      options.lr,
                      options.p_drop,
                      batch_size=batch_size)

    # Initialization
    rmses, mses, maes, steps, train_mse = [], [], [], [], []
    saver = tf.train.Saver()
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    summary_writer = tf.summary.FileWriter(options.log_dir,
                                           graph=tf.get_default_graph())

    tf.summary.scalar('Validation_MSE', model.mse)
    tf.summary.scalar('Validation_RMSE', model.rmse)
    tf.summary.scalar('Validation_MAE', model.mae)
    tf.summary.histogram('y_pred_hist', model.y_pred)
    merged_summary_op = tf.summary.merge_all()
    # Created after merge_all so that the training summary is not part of
    # the merged validation summaries.
    train_summary_op = tf.summary.scalar('Training_MSE', model.loss)

    train_step = 0
    start = 0
    while True:
        # If slow is set, go forward one time step at time,
        # else proceed whole batch size
        if options.slow:
            X_train, y_train = io.extract_batch(
                data_train,
                options.time_steps,
                start=start,
                pad_strategy=options.pad_strategy,
                quantile=options.quantile,
                label_params=options.label_params,
                feature_params=options.feature_params)
        else:
            X_train, y_train = io.extract_batch(
                data_train,
                options.time_steps,
                train_step,
                pad_strategy=options.pad_strategy,
                quantile=options.quantile,
                label_params=options.label_params,
                feature_params=options.feature_params)

        # Stop once the remaining data no longer fills one window.
        if len(X_train) < options.time_steps:
            break

        if options.cv:
            # NOTE(review): this grid lists C / epsilon / kernel / gamma,
            # which look like support-vector hyper parameters rather than
            # LSTM ones -- confirm the branch is still meant to be used.
            logging.info('Doing random search for hyper parameters...')
            param_grid = {
                "C": [0.001, 0.01, 0.1, 1, 10],
                "epsilon": [0.01, 0.1, 0.5],
                "kernel": ['rbf', 'linear', 'poly', 'sigmoid', 'precomputed'],
                "degree": [2, 3, 4],
                "shrinking": [True, False],
                "gamma": [0.001, 0.01, 0.1],
                "coef0": [0, 0.1, 1]
            }

            random_search = RandomizedSearchCV(
                model,
                param_distributions=param_grid,
                n_iter=int(options.n_iter_search),
                n_jobs=-1)

            random_search.fit(X_train, y_train)
            logging.info("RandomizedSearchCV done.")
            fname = options.output_path + '/random_search_cv_results.txt'
            report_cv_results(random_search.cv_results_, fname)
            io._upload_to_bucket(filename=fname, ext_filename=fname)
            sys.exit()
        else:
            if train_step == 0:
                logging.info('Training...')

            feed_dict = {model.X: X_train, model.y: y_train}
            _, loss, train_summary = sess.run(
                [model.train_op, model.loss, train_summary_op],
                feed_dict=feed_dict)

            summary_writer.add_summary(train_summary,
                                       train_step * batch_size)

        # Metrics
        feed_dict = {model.X: X_test, model.y: y_test}
        val_loss, rmse, mse, mae, y_pred, summary = sess.run(
            [
                model.loss, model.rmse, model.mse, model.mae, model.y_pred,
                merged_summary_op
            ],
            feed_dict=feed_dict)

        train_mse.append(loss)
        mses.append(mse)
        rmses.append(rmse)
        maes.append(mae)
        steps.append(train_step)

        summary_writer.add_summary(summary, train_step * batch_size)
        if train_step % 50 == 0:
            logging.info("Step {}:".format(train_step))
            logging.info("Training loss: {:.4f}".format(loss))
            logging.info("Validation MSE: {:.4f}".format(val_loss))
            logging.info('Validation RMSE: {}'.format(rmse))
            logging.info('Validation MAE: {}'.format(mae))
            logging.info('................')
            saver.save(sess, options.save_file)

        train_step += 1
        start += 1
    # <-- while True:

    saver.save(sess, options.save_file)
    if options.normalize:
        fname = options.save_path + '/yscaler.pkl'
        io.save_scikit_model(yscaler, fname, fname)
    io._upload_dir_to_bucket(options.save_path, options.save_path)

    # Plotting is best effort: a failure is logged and must not prevent the
    # error CSV below from being written.
    try:
        fname = options.output_path + '/learning_over_time.png'
        metrics = [{
            'metrics': [{
                'values': mses,
                'label': 'Validation MSE'
            }, {
                'values': train_mse,
                'label': 'Train MSE'
            }],
            'y_label': 'MSE'
        }, {
            'metrics': [{
                'values': rmses,
                'label': 'Validation RMSE'
            }],
            'y_label': 'RMSE'
        }, {
            'metrics': [{
                'values': maes,
                'label': 'Validation MAE'
            }],
            'y_label': 'MAE'
        }]
        viz.plot_learning(metrics, fname)
        io._upload_to_bucket(filename=fname, ext_filename=fname)
    except Exception as e:
        logging.error(e)

    error_data = {
        'steps': steps,
        'mse': mses,
        'rmse': rmses,
        'mae': maes,
        'train_mse': train_mse
    }
    fname = '{}/training_time_validation_errors.csv'.format(
        options.output_path)
    io.write_csv(error_data, filename=fname, ext_filename=fname)
def train(train_data, test_data=None):
    """Train a supervised GraphSAGE model and write validation/test stats.

    Args:
        train_data: Indexable holding, by position, the graph (0), the node
            feature matrix or ``None`` (1), the node-id map (2), the context
            pairs (3, only read when ``FLAGS.random_context`` is set) and
            the class map (4).
        test_data: Unused by this function.

    Raises:
        Exception: If ``FLAGS.model`` is not one of the supported names.
    """
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]
    class_map = train_data[4]

    # Multi-label class maps store a list per node (its length is the number
    # of classes); otherwise count the distinct labels.
    first_label = list(class_map.values())[0]
    if isinstance(first_label, list):
        num_classes = len(first_label)
    else:
        num_classes = len(set(class_map.values()))

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1],))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders(num_classes)
    minibatch = NodeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      class_map,
                                      num_classes,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      context_pairs=context_pairs)
    # The adjacency table lives in a variable so that it can be swapped
    # between the training and the validation/test version with an assign.
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        if FLAGS.samples_3 != 0:
            layer_infos = [
                SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2),
                SAGEInfo("node", sampler, FLAGS.samples_3, FLAGS.dim_2)]
        elif FLAGS.samples_2 != 0:
            layer_infos = [
                SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]
        else:
            layer_infos = [
                SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1)]

        model = SupervisedGraphsage(num_classes, placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos,
                                    model_size=FLAGS.model_size,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)
    elif FLAGS.model == 'gcn':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, 2*FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, 2*FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos=layer_infos,
                                    aggregator_type="gcn",
                                    model_size=FLAGS.model_size,
                                    concat=False,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)
    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos=layer_infos,
                                    aggregator_type="seq",
                                    model_size=FLAGS.model_size,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)
    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos=layer_infos,
                                    aggregator_type="maxpool",
                                    model_size=FLAGS.model_size,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SupervisedGraphsage(num_classes, placeholders,
                                    features,
                                    adj_info,
                                    minibatch.deg,
                                    layer_infos=layer_infos,
                                    aggregator_type="meanpool",
                                    model_size=FLAGS.model_size,
                                    sigmoid_loss=FLAGS.sigmoid,
                                    identity_dim=FLAGS.identity_dim,
                                    logging=True)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Train model
    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        batch_idx = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict, labels = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([merged, model.opt_op, model.loss,
                             model.outputs1, model.preds],
                            feed_dict=feed_dict)
            train_cost = outs[2]

            if batch_idx % FLAGS.validate_iter == 0:
                # Validation: evaluate on the test adjacency, then switch
                # back to the training adjacency.
                sess.run(val_adj_info.op)
                if FLAGS.validate_batch_size == -1:
                    val_cost, val_f1_mic, val_f1_mac, duration = \
                        incremental_evaluate(sess, model, minibatch,
                                             FLAGS.batch_size)
                else:
                    val_cost, val_f1_mic, val_f1_mac, duration = evaluate(
                        sess, model, minibatch, FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Running mean of the per-step wall time.
            avg_time = (avg_time * total_steps + time.time() - t) / \
                (total_steps + 1)

            # Print results
            if total_steps % FLAGS.print_every == 0:
                train_f1_mic, train_f1_mac = calc_f1(labels, outs[-1])
                print("Iter:", '%04d' % batch_idx,
                      "train_loss=", "{:.5f}".format(train_cost),
                      "train_f1_mic=", "{:.5f}".format(train_f1_mic),
                      "train_f1_mac=", "{:.5f}".format(train_f1_mac),
                      "val_loss=", "{:.5f}".format(val_cost),
                      "val_f1_mic=", "{:.5f}".format(val_f1_mic),
                      "val_f1_mac=", "{:.5f}".format(val_f1_mac),
                      "time=", "{:.5f}".format(avg_time))

            batch_idx += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                break

        if total_steps > FLAGS.max_total_steps:
            break

    print("Optimization Finished!")
    # Always switch to the validation/test adjacency before the final
    # evaluation, exactly as the in-loop validation does; the assign is
    # idempotent, so running it regardless of save_embeddings is safe.
    sess.run(val_adj_info.op)
    if FLAGS.save_embeddings:
        save_val_embeddings(sess, model, minibatch,
                            FLAGS.validate_batch_size, log_dir())

    val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(
        sess, model, minibatch, FLAGS.batch_size)
    print("Full validation stats:",
          "loss=", "{:.5f}".format(val_cost),
          "f1_micro=", "{:.5f}".format(val_f1_mic),
          "f1_macro=", "{:.5f}".format(val_f1_mac),
          "time=", "{:.5f}".format(duration))
    with open(log_dir() + "val_stats.txt", "w") as fp:
        fp.write("loss={:.5f} f1_micro={:.5f} f1_macro={:.5f} time={:.5f}".
                 format(val_cost, val_f1_mic, val_f1_mac, duration))

    print("Writing test set stats to file (don't peak!)")
    val_cost, val_f1_mic, val_f1_mac, duration = incremental_evaluate(
        sess, model, minibatch, FLAGS.batch_size, test=True)
    with open(log_dir() + "test_stats.txt", "w") as fp:
        fp.write("loss={:.5f} f1_micro={:.5f} f1_macro={:.5f}".
                 format(val_cost, val_f1_mic, val_f1_mac))
def train(self, para_dict):
    """Train the model for several epochs, saving a checkpoint, a frozen
    graph and a JSON log after every epoch.

    Args:
        para_dict: dict providing the keys ``'epochs'`` (number of epochs),
            ``'GPU_ratio'`` (per-process GPU memory fraction, or ``None``
            to let memory grow on demand), ``'batch_size'`` and ``'ratio'``
            (when <= 1.0, only that leading fraction of the training
            paths/labels is kept).

    Side effects:
        Truncates and shuffles ``self.train_paths`` / ``self.train_labels``
        in place, updates ``self.content``, and writes checkpoint, PB and
        log files.
    """
    #----var parsing
    epochs = para_dict['epochs']
    GPU_ratio = para_dict['GPU_ratio']
    batch_size = para_dict['batch_size']
    ratio = para_dict['ratio']

    #----local var
    train_loss_list = list()
    train_acc_list = list()
    test_acc_list = list()
    epoch_time_list = list()

    self.content = self.log_update(self.content, para_dict)

    #----ratio: optionally keep only the leading part of the training set
    if ratio <= 1.0:
        img_quantity = int(self.train_paths.shape[0] * ratio)
        self.train_paths = self.train_paths[:img_quantity]
        self.train_labels = self.train_labels[:img_quantity]
        print("img_quantity:", img_quantity)
    else:
        img_quantity = self.train_paths.shape[0]

    #----calculate iterations of one epoch
    train_ites = math.ceil(self.train_paths.shape[0] / batch_size)

    #----GPU setting
    config = tf.ConfigProto(log_device_placement=True,
                            allow_soft_placement=True)
    if GPU_ratio is None:
        config.gpu_options.allow_growth = True
    else:
        config.gpu_options.per_process_gpu_memory_fraction = GPU_ratio

    with tf.Session(config=config) as sess:
        #----transfer learning check
        files = [file.path for file in os.scandir(self.save_dir)
                 if file.name.split(".")[-1] == 'meta']
        if len(files) == 0:
            sess.run(tf.global_variables_initializer())
            print("no previous model param can be used!")
        else:
            # os.scandir yields entries in arbitrary order, so pick the most
            # recently written .meta file explicitly. basename/splitext keep
            # the lookup portable (no hard-coded "\\" separator) and preserve
            # dots inside the checkpoint prefix.
            newest_meta = max(files, key=os.path.getmtime)
            check_name = os.path.splitext(os.path.basename(newest_meta))[0]
            model_path = os.path.join(self.save_dir, check_name)
            self.saver.restore(sess, model_path)
            msg = "use previous model param:{}".format(model_path)
            print(msg)

        print("img_quantity:", img_quantity)

        #----epoch training
        for epoch in range(epochs):
            #----record the start time
            d_t = time.time()

            train_loss = 0
            train_acc = 0
            test_acc = 0

            #----shuffle paths and labels with the same permutation
            indice = np.random.permutation(self.train_paths.shape[0])
            self.train_paths = self.train_paths[indice]
            self.train_labels = self.train_labels[indice]

            #----do optimizers(training by iteration)
            for index in range(train_ites):
                #----get image start and end numbers
                num_start = index * batch_size
                num_end = np.minimum(num_start + batch_size,
                                     self.train_paths.shape[0])

                batch_data = self.get_4D_data(
                    self.train_paths[num_start:num_end],
                    self.model_shape[1:])

                #----put all data to tf placeholders
                feed_dict = {self.tf_input: batch_data,
                             self.tf_label_batch: self.train_labels[num_start:num_end],
                             self.tf_keep_prob: 0.8,
                             self.tf_phase_train: True}

                #----session run
                sess.run(self.optimizer, feed_dict=feed_dict)

            #----evaluation(training set), dropout disabled
            for index in range(train_ites):
                #----get image start and end numbers
                num_start = index * batch_size
                num_end = np.minimum(num_start + batch_size,
                                     self.train_paths.shape[0])

                batch_data = self.get_4D_data(
                    self.train_paths[num_start:num_end],
                    self.model_shape[1:])

                #----put all data to tf placeholders
                feed_dict = {self.tf_input: batch_data,
                             self.tf_label_batch: self.train_labels[num_start:num_end],
                             self.tf_keep_prob: 1.0,
                             self.tf_phase_train: False}

                #----session run
                loss_temp, predict_temp = sess.run(
                    [self.loss, self.prediction], feed_dict=feed_dict)

                #----calculate the loss and accuracy
                train_loss += loss_temp
                train_acc += self.evaluation(
                    predict_temp, self.train_labels[num_start:num_end])

            # loss is averaged per batch, accuracy per sample
            train_loss /= train_ites
            train_acc /= self.train_paths.shape[0]

            #-----testing set(LFW) evaluation
            if self.test_img_dir is not None:
                # the feed_dict of the last evaluation batch is handed over
                test_acc = self.eval_on_lfw(sess, feed_dict,
                                            self.test_img_dir,
                                            batch_size=batch_size)

            #----save ckpt, pb files
            model_save_path = self.saver.save(sess, self.out_dir_prefix,
                                              global_step=epoch)
            print("save model CKPT to ", model_save_path)

            graph = tf.get_default_graph().as_graph_def()
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph, self.pb_save_list)
            with tf.gfile.GFile(self.pb_save_path, 'wb') as f:
                f.write(output_graph_def.SerializeToString())
            print("save PB file to ", self.pb_save_path)

            #----record the end time
            d_t = time.time() - d_t

            #----save results in the log file
            train_loss_list.append(float(train_loss))
            train_acc_list.append(float(train_acc))
            if self.test_img_dir is not None:
                test_acc_list.append(float(test_acc))

            self.content["train_loss_list"] = train_loss_list
            self.content["train_acc_list"] = train_acc_list
            if self.test_img_dir is not None:
                self.content["test_acc_list"] = test_acc_list

            epoch_time_list.append(d_t)
            self.content['ave_epoch_time'] = float(np.average(epoch_time_list))

            with open(self.log_path, 'w') as f:
                json.dump(self.content, f)
            print("save the log file in ", self.log_path)

            #----display training results
            print("Epoch: ", epoch)
            print("training loss:{}, accuracy:{}".format(train_loss, train_acc))
            if self.test_img_dir is not None:
                print("test set accuracy:{}".format(test_acc))

            print("Epoch time consumption:", d_t)
def main(gpu_id=None):
    """Build the convolutional model, train it and periodically test/save it.

    Args:
        gpu_id: value written to ``CUDA_VISIBLE_DEVICES``; when ``None`` the
            model is placed on ``/cpu:0``, otherwise on ``/gpu:0``.
    """
    # NOTE(review): the nesting of the test/save/plot steps below was
    # reconstructed from statement and comment order -- confirm against the
    # intended control flow.

    # Select gpu
    if gpu_id is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    # Reset Tensorflow graph
    tf.reset_default_graph()

    # Placeholders for the tensorflow model
    x = tf.placeholder(tf.float32,
                       shape=[par['batch_train_size'], *par['n_input']],
                       name='input')
    y = tf.placeholder(tf.float32,
                       shape=[par['batch_train_size'], par['n_output']],
                       name='target')

    # Generate stimulus
    stim = Stimulus()

    # Model stats
    losses = []
    testing_losses = []
    save_iter = []

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        device = '/cpu:0' if gpu_id is None else '/gpu:0'
        with tf.device(device):
            model = ConvModelTop(x, y)

        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()

        # Lowest test loss seen so far; starts above any expected loss
        prev_loss = 1000000
        start = time.time()
        stats_path = (par['save_dir'] + 'run_' + str(par['run_number'])
                      + '_model_stats.pkl')
        curve_path = (par['save_dir'] + 'run_' + str(par['run_number'])
                      + '_training_curve.png')

        for i in range(par['num_iterations']):

            # Generate training batch and train model
            input_data, target_data, _ = stim.generate_train_batch()
            feed_dict = {x: input_data, y: target_data}
            _, train_loss, model_output = sess.run(
                [model.train_op, model.loss, model.output],
                feed_dict=feed_dict)

            # Check current status
            if i % par['print_iter'] == 0:

                # Print current status
                print_conv_stats(i, train_loss, time.time() - start)
                losses.append(train_loss)
                save_iter.append(i)

                # Test and save model
                if i % par['save_iter'] == 0:

                    # Generate test batch and get model performance
                    test_input, test_target, _ = stim.generate_test_batch()
                    feed_dict = {x: test_input, y: test_target}
                    test_loss, test_output = sess.run(
                        [model.loss, model.output], feed_dict=feed_dict)
                    testing_losses.append(test_loss)

                    # Plot model outputs
                    if test_loss < prev_loss:
                        prev_loss = test_loss
                        plot_conv_outputs(target_data, model_output,
                                          test_target, test_output, i)

                        # Save training stats and model
                        weight = sess.run(tf.get_collection(
                            tf.GraphKeys.GLOBAL_VARIABLES,
                            'filters/kernel')[0])
                        # Context manager guarantees the stats file is
                        # flushed and closed even if pickling fails.
                        with open(stats_path, 'wb') as stats_file:
                            pickle.dump({'iter': save_iter,
                                         'weight': weight,
                                         'losses': losses,
                                         'test_loss': testing_losses,
                                         'last_iter': i},
                                        stats_file)

                        saved_path = saver.save(sess, './conv_model_top')
                        print('model saved in {}'.format(saved_path))

                        # Stop training
                        if test_loss < 50:
                            break

                # Plot loss curve (the first recorded loss is left out)
                if i > 0:
                    plt.plot(losses[1:])
                    plt.savefig(curve_path)
                    plt.close()
def train(sess, loss, x_train, y_train,
          init_all=False, evaluate=None, feed=None, args=None,
          rng=None, var_list=None, fprop_args=None, optimizer=None,
          devices=None, x_batch_preprocessor=None, use_ema=False,
          ema_decay=.998, run_canary=None,
          loss_threshold=1e5, dataset_train=None, dataset_size=None):
    """
    Run (optionally multi-replica, synchronous) training to minimize `loss`

    :param sess: TF session to use when training the graph
    :param loss: object whose `fprop(x, y, **fprop_args)` returns the tensor
        to minimize
    :param x_train: numpy array with training inputs (must be None when
        `dataset_train` is given)
    :param y_train: numpy array with training outputs (must be None when
        `dataset_train` is given)
    :param init_all: (boolean) If set to true, all TF variables in the session
        are (re)initialized, otherwise only previously uninitialized variables
        are initialized before training.
    :param evaluate: function that is run after each training epoch
        (typically to display the test/validation accuracy).
    :param feed: An optional dictionary that is appended to the feeding
        dictionary before the session runs. Can be used to feed the learning
        phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
        Should contain `nb_epochs`, `learning_rate`, `batch_size`
    :param rng: Instance of numpy.random.RandomState
    :param var_list: Optional list of parameters to train.
    :param fprop_args: dict, extra arguments to pass to fprop (loss and model).
    :param optimizer: Optimizer to be used for training
    :param devices: list of device names to use for training, passed through
        `infer_devices`
    :param x_batch_preprocessor: callable
        Takes a single tensor containing an x_train batch as input
        Returns a single tensor containing an x_train batch as output
        Called to preprocess the data before passing the data to the Loss
    :param use_ema: bool
        If true, uses an exponential moving average of the model parameters
    :param ema_decay: float or callable
        The decay parameter for EMA, if EMA is used
        If a callable rather than a float, this is a callable that takes
        the epoch and batch as arguments and returns the ema_decay for
        the current batch.
    :param run_canary: deprecated; passing any non-None value only emits a
        warning.
    :param loss_threshold: float
        Raise an exception if the loss exceeds this value.
        This is intended to rapidly detect numerical problems.
        Sometimes the loss may legitimately be higher than this value. In
        such cases, raise the value. If needed it can be np.inf.
    :param dataset_train: tf Dataset instance.
        Used as a replacement for x_train, y_train for faster performance.
    :param dataset_size: integer, the size of the dataset_train.
    :return: True if model trained
    :raises ValueError: on missing arguments, a non-`tf.train.Optimizer`
        optimizer, a batch size not divisible over the devices, or an
        extreme / NaN / Inf loss.
    """

    # Check whether the hardware is working correctly
    canary.run_canary()
    if run_canary is not None:
        warnings.warn("The `run_canary` argument is deprecated. The canary "
                      "is now much cheaper and thus runs all the time. The "
                      "canary now uses its own loss function so it is not "
                      "necessary to turn off the canary when training with "
                      " a stochastic loss. Simply quit passing `run_canary`."
                      "Passing `run_canary` may become an error on or after "
                      "2019-10-16.")

    args = _ArgsWrapper(args or {})
    fprop_args = fprop_args or {}

    # Check that necessary arguments were given (see doc above)
    # Be sure to support 0 epochs for debugging purposes
    if args.nb_epochs is None:
        raise ValueError("`args` must specify number of epochs")
    if optimizer is None:
        if args.learning_rate is None:
            raise ValueError("Learning rate was not given in args dict")
    assert args.batch_size, "Batch size was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    else:
        if not isinstance(optimizer, tf.train.Optimizer):
            raise ValueError("optimizer object must be from a child class of "
                             "tf.train.Optimizer")

    grads = []
    xs = []
    ys = []
    if dataset_train is not None:
        assert x_train is None and y_train is None and x_batch_preprocessor is None
        if dataset_size is None:
            raise ValueError("You must provide a dataset size")
        data_iterator = dataset_train.make_one_shot_iterator().get_next()
        # One batch is pulled here so that its dtype and shape can be used to
        # build the placeholders below.
        x_train, y_train = sess.run(data_iterator)

    devices = infer_devices(devices)
    for device in devices:
        with tf.device(device):
            x = tf.placeholder(x_train.dtype, (None,) + x_train.shape[1:])
            y = tf.placeholder(y_train.dtype, (None,) + y_train.shape[1:])
            xs.append(x)
            ys.append(y)

            if x_batch_preprocessor is not None:
                x = x_batch_preprocessor(x)

            loss_value = loss.fprop(x, y, **fprop_args)

            grads.append(optimizer.compute_gradients(
                loss_value, var_list=var_list))
    num_devices = len(devices)
    print("num_devices: ", num_devices)

    grad = avg_grads(grads)
    # Trigger update operations within the default graph (such as batch_norm).
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = optimizer.apply_gradients(grad)

    epoch_tf = tf.placeholder(tf.int32, [])
    batch_tf = tf.placeholder(tf.int32, [])

    if use_ema:
        if callable(ema_decay):
            ema_decay = ema_decay(epoch_tf, batch_tf)
        ema = tf.train.ExponentialMovingAverage(decay=ema_decay)
        with tf.control_dependencies([train_step]):
            train_step = ema.apply(var_list)
        # Get pointers to the EMA's running average variables
        avg_params = [ema.average(param) for param in var_list]
        # Make temporary buffers used for swapping the live and running
        # average parameters
        tmp_params = [tf.Variable(param, trainable=False)
                      for param in var_list]
        # Define the swapping operation
        param_to_tmp = [tf.assign(tmp, param)
                        for tmp, param in safe_zip(tmp_params, var_list)]
        with tf.control_dependencies(param_to_tmp):
            avg_to_param = [tf.assign(param, avg)
                            for param, avg in safe_zip(var_list, avg_params)]
        with tf.control_dependencies(avg_to_param):
            tmp_to_avg = [tf.assign(avg, tmp)
                          for avg, tmp in safe_zip(avg_params, tmp_params)]
        swap = tmp_to_avg

    batch_size = args.batch_size

    assert batch_size % num_devices == 0
    device_batch_size = batch_size // num_devices

    if init_all:
        sess.run(tf.global_variables_initializer())
    else:
        initialize_uninitialized_global_variables(sess)

    for epoch in range(args.nb_epochs):
        if dataset_train is not None:
            nb_batches = int(math.ceil(float(dataset_size) / batch_size))
        else:
            # Indices to shuffle training set
            index_shuf = list(range(len(x_train)))
            # Randomly repeat a few training examples each epoch to avoid
            # having a too-small batch
            while len(index_shuf) % batch_size != 0:
                index_shuf.append(rng.randint(len(x_train)))
            nb_batches = len(index_shuf) // batch_size
            rng.shuffle(index_shuf)
            # Shuffling here versus inside the loop doesn't seem to affect
            # timing very much, but shuffling here makes the code slightly
            # easier to read
            x_train_shuffled = x_train[index_shuf]
            y_train_shuffled = y_train[index_shuf]

        prev = time.time()
        for batch in range(nb_batches):
            if dataset_train is not None:
                x_train_shuffled, y_train_shuffled = sess.run(data_iterator)
                start, end = 0, batch_size
            else:
                # Compute batch start and end indices
                start = batch * batch_size
                end = (batch + 1) * batch_size
                # Perform one training step
                diff = end - start
                assert diff == batch_size

            feed_dict = {epoch_tf: epoch, batch_tf: batch}
            for dev_idx in range(num_devices):
                cur_start = start + dev_idx * device_batch_size
                cur_end = start + (dev_idx + 1) * device_batch_size
                feed_dict[xs[dev_idx]] = x_train_shuffled[cur_start:cur_end]
                feed_dict[ys[dev_idx]] = y_train_shuffled[cur_start:cur_end]
            if cur_end != end and dataset_train is None:
                msg = ("batch_size (%d) must be a multiple of num_devices "
                       "(%d).\nCUDA_VISIBLE_DEVICES: %s"
                       "\ndevices: %s")
                # .get() keeps an unset CUDA_VISIBLE_DEVICES from raising
                # KeyError and hiding the ValueError; a separate name avoids
                # rebinding the `args` wrapper.
                msg_args = (batch_size, num_devices,
                            os.environ.get('CUDA_VISIBLE_DEVICES'),
                            str(devices))
                raise ValueError(msg % msg_args)
            if feed is not None:
                feed_dict.update(feed)

            _, loss_numpy = sess.run(
                [train_step, loss_value], feed_dict=feed_dict)

            if np.abs(loss_numpy) > loss_threshold:
                raise ValueError("Extreme loss during training: ", loss_numpy)
            if np.isnan(loss_numpy) or np.isinf(loss_numpy):
                raise ValueError("NaN/Inf loss during training")
        assert (dataset_train is not None or
                end == len(index_shuf))  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch " + str(epoch) + " took " +
                     str(cur - prev) + " seconds")
        if evaluate is not None:
            if use_ema:
                # Before running evaluation, load the running average
                # parameters into the live slot, so we can see how well
                # the EMA parameters are performing
                sess.run(swap)
            evaluate()
            if use_ema:
                # Swap the parameters back, so that we continue training
                # on the live parameters
                sess.run(swap)
    if use_ema:
        # When training is done, swap the running average parameters into
        # the live slot, so that we use them when we deploy the model
        sess.run(swap)

    return True
def train():
    """Train the model.

    Reads the bucket databases from ``FLAGS.buckets_dir``, then trains for
    ``FLAGS.num_epoch + 1599`` epochs, sampling a bucket for every batch in
    proportion to its size, and saves the model every 800 epochs.
    """
    print('数据准备中...')
    bucket_dbs = data_utils.read_bucket_dbs(FLAGS.buckets_dir)
    bucket_sizes = []
    for i in range(len(buckets)):
        bucket_size = bucket_dbs[i].size
        bucket_sizes.append(bucket_size)
        print('bucket {} 中有数据 {} 条'.format(i, bucket_size))
    total_size = sum(bucket_sizes)
    print('共有数据 {} 条'.format(total_size))

    with tf.Session() as sess:
        model = create_model(sess, False)
        sess.run(tf.global_variables_initializer())

        # Cumulative share of the data held by each bucket (last entry is 1)
        buckets_scale = [
            sum(bucket_sizes[:i + 1]) / total_size
            for i in range(len(bucket_sizes))
        ]

        # Console progress line: bar, percent, trained/total, mean loss,
        # elapsed/estimated time
        metrics = ' '.join([
            '\r[{}]',
            '{:.1f}%',
            '{}/{}',
            'loss={:.3f}',
            '{}/{}'
        ])
        bars_max = 20

        with tf.device('/gpu:0'):
            # NOTE(review): the "+ 1600" offset and the 800-epoch save
            # interval are hard-coded; the final epoch is only saved when it
            # is a multiple of 800 -- confirm this is intended.
            for epoch_index in range(1, FLAGS.num_epoch + 1600):
                print('Epoch {}:'.format(epoch_index))
                time_start = time.time()
                epoch_trained = 0  # samples trained so far in this epoch
                batch_loss = []

                while True:
                    # Randomly pick the bucket to train on, weighted by size
                    random_number = np.random.random_sample()
                    bucket_id = min(
                        i for i in range(len(buckets_scale))
                        if buckets_scale[i] > random_number
                    )

                    # Fetch one batch of question/answer pairs; the second
                    # return value (the reversed pairs) is not used here
                    data, _ = model.get_batch_data(bucket_dbs, bucket_id)

                    # Convert the pairs into the format the model accepts
                    encoder_inputs, decoder_inputs, decoder_weights = \
                        model.get_batch(bucket_dbs, bucket_id, data)

                    # Train on the batch
                    _, step_loss, _ = model.step(
                        sess,
                        encoder_inputs,
                        decoder_inputs,
                        decoder_weights,
                        bucket_id,
                        False
                    )

                    epoch_trained += FLAGS.batch_size
                    batch_loss.append(step_loss)

                    time_spend = time.time() - time_start
                    progress = epoch_trained / FLAGS.num_per_epoch
                    time_estimate = time_spend / progress
                    # Clamp the percentage itself: the last batch may push
                    # epoch_trained past num_per_epoch, and clamping the raw
                    # ratio to 100 never took effect.
                    percent = min(100, progress * 100)
                    bars = math.floor(percent / 100 * bars_max)

                    sys.stdout.write(metrics.format(
                        '=' * bars + '-' * (bars_max - bars),
                        percent,
                        epoch_trained, FLAGS.num_per_epoch,
                        np.mean(batch_loss),
                        data_utils.time(time_spend),
                        data_utils.time(time_estimate)
                    ))
                    sys.stdout.flush()

                    if epoch_trained >= FLAGS.num_per_epoch:
                        break

                print('\n')

                os.makedirs(FLAGS.model_dir, exist_ok=True)
                if epoch_index % 800 == 0:
                    model.saver.save(
                        sess,
                        os.path.join(FLAGS.model_dir, FLAGS.model_name))
def libchatbot(save_dir='models/personality', max_length=500, beam_width=2,
               relevance=-1., temperature=1.0, topn=-1):
    """Load a saved model and return callables for chatting with it.

    Args:
        save_dir: directory passed to ``get_paths`` to locate the model,
            config and vocabulary files.
        max_length: index of the last generated character after which the
            reply loop stops.
        beam_width: beam width; also used as the model batch size.
        relevance, temperature, topn: forwarded to the state initialisation
            and to the beam-search forward function.

    Returns:
        Tuple ``(save_states, load_states, reset_states, consumer)``:
        ``consumer(text)`` feeds the user text to the model and returns the
        generated reply as a string; the other three pickle, unpickle and
        reinitialise the conversation state kept between calls.
    """
    model_path, config_path, vocab_path = get_paths(save_dir)

    # The saved arguments describe how the model was trained; they are needed
    # to rebuild it.
    # NOTE(review): pickle.load executes arbitrary code from the file -- only
    # point save_dir at trusted model directories.
    with open(config_path, 'rb') as f:
        saved_args = pickle.load(f)

    # Separately load chars and vocab from the save directory.
    with open(vocab_path, 'rb') as f:
        chars, vocab = pickle.load(f)

    # Create the model from the saved arguments, in inference mode.
    print("Creating model...")
    saved_args.batch_size = beam_width
    net = Model(saved_args, True)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Make tensorflow less verbose; filter out info (1+) and warnings (2+)
    # but not errors (3).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # The session is entered but deliberately never exited: it has to stay
    # alive for as long as the returned callables are in use.
    session = tf.Session(config=config)
    session.__enter__()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(net.save_variables_list())

    # Restore the saved variables, replacing the initialized values.
    print("Restoring weights...")
    saver.restore(session, model_path)

    states = initial_state_with_relevance_masking(net, session, relevance)

    # Mutable holder shared by the closures below.
    args = {
        'session': session,
        'states': states
    }

    def consumer(text, args=args, net=net, vocab=vocab, max_length=max_length,
                 relevance=relevance, temperature=temperature,
                 beam_width=beam_width, topn=topn):
        """Feed `text` to the model and return the generated reply."""
        user_input = text
        states = args['states']
        session = args['session']

        states = forward_text(
            net, session, states, relevance, vocab,
            sanitize_text(vocab, "> " + user_input + "\n>"))
        computer_response_generator = beam_search_generator(
            sess=session, net=net,
            initial_state=copy.deepcopy(states),
            initial_sample=vocab[' '],
            early_term_token=vocab['\n'],
            beam_width=beam_width,
            forward_model_fn=forward_with_mask,
            forward_args={'relevance': relevance,
                          'mask_reset_token': vocab['\n'],
                          'forbidden_token': vocab['>'],
                          'temperature': temperature,
                          'topn': topn})

        # Collect the characters and join once at the end instead of growing
        # a string with repeated concatenation.
        out_chars = []
        for i, char_token in enumerate(computer_response_generator):
            out_chars.append(chars[char_token])
            states = forward_text(net, session, states, relevance, vocab,
                                  chars[char_token])
            if i >= max_length:
                break

        states = forward_text(net, session, states, relevance, vocab,
                              sanitize_text(vocab, "\n> "))
        args['states'] = states
        args['session'] = session
        return ''.join(out_chars)

    def save_states(name):
        """Pickle the current conversation state to ``name + '.pkl'``."""
        with open(name + '.pkl', 'wb') as f:
            pickle.dump(args['states'], f)

    def load_states(name):
        """Replace the conversation state with the one in ``name + '.pkl'``."""
        with open(name + '.pkl', 'rb') as f:
            args['states'] = pickle.load(f)

    def reset_states(net=net, relevance=relevance):
        """Reinitialise the conversation state."""
        args['states'] = initial_state_with_relevance_masking(
            net, args['session'], relevance)

    return save_states, load_states, reset_states, consumer
word3 = "man"
word4 = "girl"
sentence = "I am red"
sentence2 = "I am blue"
sentence3 = "I am green"
# `word` and `word2` are defined before this fragment.
messages = [word, word2, word3, word4, sentence, sentence2, sentence3]

# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)

list_embedding = []
with tf.Session() as session:
    # Initialise variables and lookup tables, then embed every message.
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embed(messages))

    # Report each message with the size and the first three values of its
    # embedding, keeping the full vectors in list_embedding.
    for i, message_embedding in enumerate(
            np.array(message_embeddings).tolist()):
        print("Message: {}".format(messages[i]))
        print("Embedding size: {}".format(len(message_embedding)))
        list_embedding.append(message_embedding)
        message_embedding_snippet = ", ".join(
            map(str, message_embedding[:3]))
        print("Embedding: [{}, ...]\n".format(message_embedding_snippet))

# Messages of different lengths (rebinding `messages` after the embeddings
# above have already been computed).
messages = ["That band rocks!", "That song is really cool."]

# Vector arithmetic on the stored embeddings: entry 0 - entry 2 + entry 3.
temp = (np.array(list_embedding[0])
        - np.array(list_embedding[2])
        + np.array(list_embedding[3]))
def train(data_dir, batch_size, net_name, epochs, start_epoch, start_iter,
          change_train_data_epoch, learning_rate, decay_rate, decay_steps,
          val_rate, save_rate, checkpoint_dir, log_dir, training):
    """Build the YOLOv3 graph and train it on two data sets in sequence.

    Epochs below ``change_train_data_epoch`` draw batches from the ``1w``
    loader, later epochs from the ``b`` loader. A checkpoint is written
    whenever the training loss reaches a new minimum and once more when
    training ends.

    Args:
        data_dir, batch_size, training: forwarded to ``DataSet``.
        net_name: attribute name of the backbone constructor in ``net``.
        epochs: epoch index at which training stops.
        start_epoch, start_iter: epoch / iteration to resume from.
        change_train_data_epoch: first epoch that uses the second data set.
        learning_rate, decay_rate, decay_steps: arguments of
            ``tf.train.exponential_decay``.
        val_rate, save_rate: only printed; validation and periodic saving
            are currently disabled.
        checkpoint_dir: directory checkpoints are restored from / saved to.
        log_dir: parent directory of the ``train`` summary directory.
    """
    # NOTE(review): block nesting in the training loop was reconstructed from
    # statement order -- confirm in particular where start_time is reset.
    print('==> Get train and test data...')
    dataloader = DataSet(data_dir, batch_size, training)
    train_1w_batch = dataloader.train_1w_loader()
    train_b_batch = dataloader.train_b_loader()
    train_data_size_list = np.array([dataloader.nbr_train_1w,
                                     dataloader.nbr_train_b])
    print('==> Finished!')

    print('==> Create YOLOv3')
    print('--- use ', net_name)
    inputs_x = tf.placeholder(tf.float32,
                              [None, cfg.IMG_HEIGHT, cfg.IMG_WIDTH, 3])
    model = getattr(net, net_name)(inputs_x, training)
    total_grid_cell_attr = 5 + cfg.NUM_CLASS
    # One label placeholder per detection scale
    inputs_y = [
        tf.placeholder(tf.float32,
                       [None, scale, scale, total_grid_cell_attr])
        for scale in (cfg.SCALES[0], cfg.SCALES[1], cfg.SCALES[2])
    ]
    yolo_v3 = YOLOv3(model, inputs_y, batch_size=batch_size,
                     is_training=training)
    print('==> Finished!')

    print('==> Get each scale total loss')
    loss = yolo_v3.loss
    print('==> Finished!')

    print('==> Create optimizer')
    print('--- epochs = %d' % epochs)
    print('--- train_data_size = ', train_data_size_list)
    print('--- learning_rate = %f' % learning_rate)
    print('--- update learning_rate: ')
    print('--- \tlearning_rate = learning_rate * decay_rate^(global_step / decay_step)')
    print('--- decay_rate = %f' % decay_rate)
    total_step_list = [
        change_train_data_epoch * np.ceil(train_data_size_list[0] / batch_size),
        (epochs - change_train_data_epoch) * np.ceil(train_data_size_list[1] / batch_size)]
    print('--- total_step = ', total_step_list)
    print('--- start_epochs = %d' % start_epoch)
    train_iter_max_list = np.ceil(train_data_size_list / batch_size)
    print('--- train iter_max = ', train_iter_max_list)
    if start_epoch < change_train_data_epoch:
        initial_step = start_epoch * train_iter_max_list[0] + start_iter
    else:
        initial_step = change_train_data_epoch * train_iter_max_list[0] + start_iter
    print('--- global_step = %d' % initial_step)
    global_step = tf.Variable(
        start_epoch * train_iter_max_list[0] + start_iter, trainable=False)
    print('change train data epoch = %d' % (change_train_data_epoch))

    # learning_rate = learning_rate * decay_rate^(global_step / decay_steps)
    learning_rate = tf.train.exponential_decay(
        learning_rate, global_step, decay_steps, decay_rate, staircase=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # minimize() increments global_step itself on every training step
    train_op = optimizer.minimize(loss, global_step=global_step)
    print('==> Finished!')

    init_op = tf.group(tf.global_variables_initializer())
    saver = tf.train.Saver(tf.global_variables())
    train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'),
                                         flush_secs=60)

    with tf.Session() as sess:
        print('==> Load checkpoing')
        restored = False
        if len(os.listdir(checkpoint_dir)) >= 4:
            print('--> Restoring checkpoint from: ' + checkpoint_dir)
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                restored = True
        if restored:
            sess.run(tf.local_variables_initializer())
            print('==> Load finished!')
        else:
            # Also reached when the directory holds files but no usable
            # checkpoint state, so variables are never left uninitialised.
            print('==> No checkpoint, start training new model')
            print('==> Init global variables')
            sess.run(init_op)
            train_writer.add_graph(sess.graph)
            print('==> Init finished!')

        print('==> Training start')
        epoch = start_epoch
        print('--- epoch = %d' % epoch)
        iteration = start_iter
        print('--- iter = %d' % iteration)
        print('--- save_rate = %d' % save_rate)
        print('--- val_rate = %d' % val_rate)

        save_path = os.path.join(checkpoint_dir)
        start_time = time.time()
        if epoch < change_train_data_epoch:
            train_iter_max = train_iter_max_list[0]
        else:
            train_iter_max = train_iter_max_list[1]
        step = 0
        total_step = 0
        # Tracks which data set total_step / train_iter_max currently match
        set_total_step = False
        total_loss = 0
        # Lowest training loss seen so far
        step_loss = 100000
        while epoch < epochs:
            while iteration < train_iter_max:
                # ================== train ==================
                if epoch >= change_train_data_epoch:
                    batch = next(train_b_batch)
                    if set_total_step:
                        total_step = total_step_list[1]
                        train_iter_max = train_iter_max_list[1]
                        set_total_step = False
                else:
                    batch = next(train_1w_batch)
                    if not set_total_step:
                        total_step = total_step_list[0]
                        train_iter_max = train_iter_max_list[0]
                        set_total_step = True
                feed_dict = {
                    inputs_x: batch[0],
                    inputs_y[0]: batch[1][0],
                    inputs_y[1]: batch[1][1],
                    inputs_y[2]: batch[1][2]
                }
                # The two mask tensors are still fetched but their values
                # are not used.
                _, total_loss, _, _ = sess.run(
                    [train_op, loss,
                     yolo_v3.best_confidence_mask_test,
                     yolo_v3.label_object_mask_test],
                    feed_dict=feed_dict)
                eta = remain_time(start_time, total_step, step)
                print('--- Epoch {}, Iter {}, ETA {:.2f}m, loss {:.3f}'.format(
                    epoch, iteration, eta, total_loss))
                # ================== train ==================

                # NOTE: the validation pass driven by val_rate is disabled.

                # Checkpoint whenever the loss reaches a new minimum
                if total_loss < step_loss:
                    step_loss = total_loss
                    model_name = save_path + os.sep + 'yolov3.ckpt' + '-epoch_' + str(epoch) + '_' + \
                        str(iteration) + '-loss_' + '%.3f' % total_loss
                    saver.save(sess, save_path=model_name, global_step=step)
                    print('--- save checkpoint loss: %.3f' % total_loss)
                start_time = time.time()
                step += 1
                iteration += 1
                # No manual "global_step += 1" here: on a tf.Variable that
                # only adds a new op to the graph each iteration, and the
                # optimizer already advances the variable.
            iteration = 0
            epoch += 1

        # Final checkpoint; total_loss is a scalar, so it is formatted
        # directly rather than indexed.
        model_name = save_path + os.sep + 'yolov3.ckpt' + '-epoch_' + str(epoch) + '_' + \
            str(iteration) + '-loss_' + '%.3f' % total_loss
        saver.save(sess, save_path=model_name, global_step=step)
        print('--- save checkpoint loss: %.3f' % total_loss)
    train_writer.close()
    print('==> Training Finished!')
def img_removal_by_embed(root_dir, output_dir, pb_path, node_dict, threshold=0.7, type='copy', GPU_ratio=None, dataset_range=None):
    """Copy or move images whose embedding lies far from the rest of their folder.

    Every immediate sub-directory of ``root_dir`` is treated as one group.
    Each image of a group is embedded with the model restored from
    ``pb_path``; an image whose average Euclidean distance to the other
    embeddings of its group exceeds ``threshold`` is printed and then copied
    or moved to ``output_dir/<sub-directory name>``.

    Args:
        root_dir: Directory whose sub-directories contain the images.
        output_dir: Directory receiving one sub-folder per processed group.
        pb_path: Model path forwarded to ``model_restore_from_pb``.
        node_dict: Node mapping forwarded to ``model_restore_from_pb``; the
            returned dict must expose 'input', 'phase_train' and 'embeddings'.
        threshold: Average-distance limit above which an image is an outlier.
        type: "copy" or "move"; any other value only prints the outliers.
        GPU_ratio: Forwarded to ``model_restore_from_pb``.
        dataset_range: Optional ``(start, stop)`` slice applied to the list of
            sub-directories.

    Returns:
        None.
    """
    img_format = {"png", 'jpg', 'bmp'}
    batch_size = 64
    model_shape = [None, 160, 160, 3]

    # ----collect all folders
    dirs = [obj.path for obj in os.scandir(root_dir) if obj.is_dir()]
    if len(dirs) == 0:
        print("No sub-dirs in ", root_dir)
        return None

    # ----dataset range
    if dataset_range is not None:
        dirs = dirs[dataset_range[0]:dataset_range[1]]

    # ----model init
    sess, tf_dict = model_restore_from_pb(pb_path, node_dict, GPU_ratio=GPU_ratio)
    sess_cal = None
    try:
        tf_input = tf_dict['input']
        tf_phase_train = tf_dict['phase_train']
        tf_embeddings = tf_dict['embeddings']
        feed_dict = {tf_phase_train: False}

        # ----separate small graph that computes the distance of one target
        #     embedding to every reference embedding
        with tf.Graph().as_default():
            tf_tar = tf.placeholder(dtype=tf.float32, shape=tf_embeddings.shape[-1])
            tf_ref = tf.placeholder(dtype=tf.float32, shape=tf_embeddings.shape)
            tf_dis = tf.sqrt(
                tf.reduce_sum(tf.square(tf.subtract(tf_ref, tf_tar)), axis=1))
            # ----GPU setting
            config = tf.ConfigProto(
                log_device_placement=True,
                allow_soft_placement=True,  # fall back to a supported device when the requested one is missing
            )
            config.gpu_options.allow_growth = True
            sess_cal = tf.Session(config=config)
            sess_cal.run(tf.global_variables_initializer())

        # ----process each folder
        for dir_path in dirs:
            paths = [
                file.path for file in os.scandir(dir_path)
                if file.name.split(".")[-1] in img_format
            ]
            len_path = len(paths)
            if len_path == 0:
                print("No images in ", dir_path)
                continue

            # ----create the sub folder in the output folder
            # os.path.basename honours the platform separator; splitting on
            # "\\" returned the whole path everywhere except Windows.
            save_dir = os.path.join(output_dir, os.path.basename(dir_path))
            os.makedirs(save_dir, exist_ok=True)

            # ----calculate embeddings batch by batch
            ites = math.ceil(len_path / batch_size)
            embeddings = np.zeros([len_path, tf_embeddings.shape[-1]], dtype=np.float32)
            for idx in range(ites):
                num_start = idx * batch_size
                num_end = min(num_start + batch_size, len_path)

                batch_dim = [num_end - num_start]
                batch_dim.extend(model_shape[1:])  # e.g. [64, 160, 160, 3]
                batch_data = np.zeros(batch_dim, dtype=np.float32)
                for idx_path, path in enumerate(paths[num_start:num_end]):
                    img = cv2.imread(path)
                    if img is None:
                        # The slot stays all-zero, so the file is still embedded
                        # (as a black image) and judged like the others.
                        print("Read failed:", path)
                    else:
                        img = cv2.resize(img, (model_shape[2], model_shape[1]))
                        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                        batch_data[idx_path] = img
                batch_data /= 255  # norm
                feed_dict[tf_input] = batch_data
                embeddings[num_start:num_end] = sess.run(
                    tf_embeddings, feed_dict=feed_dict)

            # ----calculate ave distance of each image
            ave_dis = np.zeros(embeddings.shape[0], dtype=np.float32)
            n_others = embeddings.shape[0] - 1
            if n_others > 0:
                feed_dict_2 = {tf_ref: embeddings}
                for idx, embedding in enumerate(embeddings):
                    feed_dict_2[tf_tar] = embedding
                    distance = sess_cal.run(tf_dis, feed_dict=feed_dict_2)
                    # The distance to itself is 0, so the sum covers the others.
                    ave_dis[idx] = np.sum(distance) / n_others
            # A folder with a single image has nothing to compare against; its
            # distance stays 0 instead of dividing by zero.

            # ----remove or copy images
            for idx, path in enumerate(paths):
                if ave_dis[idx] > threshold:
                    print("path:{}, ave_distance:{}".format(
                        path, ave_dis[idx]))
                    save_path = os.path.join(save_dir, os.path.basename(path))
                    if type == "copy":
                        shutil.copy(path, save_path)
                    elif type == "move":
                        shutil.move(path, save_path)
    finally:
        # Release the resources held by both sessions.
        if sess_cal is not None:
            sess_cal.close()
        sess.close()
    return None
def train():
    """Train the model on mixed labelled/unlabelled batches across several GPUs.

    Builds one tower per GPU (``FLAGS.num_gpus``), averages the tower
    gradients, applies them together with the moving-average and batch-norm
    updates, prints per-epoch averages of the three losses and the top-1
    rate, writes summaries every 100 steps and saves one checkpoint in
    ``FLAGS.train_dir`` after the last step.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, ('Batch size must be divisible by number of GPUs')

        # Labelled / unlabelled share of every batch.
        bs_l = FLAGS.batch_size * FLAGS.label_ratio
        bs_u = FLAGS.batch_size * (1 - FLAGS.label_ratio)
        num_iter_per_epoch = int(FLAGS.num_train_u / bs_u)
        max_steps = int(FLAGS.num_epochs * num_iter_per_epoch)
        num_classes = FLAGS.num_classes

        global_step = slim.create_global_step()
        # The learning rate is fed at every step (see utils.decay_lr below).
        lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")
        opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9, use_nesterov=True)

        images_l, labels_l = utils.prepare_traindata(FLAGS.dataset_dir_l, int(bs_l))
        images_u, labels_u = utils.prepare_traindata(FLAGS.dataset_dir_u, int(bs_u))
        images_splits_l = tf.split(images_l, FLAGS.num_gpus, 0)
        images_splits_u = tf.split(images_u, FLAGS.num_gpus, 0)
        labels_splits_l = tf.split(labels_l, FLAGS.num_gpus, 0)
        labels_splits_u = tf.split(labels_u, FLAGS.num_gpus, 0)

        # Each tower receives its labelled slice followed by its unlabelled slice.
        images_splits = []
        labels_splits = []
        for i in range(FLAGS.num_gpus):
            images_splits.append(tf.concat([images_splits_l[i], images_splits_u[i]], 0))
            labels_splits.append(tf.concat([labels_splits_l[i], labels_splits_u[i]], 0))

        tower_grads = []
        top_1_op = []
        memory_op = []
        reuse_variables = None
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (network.TOWER_NAME, i)) as scope:
                    with slim.arg_scope(slim.get_model_variables(scope=scope), device='/cpu:0'):
                        loss, loss_s, loss_m, labels, logits, memory_update = \
                            _build_training_graph(images_splits[i], labels_splits[i], num_classes, reuse_variables)
                        memory_op.append(memory_update)
                        top_1_op.append(tf.nn.in_top_k(logits, labels, 1))
                        # Every tower after the first one reuses the variables.
                        reuse_variables = True
                        # Only the collections of the last tower are kept; the
                        # same holds for loss, loss_s and loss_m used below.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                        batchnorm = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)
                        grads = opt.compute_gradients(loss)
                        tower_grads.append(grads)

        grads = network.average_gradients(tower_grads)
        gradient_op = opt.apply_gradients(grads, global_step=global_step)
        var_averages = tf.train.ExponentialMovingAverage(FLAGS.ema_decay, global_step)
        var_op = var_averages.apply(tf.trainable_variables())
        batchnorm_op = tf.group(*batchnorm)
        train_op = tf.group(gradient_op, var_op, batchnorm_op)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
        summary_op = tf.summary.merge(summaries)
        init_op = tf.global_variables_initializer()

        config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        if FLAGS.gpu_memory:
            config.gpu_options.per_process_gpu_memory_fraction = FLAGS.gpu_memory
        sess = tf.Session(config=config)

        boundaries, values = utils.config_lr(max_steps)
        sess.run([init_op], feed_dict={lr: values[0]})
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)

        iter_count = 0
        epoch = 0
        sum_loss = 0
        sum_loss_s = 0
        sum_loss_m = 0
        sum_top_1 = 0
        start = time.time()
        for step in range(max_steps):
            decayed_lr = utils.decay_lr(step, boundaries, values, max_steps)
            _, _, loss_value, loss_value_s, loss_value_m, top_1_value = \
                sess.run([train_op, memory_op, loss, loss_s, loss_m, top_1_op],
                         feed_dict={lr: decayed_lr})

            sum_loss += loss_value
            sum_loss_s += loss_value_s
            sum_loss_m += loss_value_m
            top_1_value = np.sum(top_1_value) / bs_l
            sum_top_1 += top_1_value
            iter_count += 1

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            assert not np.isnan(loss_value_s), 'Model diverged with loss = NaN'
            assert not np.isnan(loss_value_m), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                summary_str = sess.run(summary_op, feed_dict={lr: decayed_lr})
                summary_writer.add_summary(summary_str, step)

            # Saved before the epoch counter below is advanced so the
            # checkpoint keeps the suffix it always had (the index of the
            # last epoch).
            if (step + 1) == max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=epoch)

            # An epoch is complete after exactly num_iter_per_epoch steps.
            # Testing `step % n == 0 and step > 0` averaged n + 1 steps over n
            # in the first report and never reported the final epoch.
            if (step + 1) % num_iter_per_epoch == 0:
                end = time.time()
                sum_loss = sum_loss / num_iter_per_epoch
                sum_loss_s = sum_loss_s / num_iter_per_epoch
                sum_loss_m = sum_loss_m / num_iter_per_epoch
                sum_top_1 = min(sum_top_1 / num_iter_per_epoch, 1.0)
                # Average over every iteration since training started.
                time_per_iter = float(end - start) / iter_count
                format_str = ('epoch %d, L = %.2f, Ls = %.2f, Lm = %.2f, top_1 = %.2f, lr = %.4f (time_per_iter: %.4f s)')
                print(format_str % (epoch, sum_loss, sum_loss_s, sum_loss_m,
                                    sum_top_1 * 100, decayed_lr, time_per_iter))
                epoch += 1
                sum_loss = 0
                sum_loss_s = 0
                sum_loss_m = 0
                sum_top_1 = 0
def train(sess, env, actor4, critic4):
    """Run the actor-critic training loop for ``MAX_EPISODES`` episodes.

    Each step acts in ``env`` (with decaying exploration noise during the
    first ``EXPLORATION_TIME`` episodes), stores the transition in a replay
    buffer and, once the buffer holds more than ``MINIBATCH_SIZE`` samples,
    updates the critic, the actor and both target networks. At the end of
    every episode the reward and the accumulated max-Q are written as a
    summary, printed, and appended to the module-level ``REWARD`` / ``QMAX``.

    Args:
        sess: TensorFlow session used for initialisation and summaries.
        env: Environment exposing ``reset``, ``step``, ``render`` and
            ``observation_space``.
        actor4: Actor network wrapper (predict / train / target update).
        critic4: Critic network wrapper (predict / train / action gradients).
    """
    # Set up summary ops
    summary_ops, summary_vars = build_summaries()

    # Initialize Tensorflow variables
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    actor4.update_target_network()
    critic4.update_target_network()

    # Initialize replay memory
    replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)

    # `range` and single-argument `print(...)` behave the same on Python 2 and 3.
    for i in range(MAX_EPISODES):
        s = env.reset()
        episode_reward = 0
        episode_ave_max_q = 0

        # One Ornstein-Uhlenbeck noise sample per step, decayed exponentially
        # over EXPLORATION_TIME.
        noise = ExplorationNoise.ou_noise(OU_THETA, OU_MU, OU_SIGMA, MAX_STEPS_EPISODE)
        noise = ExplorationNoise.exp_decay(noise, EXPLORATION_TIME)

        for j in range(MAX_STEPS_EPISODE):
            if RENDER_ENV and i % 10 == 0:
                env.render()

            a = actor4.predict(
                np.reshape(s, (1, env.observation_space.shape[0])))
            # Exploratory noise is only added during the first
            # EXPLORATION_TIME episodes.
            if i < EXPLORATION_TIME:
                a = a + noise[j]

            s2, r, terminal, info = env.step(a)
            if i % 10 == 0:
                print(a)

            replay_buffer.add(np.reshape(s, actor4.state_dim),
                              np.reshape(a, actor4.action_dim), r, terminal,
                              np.reshape(s2, actor4.state_dim))

            # Keep adding experience to the memory until
            # there are at least minibatch size samples
            if replay_buffer.size() > MINIBATCH_SIZE:
                s_batch, a_batch, r_batch, t_batch, s2_batch = \
                    replay_buffer.sample_batch(MINIBATCH_SIZE)

                # Calculate targets
                target_q = critic4.predict_target(
                    s2_batch, actor4.predict_target(s2_batch))

                # Terminal transitions get the reward only, all others the
                # reward plus the discounted target Q.
                y_i = [
                    r_batch[k] if t_batch[k] else r_batch[k] + GAMMA * target_q[k]
                    for k in range(MINIBATCH_SIZE)
                ]

                # Update the critic given the targets
                predicted_q_value, _ = critic4.train(
                    s_batch, a_batch, np.reshape(y_i, (MINIBATCH_SIZE, 1)))
                episode_ave_max_q += np.amax(predicted_q_value)

                # Update the actor policy using the sampled gradient
                a_outs = actor4.predict(s_batch)
                a_grads = critic4.action_gradients(s_batch, a_outs)
                actor4.train(s_batch, a_grads[0])

                # Update target networks
                actor4.update_target_network()
                critic4.update_target_network()

            s = s2
            episode_reward += r

            if terminal or j == MAX_STEPS_EPISODE - 1:
                summary_str = sess.run(summary_ops, feed_dict={
                    # NOTE(review): indexing assumes the environment returns the
                    # reward as an array - confirm against the env in use.
                    summary_vars[0]: episode_reward[0],
                    summary_vars[1]: episode_ave_max_q
                })
                writer.add_summary(summary_str, i)
                writer.flush()

                # j is a zero-based index, so j + 1 steps were taken. Dividing
                # by j raised ZeroDivisionError when the episode ended on its
                # first step.
                print('Reward: %.2i  | Episode %s | Qmax: %.4f'
                      % (int(episode_reward), i, episode_ave_max_q / float(j + 1)))
                REWARD.append(episode_reward)
                QMAX.append(episode_ave_max_q)
                break
def main(empresa):
    """Fetch today's news about a company and return an 'up'/'down' call.

    Queries webhose.io for today's English news posts mentioning ``empresa``,
    encodes the text as integer word ids, builds an embedding + LSTM
    sentiment network, runs the train / save / restore cycle and turns the
    last test prediction into a recommendation.

    NOTE(review): as written, every post is concatenated into one single
    "review" (see the preprocessing step), so the training and validation
    splits are empty and the prediction comes from freshly initialised
    weights. The labels are all zeros as well. Both look like leftovers of the
    notebook this was converted from and need a decision by the owner.

    Args:
        empresa: Company name; a trailing "Inc" / ", Inc." is stripped.

    Returns:
        'up' if the last test prediction is above 0.5, otherwise 'down'.

    Raises:
        KeyError: if the query returned no posts (no 'text' column).
        UnboundLocalError: if no word survives preprocessing, because no test
            batch exists then.
    """
    from collections import Counter
    from string import punctuation

    # NOTE(review): the API token is hard-coded in source control; it should
    # be read from configuration and this one rotated.
    webhoseio.config(token="90a4a1a8-5016-4023-bdd6-8b302321a632")

    # ---- fetch the news for the company ----
    for suffix in (", Inc.", ", Inc", "Inc.", "Inc"):
        empresa = empresa.replace(suffix, "")

    now = dt.now()
    midnight = dt(now.year, now.month, now.day, 0, 0)
    query_params = {
        # The original filter lacked the OR between kiplinger.com and
        # bloomberg.com, which broke the alternation of sites.
        "q": "\"" + empresa + "\" site_type:news language:english (site:cnn.com OR site:wsj.com OR site:forbes.com OR site:marketwatch.com OR site:thestreet.com OR site:thisismoney.co.uk OR site:kiplinger.com OR site:bloomberg.com OR site:highpointobserver.com)",
        "ts": str(round(midnight.timestamp())),
        "sort": "relevancy"
    }
    output = webhoseio.query("filterWebContent", query_params)
    output = pd.DataFrame(output['posts'])

    salida = pd.DataFrame()
    if output.shape[0] > 0:
        for i in range(output.shape[0]):
            output.loc[i, 'text'] = output.loc[i, 'text'].replace('\n', '')
            output.loc[i, 'published'] = output.loc[i, 'published'][0:10]
        # DataFrame.append was removed in pandas 2.0; appending to an empty
        # frame with ignore_index=True is the selected columns re-indexed.
        salida = output[['published', 'text']].reset_index(drop=True)

    reviews = salida["text"]
    labels = pd.Series(np.zeros(len(reviews)))

    # ---- data preprocessing ----
    # NOTE(review): `reviews` is a Series, so this iterates whole post texts,
    # not characters. Punctuation is therefore not removed and the posts are
    # glued together without a separator. Kept as is because changing it
    # alters every downstream split.
    all_text = ''.join([c for c in reviews if c not in punctuation])
    all_text = ''.join([ch for ch in all_text if not ch.isnumeric()])
    reviews = all_text.split('\n')
    all_text = ' '.join(reviews)
    words = all_text.split()

    # ---- encode the words ----
    # Ids start at 1 because 0 is reserved for padding.
    counts = Counter(words)
    vocab = sorted(counts, key=counts.get, reverse=True)
    vocab_to_int = {word: ii for ii, word in enumerate(vocab, 1)}
    reviews_ints = [[vocab_to_int[word] for word in each.split()]
                    for each in reviews]

    # Drop zero-length reviews together with their labels.
    non_zero_idx = [ii for ii, review in enumerate(reviews_ints) if len(review) != 0]
    reviews_ints = [reviews_ints[ii] for ii in non_zero_idx]
    labels = np.array([labels[ii] for ii in non_zero_idx])

    # Left-pad with zeros / truncate to the first seq_len words.
    seq_len = 200
    features = np.zeros((len(reviews_ints), seq_len), dtype=int)
    for i, row in enumerate(reviews_ints):
        features[i, -len(row):] = np.array(row)[:seq_len]

    # ---- training, validation, test split ----
    split_frac = 0.8
    split_idx = int(len(features) * split_frac)
    train_x, val_x = features[:split_idx], features[split_idx:]
    train_y, val_y = labels[:split_idx], labels[split_idx:]
    test_idx = int(len(val_x) * 0.5)
    val_x, test_x = val_x[:test_idx], val_x[test_idx:]
    val_y, test_y = val_y[:test_idx], val_y[test_idx:]

    # ---- hyperparameters ----
    lstm_size = 256       # units in the LSTM hidden layer
    lstm_layers = 1       # number of stacked LSTM layers
    batch_size = 1        # reviews per training pass
    learning_rate = 0.001
    embed_size = 300      # size of the embedding vectors

    # Word ids run from 1 to len(vocab) and 0 is the padding id, so the
    # embedding table needs len(vocab) + 1 rows. With len(vocab) rows the
    # rarest word was an out-of-range index.
    n_words = len(vocab_to_int) + 1

    # ---- build the graph ----
    graph = tf.Graph()
    with graph.as_default():
        inputs_ = tf.placeholder(tf.int32, [None, None], name='inputs')
        labels_ = tf.placeholder(tf.int32, [None, None], name='labels')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')

        # Embedding lookup instead of one-hot encoding the vocabulary.
        embedding = tf.Variable(tf.random_uniform((n_words, embed_size), -1, 1))
        embed = tf.nn.embedding_lookup(embedding, inputs_)

        # LSTM cell with dropout on its outputs, stacked lstm_layers times.
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
        initial_state = cell.zero_state(batch_size, tf.float32)

        # RNN forward pass over the embedded sequence.
        outputs, final_state = tf.nn.dynamic_rnn(cell, embed,
                                                 initial_state=initial_state)

        # Only the last time step is used as the sentiment prediction.
        predictions = tf.contrib.layers.fully_connected(outputs[:, -1], 1,
                                                        activation_fn=tf.sigmoid)
        cost = tf.losses.mean_squared_error(labels_, predictions)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

        # Validation accuracy.
        correct_pred = tf.equal(tf.cast(tf.round(predictions), tf.int32), labels_)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        saver = tf.train.Saver()

    def get_batches(x, y, batch_size=1):
        """Yield consecutive full batches of ``x`` and ``y``; the rest is dropped."""
        n_batches = len(x) // batch_size
        x, y = x[:n_batches * batch_size], y[:n_batches * batch_size]
        for ii in range(0, len(x), batch_size):
            yield x[ii:ii + batch_size], y[ii:ii + batch_size]

    # ---- training ----
    epochs = 1
    # Saver.save needs the target directory to exist.
    tf.gfile.MakeDirs("checkpoints")
    with tf.Session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        iteration = 1
        for e in range(epochs):
            state = sess.run(initial_state)
            for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
                feed = {inputs_: x,
                        labels_: y[:, None],
                        keep_prob: 0.5,
                        initial_state: state}
                loss, state, _ = sess.run([cost, final_state, optimizer],
                                          feed_dict=feed)

                if iteration % 25 == 0:
                    val_acc = []
                    val_state = sess.run(cell.zero_state(batch_size, tf.float32))
                    for vx, vy in get_batches(val_x, val_y, batch_size):
                        feed = {inputs_: vx,
                                labels_: vy[:, None],
                                keep_prob: 1,
                                initial_state: val_state}
                        batch_acc, val_state, pred = sess.run(
                            [accuracy, final_state, predictions], feed_dict=feed)
                        val_acc.append(batch_acc)
                iteration += 1
                # NOTE(review): the loop is left after its first batch; the
                # nesting level of this `break` could not be recovered with
                # certainty from the flattened original - confirm.
                break
        saver.save(sess, "checkpoints/sentiment.ckpt")

    # ---- testing ----
    test_acc = []
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
        test_state = sess.run(cell.zero_state(batch_size, tf.float32))
        for ii, (x, y) in enumerate(get_batches(test_x, test_y, batch_size), 1):
            feed = {inputs_: x,
                    labels_: y[:, None],
                    keep_prob: 1,
                    initial_state: test_state}
            batch_acc, test_state, pred = sess.run(
                [accuracy, final_state, predictions], feed_dict=feed)
            test_acc.append(batch_acc)

    # The recommendation is based on the prediction of the last test batch.
    pred = pred[0][0]
    if pred > 0.5:
        consejo = 'up'
    else:
        consejo = 'down'
    return consejo
# Model output: the line a * x_in + b evaluated by the graph.
y_pred = a * x_in + b

# Placeholder that is fed the observed y-value belonging to x_in.
y_act = tf.placeholder(tf.float32, [None])

# Loss: squared difference between prediction and observation. It is used
# instead of the absolute value because it is easier to differentiate.
squared_diff = tf.square(y_pred - y_act)

# Plain gradient-descent optimizer from TensorFlow.
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(squared_diff)

# Set up the TensorFlow session and initialise the variables.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Training loop: cycle through the points and train on one point per step.
for i in range(10000):
    idx = i % num_pts
    xp, yp = x[idx], y[idx]
    sess.run(train_step, feed_dict={x_in: [xp], y_act: [yp]})

# Grab the final values of a and b after training and print them for the user.
trained_a, trained_b = sess.run([a, b])
print('Trained Values: a = {}, b = {}'.format(trained_a, trained_b))
def optimize_graph(logger=None, verbose=False):
    """Freeze the BERT encoder plus mean pooling into an inference graph file.

    Builds the BERT model described by ``args.config_name``, initialises it
    from ``args.ckpt_name``, mean-pools the selected encoder layers over the
    unmasked tokens, converts the variables to constants, runs
    ``optimize_for_inference`` and writes the serialized graph to a new
    temporary file inside ``args.output_dir``.

    Args:
        logger: Logger to report progress to; one is created with
            ``set_logger`` when omitted.
        verbose: Forwarded to ``set_logger`` when a logger has to be created.

    Returns:
        The path of the written graph file, or ``None`` when any step failed
        (the failure is logged, not raised).
    """
    if not logger:
        logger = set_logger(colored('BERT_VEC', 'yellow'), verbose)
    try:
        # we don't need GPU for optimizing the graph
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
        tf.gfile.MakeDirs(args.output_dir)

        config_fp = args.config_name
        logger.info('model config: %s' % config_fp)

        # Load the BERT configuration file.
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(
                config=bert_config,
                is_training=False,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=input_type_ids,
                use_one_hot_embeddings=False)

            # Map every trainable variable to its value in the checkpoint.
            tvars = tf.trainable_variables()
            init_checkpoint = args.ckpt_name
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            with tf.variable_scope("pooling"):
                if len(args.layer_indexes) == 1:
                    # A single layer was requested: use its output directly.
                    encoder_layer = model.all_encoder_layers[args.layer_indexes[0]]
                else:
                    # Several layers: concatenate their outputs along the
                    # hidden dimension (hidden size * number of layers).
                    all_layers = [model.all_encoder_layers[l] for l in args.layer_indexes]
                    encoder_layer = tf.concat(all_layers, -1)

            def mul_mask(x, m):
                """Zero the positions of ``x`` where the mask ``m`` is 0."""
                return x * tf.expand_dims(m, axis=-1)

            def masked_reduce_mean(x, m):
                """Average ``x`` over axis 1, counting unmasked positions only."""
                return tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            input_mask = tf.cast(input_mask, tf.float32)
            # Sentence vector: masked mean of the token vectors, i.e. padding
            # tokens do not contribute.
            pooled = masked_reduce_mean(encoder_layer, input_mask)
            # Named outside the "pooling" scope so the output node is reachable
            # as 'final_encodes'.
            pooled = tf.identity(pooled, 'final_encodes')

            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        # allow_soft_placement: let TensorFlow pick an available device.
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            logger.info('freeze...')
            # n.name[:-2] drops the ':0' output suffix to get the node name.
            tmp_g = tf.graph_util.convert_variables_to_constants(
                sess, tmp_g, [n.name[:-2] for n in output_tensors])
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g,
                [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes],
                False)

        # Reserve a unique file name and close the handle right away; the
        # original never closed it (a descriptor leak, and on Windows the
        # still-open file cannot be opened again for writing).
        with tempfile.NamedTemporaryFile('w', delete=False, dir=args.output_dir) as handle:
            tmp_file = handle.name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file
    except Exception as e:
        # Best effort by design: report the failure and signal it with None.
        logger.error('fail to optimize the graph!')
        logger.error(e)
        return None
def train(x): """ Trains the neural net :param x: Features placeholder :return: Trained neural net """ prediction = convNeuralNet(x) #print prediction with tf.name_scope('cross_entropy'): cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y)) tf.summary.scalar('cross_entropy',cost) with tf.name_scope('train'): optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) # learning rate = 0.001 with tf.name_scope('accuracy'): correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1)) accuracy = tf.reduce_mean(tf.cast(correct,'float')) tf.summary.scalar('accuracy',accuracy) # cycles of feed forward and backprop num_epochs = ne with tf.Session() as sess: sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() merged_summary = tf.summary.merge_all() writer = tf.summary.FileWriter(os.getcwd()+tb_path) writer.add_graph(sess.graph) for epoch in range(num_epochs): epoch_loss = 0 for i in range(int(real_X_9.shape[0])/batch_size):#mnist.train.num_examples/batch_size)): # X.shape[0] randidx = np.random.choice(real_X_9.shape[0], batch_size, replace=False) epoch_x,epoch_y = real_X_9[randidx,:],real_y_9[randidx,:] #mnist.train.next_batch(batch_size) # X,y j,c = sess.run([optimizer,cost],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB}) if i == 0: [ta] = sess.run([accuracy],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB}) print 'Train Accuracy', ta if epoch % 50 == 0 and i == 0: #saver.save(sess,os.getcwd()+'/models/base/baseCNN18.ckpt') #print 'Checkpoint saved at',os.getcwd()+'/models/base/baseCNN18' pass # ta_list.append(ta) if i % 5 == 0: s = sess.run(merged_summary,feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB}) writer.add_summary(s,i) epoch_loss += c print '\n','Epoch', epoch + 1, 'completed out of', num_epochs, '\nLoss:',epoch_loss saver.save(sess, os.getcwd()+'/models/base/' + NAME) saver.export_meta_graph(os.getcwd()+'/models/base/' + NAME + '.meta') print 'Model saved' print '\n','Train 
Accuracy', accuracy.eval(feed_dict={x:real_X_9, y:real_y_9, keep_prob:TRAIN_KEEP_PROB}) print '\n','Test Accuracy', accuracy.eval(feed_dict={x:test_real_X, y:test_real_y, keep_prob:1.0}) #X, y #mnist.test.images, mnist.test.labels #saver.save(sess,'baseDNN',global_step=1000) #print 'Prediction',sess.run(prediction, feed_dict={x:testtest, keep_prob:1}) #print 'Prediction',sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1}) #print test_real_y # correct_list = [] # for i in range(len(sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1}))): # if list(test_real_y[i]).index(1) == sess.run(tf.argmax(prediction,1), feed_dict={x:testtest, keep_prob:1})[i]: # correct_list.append(True) # else: # correct_list.append(False) # print correct_list '''
def _conv_tanh(inputs, scope_name, in_channels, out_channels):
    """Apply a 1x3, stride-1, SAME-padded convolution plus bias, followed by tanh.

    Creates the variables ``weights`` and ``biases`` under the name scope
    ``scope_name`` and returns the activated tensor.
    """
    with tf.name_scope(scope_name) as scope:
        kernel = tf.Variable(
            tf.truncated_normal([1, 3, in_channels, out_channels],
                                dtype=tf.float32, stddev=1e-1),
            name='weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(
            tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
            trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        return tf.nn.tanh(out, name=scope)


def mode_base():
    """Build a five-stage convolutional classifier and an initialised session.

    The network takes inputs of shape ``(None, 1, 128, 1)`` and labels of
    shape ``(None, 3)``. Each stage is two or three 1x3 tanh convolutions
    followed by a 1x2 max-pool with stride 2 along the third axis, which
    halves that axis from 128 down to 4 over the five stages. The flattened
    result goes through the dense layers built by ``add_layer`` (defined
    elsewhere in the file) down to 3 output units. Training uses softmax
    cross-entropy with plain gradient descent at a fixed rate of 0.001.

    :return: tuple ``(x_input, y_lable, prediction, loss, train_step, sess)``;
        ``sess`` is an open ``tf.Session`` whose variables have already been
        initialised -- the caller is responsible for closing it
    """
    x_input = tf.placeholder(tf.float32, shape=(None, 1, 128, 1))
    y_lable = tf.placeholder(tf.float32, shape=(None, 3))  # not configurable yet; 3 classes for now

    # (pool name, ((conv scope name, output channels), ...)) for every stage.
    stages = (
        ('pool1', (('conv1_1', 64), ('conv1_2', 64))),
        ('pool2', (('conv2_1', 128), ('conv2_2', 128))),
        ('pool3', (('conv3_1', 256), ('conv3_2', 256), ('conv3_3', 256))),
        ('pool4', (('conv4_1', 512), ('conv4_2', 512), ('conv4_3', 512))),
        # The last pooling op used to reuse the name 'pool4'; it is now
        # named after the stage it belongs to.
        ('pool5', (('conv5_1', 512), ('conv5_2', 512), ('conv5_3', 512))),
    )

    net = x_input
    channels = 1
    for pool_name, conv_specs in stages:
        for conv_name, out_channels in conv_specs:
            net = _conv_tanh(net, conv_name, channels, out_channels)
            channels = out_channels
        net = tf.nn.max_pool(net, ksize=[1, 1, 2, 1], strides=[1, 1, 2, 1],
                             padding='SAME', name=pool_name)
    pool5 = net

    # Flatten everything except the batch axis before the dense layers.
    shape = int(np.prod(pool5.get_shape()[1:]))
    pool5_flat = tf.reshape(pool5, [-1, shape])

    hiddenLayer1 = add_layer("layer1", pool5_flat, in_size=shape, out_size=2048, activation_function=tf.tanh)
    hiddenLayer2 = add_layer("layer2", hiddenLayer1, in_size=2048, out_size=1024, activation_function=tf.tanh)
    hiddenLayer3 = add_layer("layer3", hiddenLayer2, in_size=1024, out_size=512, activation_function=tf.tanh)
    hiddenLayer4 = add_layer("layer4", hiddenLayer3, in_size=512, out_size=128, activation_function=tf.tanh)
    hiddenLayer5 = add_layer("layer5", hiddenLayer4, in_size=128, out_size=16, activation_function=tf.tanh)

    # No activation_function is passed for the output layer; its result is
    # fed to the cross-entropy op as logits.
    prediction = add_layer("end", hiddenLayer5, in_size=16, out_size=3)

    losses = tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y_lable)
    loss = tf.reduce_mean(losses)

    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    return x_input, y_lable, prediction, loss, train_step, sess