def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdadeltaOptimizer().minimize(cost)
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.arg_max(prediction, 1), tf.arg_max(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
def cnn_handigit():
    sess = tf.InteractiveSession()

    # parameters
    W_conv1 = weight_varible([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

    # conv layer-1
    x = tf.placeholder(tf.float32, [None, 784])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # conv layer-2
    W_conv2 = weight_varible([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # fully connected layer
    W_fc1 = weight_varible([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # output layer: softmax
    W_fc2 = weight_varible([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    y_ = tf.placeholder(tf.float32, [None, 10])

    # model training
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.arg_max(y_conv, 1), tf.arg_max(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess.run(tf.initialize_all_variables())
    saver = tf.train.Saver()
    tf.add_to_collection('train_op', train_step)

    for i in range(200):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
            saver.save(sess, 'train_process', global_step=i)  # save a checkpoint at this point
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    # accuracy on test
    print("test accuracy %g" % (accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})))
def compute_accuracy(y_hat, labels, sparse=False):
    """Compute accuracy for 3-dimensional outputs.

    The prediction is assumed to be made by argmax.

    Parameters
    ----------
    y_hat : tensor, shape (batch_size, n_samples, n_outputs)
        Raw predictions of a neural network. It is not required to convert
        them to softmax probabilities, because softmax is a monotonic transform.
    labels : tensor
        True labels. It can have shape (batch_size, n_samples), in which case
        each value should be an index within [0, n_classes). Alternatively it
        can have shape (batch_size, n_samples, n_outputs), in which case a
        probability distribution with n_outputs values should be provided for
        each sample (this case also handles one-hot label encoding). In the
        latter case the correct label is also selected by argmax. Set the
        `sparse` parameter to select the appropriate setting.
    sparse : bool, default False
        Whether `labels` are indices or full distributions.

    Returns
    -------
    accuracy : scalar tensor
        Computed accuracy.
    """
    prediction = tf.arg_max(y_hat, 2)
    if sparse:
        labels = tf.cast(labels, prediction.dtype)
    else:
        labels = tf.arg_max(labels, 2)
    return tf.reduce_mean(tf.cast(tf.equal(prediction, labels), tf.float32))
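# A minimal usage sketch for compute_accuracy above (toy shapes assumed; TF1-style
# session; not part of the original snippet):
import numpy as np
import tensorflow as tf

example_scores = tf.constant(np.random.randn(4, 7, 3), dtype=tf.float32)  # (batch, samples, outputs)
example_labels = tf.constant(np.random.randint(0, 3, size=(4, 7)))        # sparse class indices
example_acc = compute_accuracy(example_scores, example_labels, sparse=True)
with tf.Session() as sess:
    print(sess.run(example_acc))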
def main(_):
    start_time = time.time()
    data_sets = read_data_sets()
    with tf.Graph().as_default(), tf.Session() as session:
        dictionary_size = len(data_sets.dictionary)
        x = tf.placeholder(tf.float32, [None, dictionary_size])
        W = tf.Variable(tf.zeros([dictionary_size, label_size]))
        b = tf.Variable(tf.zeros([label_size]))
        y = tf.nn.softmax(tf.matmul(x, W) + b)
        y_ = tf.placeholder(tf.float32, [None, label_size])
        cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
        tf.initialize_all_variables().run()
        for i in range(1000):
            batch_xs, batch_ys = data_sets.train.next_batch(100)
            train_step.run({x: batch_xs, y_: batch_ys})
        correct_prediction = tf.equal(tf.arg_max(y, 1), tf.arg_max(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print(accuracy.eval({x: data_sets.validation.inputs, y_: data_sets.validation.labels}))
    print("Elapsed time:", time.time() - start_time)
def evaluate(input_x, input_y):
    '''
    Evaluate the text classifier.
    :return result: the predicted class (whichever dimension is larger)
            accuracy: the accuracy
    '''
    graph = tf.Graph()
    with graph.as_default(), tf.Session() as sess:
        # rebuild the model and restore it from the checkpoint
        features = tf.placeholder(tf.int32, [None, cnnc.SEQUENCE_LENGTH])
        labels = tf.placeholder(tf.int32, [None, cnnc.FLAGS.num_class])
        logits = cnnc.inference(features)
        predictions = tf.arg_max(logits, 1)
        correct_predictions = tf.equal(predictions, tf.arg_max(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_predictions, dtype=tf.float32))

        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("SUCCESS")
        else:
            print("No checkpoint file found")

        result, accuracy = sess.run([predictions, accuracy],
                                    feed_dict={features: input_x, labels: input_y})
        return result, accuracy
def num_correct_prediction(logits, labels):
    """Evaluate the quality of the logits at predicting the label.

    Return:
        the number of correct predictions
    """
    correct = tf.equal(tf.arg_max(logits, 1), tf.arg_max(labels, 1))
    correct = tf.cast(correct, tf.int32)
    n_correct = tf.reduce_sum(correct)
    return n_correct
def build_graph(self): x = tf.placeholder(tf.float32, [None, self.window_size, self.dim_word_feat], "x_input") y = tf.placeholder(tf.float32, [None, self.output_size], "label_input") W1 = self.weight_variable(shape=[2, self.dim_word_feat, 1, self.num_feat_map]) b1 = self.bias_variable(shape=[self.num_feat_map]) x_inputs = tf.reshape(x, [-1, self.window_size, self.dim_word_feat, 1]) # h_conv_1 size: [-1, dwf, ws, nfm] h_conv_1 = tf.nn.relu(self.conv_2d(x_inputs, W1) + b1) print h_conv_1.get_shape() # h_max_pool size: [-1, 1,1, nfm] h_max_pool = self.max_pool(h_conv_1) print h_max_pool.get_shape() # concentrate in none vector # sent_vec size: [-1, nfm] sent_vec = tf.reshape(h_max_pool, [-1, self.num_feat_map]) print sent_vec.get_shape() W2 = self.weight_variable(shape=[self.num_feat_map, self.output_size]) b2 = self.bias_variable(shape=[self.output_size]) logits = tf.matmul(sent_vec, W2) + b2 print logits.get_shape() outputs = tf.nn.softmax(logits) print outputs.get_shape() # window - level cross_entropy = tf.reduce_mean(-tf.reduce_sum(tf.mul(y, tf.log(outputs)), reduction_indices=[1])) print cross_entropy.get_shape() # # sentence - level # y_label = tf.arg_max(y, 1) # ltm = self.label_transition_mat([self.output_size + 1, self.output_size]) # # score_golden = tf.reduce_sum(ltm[]) # log_add_score train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(cross_entropy) prediction = tf.arg_max(outputs, 1) ori_label = tf.arg_max(y, 1) accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, ori_label), tf.float32)) return dict( x=x, y=y, loss=cross_entropy, train=train_step, accuracy=accuracy, prediction=prediction, ori_label=ori_label )
def accuracy(logits, labels):
    """Evaluate the quality of the logits at predicting the label.

    Args:
        logits: Logits tensor, float - [batch_size, NUM_CLASSES].
        labels: Labels tensor.
    """
    with tf.name_scope('accuracy') as scope:
        correct = tf.equal(tf.arg_max(logits, 1), tf.arg_max(labels, 1))
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct) * 100.0
        tf.summary.scalar(scope + '/accuracy', accuracy)
    return accuracy
def calc_reward(outputs): outputs = outputs[-1] # look at ONLY THE END of the sequence outputs = tf.reshape(outputs, (batch_size, cell_out_size)) h_a_out = weight_variable((cell_out_size, n_classes)) p_y = tf.nn.softmax(tf.matmul(outputs, h_a_out)) max_p_y = tf.arg_max(p_y, 1) correct_y = tf.cast(labels_placeholder, tf.int64) R = tf.cast(tf.equal(max_p_y, correct_y), tf.float32) # reward per example reward = tf.reduce_mean(R) # overall reward p_loc = gaussian_pdf(mean_locs, sampled_locs) p_loc = tf.reshape(p_loc, (batch_size, glimpses * 2)) R = tf.reshape(R, (batch_size, 1)) J = tf.concat(1, [tf.log(p_y + 1e-5) * onehot_labels_placeholder, tf.log( p_loc + 1e-5) * R]) J = tf.reduce_sum(J, 1) J = tf.reduce_mean(J, 0) cost = -J optimizer = tf.train.AdamOptimizer(lr) train_op = optimizer.minimize(cost) return cost, reward, max_p_y, correct_y, train_op
def MLP(trainFeature, trainLabel, testFeature): N1 = trainFeature.shape[0] N2 = testFeature.shape[0] D = trainFeature.shape[1] x = tf.placeholder(tf.float32, [None, D]) W = tf.Variable(tf.zeros([D, 2])) b = tf.Variable(tf.zeros([2])) y = tf.nn.softmax(tf.matmul(x, W) + b) y_ = tf.placeholder(tf.float32, [None, 2]) cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) init = tf.initialize_all_variables() label1 = np.zeros([N1, 2]) for item in range(N1): label1[item][trainLabel[item]] = 1 sess = tf.Session() sess.run(init) idx = [i for i in range(N1)] for i in range(100): randomSamples = random.sample(idx, 5) batch_xs = trainFeature[randomSamples, :] batch_ys = label1[randomSamples] sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) if i % 10 == 0: print(i, sess.run(W), sess.run(b)) #correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) predicted_label = tf.arg_max(y, 1) return(sess.run(predicted_label, feed_dict={x: testFeature}))
def train_neural_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # cycles of feed forward and back propagation
    hm_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            while i < len(train_x):
                start = i
                end = i + batch_size
                batch_x = np.array(train_x[start:end])
                batch_y = np.array(train_y[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                epoch_loss += c
                i += batch_size
            print('Epoch', epoch + 1, 'completed out of', hm_epochs, 'loss:', epoch_loss)
        correct = tf.equal(tf.arg_max(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy: ', accuracy.eval({x: test_x, y: test_y}))
def train(mnist):
    x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    y = mnist_inference.inference(x, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # exponential moving average of the trainable variables
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # loss function: cross entropy plus the collected L2 regularization losses
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.arg_max(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step,
                                               mnist.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY)

    # training step
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')

    # initialize the TF persistence class (Saver)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After %d training step(s), loss on training "
                      "batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
def infer(args): """ """ dataloader = DataLoader(args.input_dict) args.seq_length = dataloader.seq_length args.char_size = len(dataloader.char_vocab_dict) args.phvocab_size = len(dataloader.ph_vocab_dict) model = Model(args) with tf.Session() as sess: sess.run(tf.initialize_all_variables()) ## ## initial state for the model ## state = sess.run(model.initial_state) dataloader.reset_batch_pointer() for n in xrange(dataloader.num_batches): b = dataloader.next_batch() x, y = b inx = np.array([sess.run(x), sess.run(x)]) feed = {model.input_data: inx} logits = sess.run(model.logits, feed_dict = feed) logits = tf.split(0, args.batch_size, logits) for res in logits: result = sess.run(tf.arg_max(res, 1)) print(result, [dataloader.ph_vocab_invdict[i] for i in result])
def test_i2v(): """Loads the i2v network and applies it to a test image. """ with tf.Session() as sess: net = get_i2v_model() tf.import_graph_def(net['graph_def'], name='i2v') g = tf.get_default_graph() names = [op.name for op in g.get_operations()] x = g.get_tensor_by_name(names[0] + ':0') softmax = g.get_tensor_by_name(names[-3] + ':0') from skimage import data img = preprocess(data.coffee())[np.newaxis] res = np.squeeze(softmax.eval(feed_dict={x: img})) print([(res[idx], net['labels'][idx]) for idx in res.argsort()[-5:][::-1]]) """Let's visualize the network's gradient activation when backpropagated to the original input image. This is effectively telling us which pixels contribute to the predicted class or given neuron""" pools = [name for name in names if 'pool' in name.split('/')[-1]] fig, axs = plt.subplots(1, len(pools)) for pool_i, poolname in enumerate(pools): pool = g.get_tensor_by_name(poolname + ':0') pool.get_shape() neuron = tf.reduce_max(pool, 1) saliency = tf.gradients(neuron, x) neuron_idx = tf.arg_max(pool, 1) this_res = sess.run([saliency[0], neuron_idx], feed_dict={x: img}) grad = this_res[0][0] / np.max(np.abs(this_res[0])) axs[pool_i].imshow((grad * 128 + 128).astype(np.uint8)) axs[pool_i].set_title(poolname)
def __init__(self, layer_sizes, layer_types, init_value_scale=1.0, uniform_init=False, verbose = True): ''' initialize network architecture :param layer_sizes: list type, layer sizes, e.g. a 3-layer network "784:256:10" :param layer_types: list type, hidden layer types, e.g. sigmoid/tanh or "sigmoid:tanh" for 2-hidden-layer network :param init_value_scale: int, scale for uniform initialization :param uniform_init: bool, true for uniform, gaussian otherwise :param verbose: bool, verbose :return: ''' self.verbose = verbose # input settings self.x = tf.placeholder(tf.float32, [None, layer_sizes[0]], name='input') self.y = tf.placeholder(tf.float32, [None, layer_sizes[-1]], name='truth') self.learning_rate = tf.placeholder(tf.float32, name='learningrate') self.momentum = tf.placeholder(tf.float32, name='momentum') # layers self.layers = [] # build multi-layer perceptron architecture if self.verbose: print('Building Multilayer Perceptron...') # forward pass and build output for idx in xrange(len(layer_sizes) - 1): n_input = layer_sizes[idx] n_output = layer_sizes[idx + 1] layer = Layer(n_input, n_output, layer_types[idx], init_value_scale, uniform_init) self.layers.append(layer) # forward net_output = self.x for idx in xrange(len(self.layers)): net_output = self.layers[idx].output(net_output) # cost function with ground truth provided, for training self.cost = self.layers[-1].neg_loglikelihood(net_output, self.y) # make prediction self.prediction = tf.arg_max(net_output, dimension=1) # prediction error self.prederr = tf.reduce_mean(tf.to_float(tf.not_equal(self.prediction, tf.arg_max(self.y, dimension=1)))) # training self.train_process = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.cost) # session self.sess = tf.Session()
def inference(x1, x2, mask1, mask2, l, y, args, embeddings, reuse=False, training=False): with tf.variable_scope('model', reuse=reuse): embed = tf.get_variable('embed', shape=embeddings.shape, initializer=tf.constant_initializer(embeddings)) embed1 = tf.nn.embedding_lookup(embed, x1) embed2 = tf.nn.embedding_lookup(embed, x2) keep = 1.0 - args.dropout_rate if training else 1.0 dropout1 = tf.nn.dropout(embed1, keep) dropout2 = tf.nn.dropout(embed2, keep) rnn_cell = {'gru': tf.contrib.rnn.GRUCell, 'lstm': tf.contrib.rnn.LSTMCell}[args.rnn_type] rnn1 = bidirectional_dynamic_rnn(dropout1, cell_fn=rnn_cell, n_hidden=args.hidden_size, sequence_length=retrieve_seq_length_op2(mask1), name='rnn1') rnn2 = bidirectional_dynamic_rnn(dropout2, cell_fn=rnn_cell, n_hidden=args.hidden_size, sequence_length=retrieve_seq_length_op2(mask2), return_last=True, name='rnn2') args.rnn_output_size = 2 * args.hidden_size att = BilinearAttention([rnn1, rnn2], args.rnn_output_size, mask1) z = tf.layers.dense(att, units=args.num_labels, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1), use_bias=False) prob = tf.nn.softmax(z) prob = prob * l prob /= tf.reduce_sum(prob, axis=1, keep_dims=True) pred = tf.to_int32(tf.arg_max(prob, dimension=1)) acc = tf.reduce_mean(tf.to_float(tf.equal(pred, y))) if not training: return acc else: epsilon = 1e-7 prob = tf.clip_by_value(prob, epsilon, 1 - epsilon) loss = tf.one_hot(y, depth=args.num_labels) * -tf.log(prob) loss = tf.reduce_sum(loss, axis=1) loss = tf.reduce_mean(loss) if args.optimizer == 'sgd': optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate) elif args.optimizer == 'adam': optimizer = tf.train.AdamOptimizer() elif args.optimizer == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate=args.learning_rate) else: raise NotImplementedError('optimizer = %s' % args.optimizer) train_op = optimizer.minimize(loss) return train_op, loss, acc
def _argmax(self, tensor):
    """ ArgMax
    Args:
        tensor : 2D - Tensor (Height x Width : 64x64)
    Returns:
        arg : Tuple of max position
    """
    resh = tf.reshape(tensor, [-1])
    argmax = tf.arg_max(resh, 0)
    return (argmax // tensor.get_shape().as_list()[0],
            argmax % tensor.get_shape().as_list()[0])
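# Side note (not from the original snippet): the division/modulo above recovers
# (row, col) from a flat argmax. As written it relies on the map being square, as in
# the 64x64 case from the docstring; for a general H x W tensor flattened row-major,
# the width is the correct divisor, as in this minimal sketch with assumed toy values:
import tensorflow as tf

heat = tf.constant([[0.1, 0.9, 0.2],
                    [0.3, 0.0, 0.8]])             # 2 x 3 map
flat_idx = tf.arg_max(tf.reshape(heat, [-1]), 0)  # flat index of the max (here 1)
width = heat.get_shape().as_list()[1]
row, col = flat_idx // width, flat_idx % width    # (0, 1)
with tf.Session() as sess:
    print(sess.run([row, col]))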
def calc_R_glimpse(output, correct_y):
    p_y = tf.nn.softmax(tf.matmul(output, Wa_h_a) + Ba_h_a)
    max_p_y = tf.arg_max(p_y, 1)

    # reward for all examples in the batch
    R = tf.cast(tf.equal(max_p_y, correct_y), tf.float32)
    reward = tf.reduce_mean(R)  # mean reward
    R = tf.reshape(R, (batch_size, 1))
    R = tf.tile(R, [1, 2])
    return R, reward, tf.log(p_y + SMALL_NUM) * onehot_labels_placeholder, max_p_y
def lable_pred(output):
    output = tf.reshape(output, (batch_size, cell_out_size))
    with tf.variable_scope("pred", reuse=DO_SHARE):
        pred_tensor = linear(output, n_classes + 1)
    pred_tensor = tf.nn.softmax(pred_tensor)  # batch_size * 11
    pred = tf.arg_max(pred_tensor, 1)  # (batch_size,)
    pred = tf.reshape(pred, (batch_size, 1))
    return pred_tensor, pred
def calc_reward(outputs): outputs_tensor = tf.convert_to_tensor(outputs) outputs_tensor = tf.transpose(outputs_tensor, perm=[1, 0, 2]) b_weights_batch = tf.tile(b_weights, [10, 1, 1]) b = tf.sigmoid(tf.matmul(outputs_tensor, b_weights_batch)) b = tf.concat(axis=2, values=[b, b]) b = tf.reshape(b, (batch_size, glimpses * 2)) print(b.get_shape()) # consider the action at the last time step outputs = outputs[-1] # look at ONLY THE END of the sequence outputs = tf.reshape(outputs, (batch_size, cell_out_size)) # the hidden layer for the action network h_a_out = weight_variable((cell_out_size, n_classes)) # process its output p_y = tf.nn.softmax(tf.matmul(outputs, h_a_out)) max_p_y = tf.arg_max(p_y, 1) # the targets correct_y = tf.cast(labels_placeholder, tf.int64) # reward for all examples in the batch R = tf.cast(tf.equal(max_p_y, correct_y), tf.float32) reward = tf.reduce_mean(R) # mean reward # p_loc = gaussian_pdf(mean_locs, sampled_locs) p_loc_orig = p_loc p_loc = tf.reshape(p_loc, (batch_size, glimpses * 2)) print(R) R = tf.reshape(R, (batch_size, 1)) R = tf.tile(R, [1, glimpses*2]) print(R) # 1 means concatenate along the row direction no_grad_b = tf.stop_gradient(b) J = tf.concat(axis=1, values=[tf.log(p_y + 1e-5) * onehot_labels_placeholder, tf.log(p_loc + 1e-5) * (R)]) print(J) # sum the probability of action and location J = tf.reduce_sum(J, 1) print(J) # average over batch J = tf.reduce_mean(J, 0) print(J) cost = -J #cost = cost + tf.square(tf.reduce_mean(R - b)) # Adaptive Moment Estimation # estimate the 1st and the 2nd moment of the gradients global_step = tf.Variable(0, trainable=False) lr = tf.train.exponential_decay(1e-3, global_step, 1000, 0.95, staircase=True) optimizer = tf.train.AdamOptimizer(lr) train_op = optimizer.minimize(cost) return cost, reward, max_p_y, correct_y, train_op, b, tf.reduce_mean(b), tf.reduce_mean(R - b), p_loc_orig, p_loc
def body(loop_counter, accumulated_output_array, accumulated_logits_array, next_input, *queue_contents): next_logit, queue_updates = sub_predictor(next_input, queue_contents) gumbeled = next_logit[:, 0, :] - tf.log(-tf.log(tf.random_uniform((tf.shape(next_logit)[0], QUANT_LEVELS)))) sample_disc = tf.arg_max(gumbeled, 1) sample_cont = dequantizer(sample_disc, QUANT_LOWER, QUANT_UPPER, QUANT_LEVELS) accumulated_output_array = accumulated_output_array.write(loop_counter, sample_cont) accumulated_logits_array = accumulated_logits_array.write(loop_counter, next_logit[:, 0, :]) sample_cont = tf.expand_dims(sample_cont, 1) sample_cont = tf.expand_dims(sample_cont, 1) # sic next_input = tf.concat(2, (sample_cont, tf.ones_like(sample_cont))) return [loop_counter+1, accumulated_output_array, accumulated_logits_array, next_input] + queue_updates
def loop_function(prev, _):
    """Function that feeds the previous model output rather than the ground truth."""
    if output_projection is not None:
        prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])
    prev_symbol = tf.arg_max(prev, 1)
    emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
    if not update_embedding:
        emb_prev = tf.stop_gradient(emb_prev)
    return emb_prev
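# A minimal, self-contained sketch (toy sizes assumed; not the original decoder wiring)
# of the greedy feedback implemented above: project the previous step's output to the
# vocabulary, take the argmax token, and look up its embedding as the next input.
import tensorflow as tf

vocab_size, emb_dim = 6, 4
toy_embedding = tf.get_variable("toy_embedding", [vocab_size, emb_dim])
prev_logits = tf.constant([[0.1, 2.0, 0.3, 0.0, 0.0, 0.0]])   # already projected to vocab size
prev_symbol = tf.arg_max(prev_logits, 1)                      # greedy token id
emb_prev = tf.nn.embedding_lookup(toy_embedding, prev_symbol) # fed as the next decoder input
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(emb_prev))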
def predict(self, x, response_type="classification"):
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, "/tmp/dfn.ckpt")
        if response_type == "classification":
            y = tf.arg_max(self.y_prediction, 1)
        elif response_type == "regression":
            y = self.y_prediction
        else:
            print("Wrong response_type")
        return y.eval(feed_dict={self.x: x, self.keep_prob: 1.0})
def body(input_one_hot, state, results):
    output, new_state = cell(input_one_hot, state)
    output_one_hot = tf.one_hot(
        tf.arg_max(output, dimension=1),
        vocabulary_size
    )
    return (
        output_one_hot,
        new_state,
        results.write(results.size(), output_one_hot)
    )
def Predict(self, data_x, test2=False):
    '''
    Predict the classes for unseen data :)
    '''
    predictions = tf.arg_max(self.y_conv, 1)
    if test2:
        return predictions.eval(
            feed_dict={self.x: data_x,
                       self.keep_prob1: 1.0, self.keep_prob2: 1.0, self.keep_prob3: 1.0},
            session=self.Session)
    else:
        return predictions.eval(
            feed_dict={self.x: data_x, self.keep_prob: 1.0},
            session=self.Session)
def sample(self):
    with tf.name_scope('Concrete'):
        gumbel = self.sample_gumbel()
        noisy_logits = tf.div(gumbel + self.logits, self.temperature)
        soft_onehot = tf.nn.softmax(noisy_logits)

        argmax = tf.arg_max(soft_onehot, 0)
        hard_onehot = tf.one_hot(argmax, self.number_of_classes)

        stop_grad = tf.stop_gradient(hard_onehot - soft_onehot)
        # h = h - s + s
        differentiable_hard_onehot = tf.add(stop_grad, soft_onehot, name='onehot')
        return differentiable_hard_onehot
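# A standalone sketch of the straight-through trick used in sample() above (toy logits,
# unbatched vector as in the axis-0 argmax there; names and values are illustrative only):
# the forward value is the hard one-hot, while gradients flow through the soft sample.
import tensorflow as tf

logits = tf.constant([2.0, 0.5, -1.0])
temperature = 0.5
uniform = tf.random_uniform(tf.shape(logits), minval=1e-20, maxval=1.0)
gumbel_noise = -tf.log(-tf.log(uniform))
soft = tf.nn.softmax((logits + gumbel_noise) / temperature)
hard = tf.one_hot(tf.arg_max(soft, 0), 3)
straight_through = tf.stop_gradient(hard - soft) + soft
with tf.Session() as sess:
    print(sess.run(straight_through))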
def full_model(lm, data, labels):
    output_logits = classifier(lm, data)
    output_probs = tf.nn.softmax(output_logits)

    with tf.name_scope('error'):
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(output_logits, labels))
        lm.summaries.scalar_summary('cross_entropy', cross_entropy)
        percent_error = 100.0 * tf.reduce_mean(
            tf.cast(tf.not_equal(tf.arg_max(output_logits, dimension=1), labels), tf.float32))
        lm.summaries.scalar_summary('percent_error', percent_error)
    return output_probs, cross_entropy, percent_error
def _create_patch(sess, content_input, style_input, content_regions, style_regions, blur_mapping): dim = content_input.get_shape().as_list() h, w, d = dim[1], dim[2], dim[3] ph, pw = h - 2, w - 2 pn = ph * pw assert content_input.get_shape() == style_input.get_shape() real_style = style_input if content_regions is not None and style_regions is not None: assert content_regions.get_shape().as_list()[:3] == style_regions.get_shape().as_list()[:3] map_len = content_input.get_shape().as_list()[3] mapped_content = tf.concat(3, [content_input, tf.tile(content_regions, [1, 1, 1, map_len])]) mapped_style = tf.concat(3, [style_input, tf.tile(style_regions, [1, 1, 1, map_len])]) else: mapped_content = content_input mapped_style = real_style ot = time.time() patches = _slice_patches_np(sess, mapped_style) p_matrix = l2_normalise(patches, [0, 1, 2]) conv_var = tf.nn.conv2d(mapped_content, p_matrix, [1, 1, 1, 1], "VALID", use_cudnn_on_gpu=False) content_slice = _slice_patches_np(sess, mapped_content) norm_reduce_matrix = l2_norm(content_slice, [0, 1, 2], True) norm_reduce_matrix = tf.reshape(norm_reduce_matrix, [1, ph, pw, 1]) conv_var = conv_var / norm_reduce_matrix assert conv_var.get_shape().as_list() == [1, ph, pw, pn] if blur_mapping: # blur before max, may look more natural blur_size = 3 blur = tf.constant(1, tf.float32, [blur_size, blur_size, 1, 1]) / (blur_size ** 2) blur = tf.tile(blur, [1, 1, pn, 1]) conv_var = tf.nn.depthwise_conv2d(conv_var, blur, [1, 1, 1, 1], "SAME") max_arg = tf.arg_max(conv_var, 3) max_arg = tf.reshape(max_arg, [pn]) max_arg_out = sess.run(max_arg) if real_style is not mapped_style: real_patches = _slice_patches_np(sess, real_style) assert real_patches.get_shape().as_list() == [3, 3, d, pn] patches = real_patches print "mapping calculation finished:", time.time() - ot assert patches.get_shape().as_list() == [3, 3, d, pn] print "mapping finished:" return max_arg_out, patches
def predictions(self, y):
    """Returns predictions from sparse scores

    Args:
        y: tensor(?, label_size)
    Returns:
        predictions: tensor(?,1)
    """
    predictions = None
    # YOUR CODE HERE
    predictions = tf.arg_max(y, 1)
    # END YOUR CODE
    return predictions
def predict(self, x):
    # session = tf.Session()
    # tf.initialize_all_variables().run(session=session)
    state = self.initial_state.eval(session=self.sess)
    feed_dict = {self.input_data: x, self.initial_state: state}
    logits, final_state = self.sess.run([self.logits, self.final_state], feed_dict)
    probs = tf.nn.softmax(logits)
    predict_result = tf.arg_max(probs, dimension=1)
    predict_result = self.sess.run(predict_result)
    return predict_result
flags = tf.app.flags.FLAGS

with tf.gfile.FastGFile(os.path.join(flags.checkpointDir, 'output_graph.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    final_result_tensor, jpeg_data_tensor, keep_prob = \
        tf.import_graph_def(graph_def, name='', return_elements=[
            'final_result:0', 'DecodeJpeg/contents:0', 'input/keep_prob:0'
        ])

sess = tf.Session()
result = tf.arg_max(final_result_tensor, 1)
test_image = tf.gfile.Glob(os.path.join(flags.buckets, '*.jpg'))
output_file = tf.gfile.GFile(os.path.join(flags.checkpointDir, 'result.txt'), 'wb')
output_label = tf.gfile.GFile(
    os.path.join(flags.checkpointDir, 'output_labels.txt'), 'r')

label = []
for line in output_label:
    label.append(int(line))

for key, filename in enumerate(test_image):
    image_id = os.path.basename(filename).split('.')[0]
    image = tf.gfile.FastGFile(filename, 'rb').read()
    predict = sess.run(result, {jpeg_data_tensor: image, keep_prob: [1]})
loss0 = labelInput * tf.log(pred) loss1 = 0 for m in range(0, 100): for n in range(0, 10): loss1 = loss1 - loss0[m, n] loss = loss1 / 100 # train train = tf.train.GradientDescentOptimizer(0.01).minimize(loss) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) for i in range(100): images, labels = mnist.train.next_batch(500) sess.run(train, feed_dict={imageInput: images, labelInput: labels}) pred_test = sess.run(pred, feed_dict={ imageInput: mnist.test.images, labelInput: labels }) acc = tf.equal(tf.arg_max(pred_test, 1), tf.arg_max(mnist.test.labels, 1)) acc_float = tf.reduce_mean(tf.cast(acc, tf.float32)) acc_result = sess.run(acc_float, feed_dict={ imageInput: mnist.test.images, labelInput: mnist.test.labels }) print(acc_result)
# Helper helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp, decoder_lengths, time_major=False) # Decoder decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state, output_layer=projection_layer) # Dynamic decoding outputs, state, _ = tf.contrib.seq2seq.dynamic_decode(decoder) logits = outputs.rnn_output pred = tf.arg_max(logits, dimension=-1) # Loss crossent = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=decoder_outputs, logits=logits) train_loss = (tf.reduce_sum(crossent * target_weight) / batch_size) # Compute and optimize Gradient params = tf.trainable_variables() gradients = tf.gradients(train_loss, params) clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5) # Optimization# optimizer = tf.train.AdamOptimizer(0.0002) update_step = optimizer.apply_gradients(zip(clipped_gradients, params))
W2 = tf.Variable(tf.random_uniform([HIDDEN_NO, OUTPUT_NO], -xavier(HIDDEN_NO), xavier(HIDDEN_NO))) #Biases b1 = tf.Variable(tf.zeros([HIDDEN_NO])) b2 = tf.Variable(tf.zeros([OUTPUT_NO])) #Output hidden = tf.matmul(data, W1) + b1 output = tf.matmul(hidden, W2) + b2 y = tf.nn.sparse_softmax_cross_entropy_with_logits(output, labels) #Training train_step = tf.train.AdamOptimizer(0.01).minimize(tf.reduce_mean(y)) #Testing correct = tf.equal(labels, tf.cast(tf.arg_max(output, 1), tf.int32)) accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) #Initializing init = tf.global_variables_initializer() ######################################### ########### #VALIDATION def trainRange(session, begin, end, flag = 0): ###### #flags #0 - single examples #1 - events #2 - files
    fc1_layer, [fc2_input_size, fc2_size], keep_prob)
fc3_input_size = fc2_layer.get_shape()[1:4].num_elements()
fc3_layer = NetTool.create_fc_layer(
    fc2_layer, [fc3_input_size, fc3_size], keep_prob)
out_layer = NetTool.create_fc_layer(
    fc3_layer, [fc3_size, class_num], keep_prob, use_relu=False)

pred_Y = tf.nn.softmax(out_layer, name='pred_Y')
# softmax_cross_entropy_with_logits expects pre-softmax logits, so pass out_layer here
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=out_layer))
optimizer = tf.train.AdamOptimizer().minimize(loss)  # learning_rate=0.0001
temp = tf.equal(tf.arg_max(pred_Y, 1), tf.arg_max(Y, 1))
accuracy = tf.reduce_mean(tf.cast(temp, tf.float32))

print('Loading the training dataset...')
trainSet = dataset.dataSet(filePath, classes, way='txt', txtPath=txtPath)
print('Loading the test dataset...')
txtFilePath = '/Volumes/Seagate Backup Plus Drive/服务外包/picture/2019-03-05/body1'
testSet = dataset.dataSet(txtFilePath, classes, way='image')
print('Datasets loaded')

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10001):
        batchX, batchY, _ = trainSet.next_batch(batchSize)
        # print(type(batchX))
        sess.run([optimizer], feed_dict={X: batchX, Y: batchY})
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2)) fc8b = tf.Variable(tf.zeros(nb_classes)) logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b) BATCH_SIZE = 128 EPOCHS = 10 cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits) loss_operation = tf.reduce_mean(cross_entropy) optimizer = tf.train.AdamOptimizer() training_operation = optimizer.minimize(loss_operation, var_list=[fc8W, fc8b]) init_op = tf.initialize_all_variables() # Train and evaluate the feature extraction model. correct_prediction = tf.equal(tf.arg_max(logits, 1), labels) accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver() def evaluate(X_data, y_data): num_examples = len(X_data) total_accuracy = 0 sess = tf.get_default_session() for offset in range(0, num_examples, BATCH_SIZE): end = offset + BATCH_SIZE batch_x, batch_y = X_data[offset:end], y_data[offset:end] accuracy = sess.run(accuracy_operation, feed_dict={ x: batch_x, labels: batch_y
def evaluate(data_dir=None, real_time=True): with tf.Graph().as_default(): if real_time: if data_dir is not None: eval_images, ls_filename = dt.data_input( data_dir, FLAGS.eval_batch_size, False, False) else: eval_images, ls_filename = dt.data_input( FLAGS.data_dir, FLAGS.eval_batch_size, False, False) else: if data_dir is not None: eval_images, real_lb = dt.data_input(data_dir, FLAGS.eval_batch_size, False) print(eval_images.get_shape()) else: eval_images, real_lb = dt.data_input(FLAGS.data_dir, FLAGS.eval_batch_size, False) print(eval_images.get_shape()) logits = vng_model.inference(eval_images) predict_label = tf.arg_max(logits, 1) init = tf.global_variables_initializer() # Load trained weights saver = tf.train.Saver(tf.global_variables()) coord = tf.train.Coordinator() with tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement)) as sess: sess.run(init) threads = tf.train.start_queue_runners(sess, coord) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: print('No checkpoint file found') if real_time: num_examples = len( [name_file for name_file in os.listdir(FLAGS.data_dir)]) num_iter = int( math.ceil(float(num_examples) / FLAGS.eval_batch_size)) path_output = os.path.join(FLAGS.output_dir, 'output.csv') with open(path_output, "wb") as f: writer = csv.writer(f) for idx in range(num_iter): eval_img, pre_label, ls_name = sess.run( [eval_images, predict_label, ls_filename]) if (idx + 1) * FLAGS.eval_batch_size <= num_examples: ls_name = [ name_file.split('/')[-1] for name_file in ls_name ] result_model = np.column_stack( (np.array(ls_name), np.array(pre_label))) else: if num_examples - idx * FLAGS.eval_batch_size > 0: last_element = num_examples - idx * FLAGS.eval_batch_size else: last_element = num_examples ls_name = [ name_file.split('/')[-1] for name_file in ls_name ] result_model = np.column_stack( (np.array(ls_name)[0:last_element], np.array(pre_label)[0:last_element])) writer.writerows(result_model) real_label = None else: eval_img, pre_label, real_label = sess.run( [eval_images, predict_label, real_lb]) coord.request_stop() coord.join(threads, stop_grace_period_secs=5) sess.close() return eval_img, pre_label, real_label
# This also makes training faster, less work to do! fc7 = tf.stop_gradient(fc7) # TODO: Add the final layer for traffic sign classification. shape = (fc7.get_shape().as_list()[-1], nb_classes ) # use this shape for the weight matrix print(shape) weights = tf.Variable(tf.truncated_normal(shape)) biases = tf.Variable(tf.zeros(nb_classes), dtype=tf.float32) logits = tf.matmul(fc7, weights) + biases probs = tf.nn.softmax(logits) preds = tf.arg_max(probs, dimension=1) # TODO: Define loss, training, accuracy operations. # HINT: Look back at your traffic signs project solution, you may # be able to reuse some the code. cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) loss_op = tf.reduce_mean(cross_entropy) train_op = tf.train.AdamOptimizer().minimize(loss_op, var_list=[weights, biases]) init_op = tf.global_variables_initializer() accuracy_op = tf.reduce_mean(tf.cast(tf.equal(labels, preds), tf.float32)) # TODO: Train and evaluate the feature extraction model.
def build_model(self, state, train=True, reuse=False): inputs = state[0] trade_rem = state[1] with tf.variable_scope(self.__name__, reuse=reuse): with tf.name_scope(PHASE): self.phase = tf.placeholder(dtype=tf.bool) with tf.variable_scope(INPUT_PARAMS, reuse=reuse): self.batch_size = tf.shape(inputs)[0] inputs = tf.reshape(inputs, shape=[ self.batch_size, self.params.split_size, self.params.window_size, self.params.num_channels ]) # self.debug1 = inputs with tf.variable_scope(CONV_LAYERS, reuse=reuse): window_size = self.params.window_size num_convs = len(self.params.filter_sizes) for i in range(0, num_convs): with tf.variable_scope(CONV_LAYERS_.format(i + 1), reuse=reuse): window_size = window_size - self.params.kernel_sizes[ i] + 1 inputs = self.conv2d_layer(inputs, self.params.filter_sizes[i], self.params.kernel_sizes[i], CONV_.format(i + 1), reuse) inputs = self.batch_norm_layer( inputs, self.phase, BATCH_NORM_.format(i + 1), reuse) inputs = tf.nn.relu(inputs) inputs = self.dropout_conv_layer( inputs, self.phase, self.params.conv_keep_prob, DROPOUT_CONV_.format(i + 1)) input_shape = tf.shape(inputs) inputs = tf.reshape(inputs, shape=[ self.batch_size, self.params.split_size, window_size * self.params.filter_sizes[-1] ]) # self.debug2 = inputs gru_cells = [] for i in range(0, self.params.gru_num_cells): cell = tf.contrib.rnn.GRUCell( num_units=self.params.gru_cell_size, reuse=reuse) if train: cell = tf.contrib.rnn.DropoutWrapper( cell, output_keep_prob=self.params.gru_keep_prob) gru_cells.append(cell) multicell = tf.contrib.rnn.MultiRNNCell(gru_cells) with tf.name_scope(DYNAMIC_UNROLLING): output, final_state = tf.nn.dynamic_rnn(cell=multicell, inputs=inputs, dtype=tf.float32) output = tf.unstack(output, axis=1)[-1] # self.debug3 = output ''' Append the information regarding the number of trades left in the episode ''' output = tf.stack([output, trade_rem], axis=0) with tf.variable_scope(FULLY_CONNECTED, reuse=reuse): num_dense_layers = len(self.params.dense_layer_sizes) for i in range(0, num_dense_layers): with tf.variable_scope(DENSE_LAYER_.format(i + 1), reuse=reuse): output = self.dense_layer( output, self.params.dense_layer_sizes[i], DENSE_.format(i + 1), reuse) output = self.batch_norm_layer( output, self.phase, BATCH_NORM_.format(i + 1), reuse) output = tf.nn.relu(output) output = self.dropout_dense_layer( output, self.phase, self.params.dense_keep_prob, DROPOUT_DENSE_.format(i + 1)) self._values = self.dense_layer(output, self.params.num_actions, Q_VALUES, reuse) with tf.name_scope(AVG_Q_SUMMARY): avg_q = tf.reduce_mean(self._values, axis=0) self._avg_q_summary = [] for idx in range(self.params.num_actions): self._avg_q_summary.append( tf.summary.histogram('q/{}'.format(idx), avg_q[idx])) self._avg_q_summary = tf.summary.merge(self._avg_q_summary, name=AVG_Q_SUMMARY) self._action = tf.arg_max(self._values, dimension=1, name=ACTION)
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight') b = tf.Variable(tf.random_normal([nb_classes]), name='bias') # define hypothesis logits = tf.matmul(X, W) + b # hypothesis = tf.nn.softmax(tf.matmul(X,W)+b) hypothesis = tf.nn.softmax(logits) # for Test # define cost cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_one_hot) cost = tf.reduce_mean(cost_i) # cost = tf.reduce_mean(-tf.reduce_sum(Y*tf.log(hypothesis), axis=1)) # define gradient gradient = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost) # Calc Accuracy predict = tf.arg_max(hypothesis, 1) correct_predict = tf.equal(predict, tf.arg_max(Y_one_hot, 1)) accuracy = tf.reduce_mean(tf.cast(correct_predict, dtype=tf.float32)) # init network sess = tf.Session() sess.run(tf.global_variables_initializer()) # go implement for step in range(2001): cost_, accu_, _ = sess.run([cost, accuracy, gradient], feed_dict={X: x_data, Y:y_data}) if step % 200 == 0: print("Step: {:5}\tLoss: {:.3f}\tAcc:{:.2%}".format(step, cost_, accu_) ) pred = sess.run(predict, feed_dict={X: x_data})
def __init__(self, is_training=True): self.graph = tf.Graph() with self.graph.as_default(): if is_training: self.x, self.y, self.num_batch = get_batch_data( ) # (N=32, T=10), else: # inference self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen)) self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen)) # define decoder inputs (decoder_inputs和y一样,除了第一列全为2) self.decoder_inputs = tf.concat( (tf.ones_like(self.y[:, :1]) * 2, self.y[:, :-1]), -1) # 2:<S> # Load vocabulary de2idx, idx2de = load_de_vocab() en2idx, idx2en = load_en_vocab() # Encoder with tf.variable_scope("encoder"): ## Embedding (self.enc.shape = (batch_size=32, maxlen=10, hidden_units=512)) self.enc = embedding(self.x, vocab_size=len(de2idx), num_units=hp.hidden_units, scale=True, scope="enc_embed") ## Positional Encoding self.enc += embedding(tf.tile( tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0), [tf.shape(self.x)[0], 1]), vocab_size=hp.maxlen, num_units=hp.hidden_units, zero_pad=False, scale=False, scope="enc_pe") ## Dropout self.enc = tf.layers.dropout( self.enc, rate=hp.dropout_rate, training=tf.convert_to_tensor(is_training)) ## Blocks for i in range(hp.num_blocks): with tf.variable_scope("num_blocks_{}".format(i)): ### Multihead Attention self.enc = multihead_attention( queries=self.enc, keys=self.enc, num_units=hp.hidden_units, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, is_training=is_training, causality=False) ### Feed Forward self.enc = feedforward( self.enc, num_units=[4 * hp.hidden_units, hp.hidden_units]) # Decoder with tf.variable_scope("decoder"): ## Embedding self.dec = embedding(self.decoder_inputs, vocab_size=len(en2idx), num_units=hp.hidden_units, scale=True, scope="dec_embed") ## Positional Encoding self.dec += embedding(tf.tile( tf.expand_dims(tf.range(tf.shape(self.decoder_inputs)[1]), 0), [tf.shape(self.decoder_inputs)[0], 1]), vocab_size=hp.maxlen, num_units=hp.hidden_units, zero_pad=False, scale=False, scope="dec_pe") ## Dropout self.dec = tf.layers.dropout( self.dec, rate=hp.dropout_rate, training=tf.convert_to_tensor(is_training)) ## Blocks for i in range(hp.num_blocks): with tf.variable_scope("num_blocks_{}".format(i)): ## Multihead Attention ( self-attention) self.dec = multihead_attention( queries=self.dec, keys=self.dec, num_units=hp.hidden_units, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, is_training=is_training, causality=True, scope="self_attention") ## Multihead Attention ( vanilla attention) 很关键 self.dec = multihead_attention( queries=self.dec, keys=self.enc, num_units=hp.hidden_units, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, is_training=is_training, causality=False, scope="vanilla_attention") ## Feed Forward self.dec = feedforward( self.dec, num_units=[4 * hp.hidden_units, hp.hidden_units]) # Final linear projection self.logits = tf.layers.dense( self.dec, len(en2idx)) # (32,10,len(vocabulary)) self.preds = tf.to_int32(tf.arg_max(self.logits, dimension=-1)) # (32,10) self.istarget = tf.to_float(tf.not_equal(self.y, 0)) self.acc = tf.reduce_sum( tf.to_float(tf.equal(self.preds, self.y)) * self.istarget) / (tf.reduce_sum(self.istarget)) tf.summary.scalar('acc', self.acc) if is_training: # Loss self.y_smoothed = label_smoothing( tf.one_hot(self.y, depth=len(en2idx))) self.loss = tf.nn.softmax_cross_entropy_with_logits( logits=self.logits, labels=self.y_smoothed) self.mean_loss = tf.reduce_sum( self.loss * self.istarget) / (tf.reduce_sum(self.istarget)) # Training Scheme self.global_step = tf.Variable(0, name='global_step', trainable=False) 
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9, beta2=0.98,
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
n_batch = mnist.train.num_examples // batch_size

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# single-layer softmax network
W = tf.Variable(tf.zeros([784, 10]), dtype=tf.float32)
b = tf.Variable(tf.zeros([10]), dtype=tf.float32)
logits = tf.add(tf.matmul(x, W), b)
prediction = tf.nn.softmax(logits)

# mean squared error (unused alternative)
# loss = tf.reduce_mean(tf.square(y-prediction))
# cross entropy expects the raw logits, not the softmax output
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
train = tf.train.GradientDescentOptimizer(0.2).minimize(loss)

correct = tf.equal(tf.arg_max(y, 1), tf.arg_max(prediction, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for epoch in range(20):
        for batch in range(n_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            sess.run(train, feed_dict={x: batch_x, y: batch_y})
        acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        print(" epoch: ", epoch, " ACC: ", acc,
              " loss: ", sess.run(loss, feed_dict={x: batch_x, y: batch_y}))
def predict_next_gt(data, images_train, images_placeholder, training_time_placeholder, logits, sess): ''' Uses current network weights to segment images for next recursion After postprocessing, these are used as the ground truth for further training :param data: Data of the current recursion - if this is of recursion n, this function predicts ground truths for recursion n + 1 :param images_train: Numpy array of training images :param images_placeholder: Tensorflow placeholder for image feed :param training_time_placeholder: Boolean tensorflow placeholder :param logits: Logits operator for calculating segmentation mask probabilites :param sess: Tensorflow session :return: The data file for recursion n + 1 ''' #get recursion from filename recursion = utils.get_recursion_from_hdf5(data) new_recursion_fname = acdc_data.recursion_filepath(recursion + 1, data_file=data) if not os.path.exists(new_recursion_fname): fpath = os.path.dirname(data.filename) data.close() data = acdc_data.create_recursion_dataset(fpath, recursion + 1) else: data.close() data = h5py.File(new_recursion_fname, 'r+') #attributes to track processing prediction = data['predicted'] processed = data['predicted'].attrs.get('processed') if not processed: processed_to = data['predicted'].attrs.get('processed_to') scr_max = len(images_train) print("SCR max = " + str(scr_max)) for scr_idx in range(processed_to, scr_max, exp_config.batch_size): if scr_idx + exp_config.batch_size > scr_max: print("Entered last") # At the end of the dataset # Must ensure feed_dict is 20 images long however ind = list(range(scr_max - exp_config.batch_size, scr_max)) else: ind = list(range(scr_idx, scr_idx + exp_config.batch_size)) print(str(ind)) # logging.info('Saving prediction after recursion {0} for images {1} to {2} ' # .format(ind[0], ind[-1])) x = np.expand_dims(np.array(images_train[ind, ...]), -1) feed_dict = { images_placeholder: x, training_time_placeholder: False } softmax = tf.nn.softmax(logits) print("softmax") #threshold output of cnn if exp_config.cnn_threshold: threshold = tf.constant(exp_config.cnn_threshold, dtype=tf.float32) s = tf.multiply( tf.ones(shape=[exp_config.batch_size, 212, 212, 1]), threshold) softmax = tf.concat([s, softmax[..., 1:]], axis=-1) print("threshold") # if exp_config.use_crf: # #get unary from softmax # unary = tf.multiply(-1, tf.log(softmax)) # #calculate mask mask = tf.arg_max(softmax, dimension=-1) print("before sess") mask_out = sess.run(mask, feed_dict=feed_dict) print("after sess : " + str(mask_out)) #save to dataset for indice in range(len(ind)): prediction[ind[indice], ...] = np.squeeze(mask_out[indice, ...]) print("added " + str(ind[indice])) data['predicted'].attrs.modify('processed_to', scr_idx + exp_config.batch_size) if exp_config.reinit: logging.info("Initialising variables") sess.run(tf.global_variables_initializer()) data['predicted'].attrs.modify('processed', True) logging.info( 'Created unprocessed ground truths for recursion {}'.format( recursion + 1)) #Reopen in read only mode data.close() data = h5py.File(new_recursion_fname, 'r') return data
fc8W = tf.Variable(tf.truncated_normal(shape, stddev=1e-2)) fc8b = tf.Variable(tf.zeros(nb_classes)) logits = tf.nn.xw_plus_b(fc7, fc8W, fc8b) # TODO: Define loss, training, accuracy operations. # HINT: Look back at your traffic signs project solution, you may # be able to reuse some the code. cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) loss_op = tf.reduce_mean(cross_entropy) opt = tf.train.AdamOptimizer() # TODO: Train and evaluate the feature extraction model. train_op = opt.minimize(loss_op, var_list=[fc8W, fc8b]) init_op = tf.global_variables_initializer() preds = tf.arg_max(logits, 1) accuracy_op = tf.reduce_mean(tf.cast(tf.equal(preds, labels), tf.float32)) def eval(X, y, sess): total_acc = 0 total_loss = 0 for offset in range(0, X.shape[0], batch_size): end = offset + batch_size X_batch = X[offset:end] y_batch = y[offset:end] loss, acc = sess.run([loss_op, accuracy_op], feed_dict={ features: X_batch, labels: y_batch
flatten = tf.layers.flatten(pool2)
print(flatten)  # shape=(?, 1440)

# fully-connected layer
fc = tf.layers.dense(flatten, 400, activation=tf.nn.relu)
print(fc)  # shape=(?, 400)

# add dropout to prevent overfitting
dropout_fc = tf.layers.dropout(fc, dropout_placeholdr)
print(dropout_fc)  # shape=(?, 400)

# output layer: 3 classes
logits = tf.layers.dense(dropout_fc, num_classes)
print(logits)
predicted_labels = tf.arg_max(logits, 1)

# define the loss
losses = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(labels_placeholder, num_classes),  # ground-truth labels
    logits=logits                                        # predicted logits
)
mean_loss = tf.reduce_mean(losses)

# define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(losses)

saver = tf.train.Saver()
with tf.Session() as sess:
def evaluate(sess, X, Y):
    predicted = tf.cast(tf.arg_max(inference(X), 1), tf.int32)
    print sess.run(tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32)))
Created on Oct 21, 2016

@author: botpi
'''
import tensorflow as tf
import numpy as np
import scipy.io
from apiepi import *

print "begin"

x = tf.placeholder(tf.float32, [None, 256])
W = tf.placeholder(tf.float32, [256, 2])
b = tf.placeholder(tf.float32, [1, 2])
pred = tf.nn.softmax(tf.matmul(x, W) + b)
predm = tf.arg_max(pred, 1)
init = tf.initialize_all_variables()

r = []
r.append(["File", "Class"])
for i in range(3):
    id = i + 1
    resp = scipy.io.loadmat("resp_%s" % id)
    images, labels, names = read_images("test_%s" % id)
    with tf.Session() as sess:
        sess.run(init)
        prob = pred.eval({x: images, W: resp["W"], b: resp["b"]})
        probm = predm.eval({x: images, W: resp["W"], b: resp["b"]})
# MNIST data image of shape 28 * 28 = 784
X = tf.placeholder(tf.float32, [None, 784])
# 0 - 9 digits recognition = 10 classes
Y = tf.placeholder(tf.float32, [None, nb_classes])

W = tf.Variable(tf.random_normal([784, nb_classes]))
b = tf.Variable(tf.random_normal([nb_classes]))

# Hypothesis (using softmax)
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Test model
is_correct = tf.equal(tf.arg_max(hypothesis, 1), tf.arg_max(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# parameters
training_epochs = 15
batch_size = 100

with tf.Session() as sess:
    # Initialize Tensorflow variables
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
def train(imgs, labels, is_train, batch_size=128, epochs=5, train_f=1): thres = 0.3 alpha = 1 beta = 5 learning_rate = 0.001 smooth = 0.1 loss, optm = f_train(imgs, labels) pert = genernator(imgs, is_train=is_train) pert = tf.clip_by_value(pert, -thres, thres) # 结果过滤 g_outputs = tf.add(pert, imgs) g_outputs = tf.clip_by_value(g_outputs, 0, 1) d_outputs_real = discriminator(imgs) d_outputs_fake = discriminator(g_outputs) model(imgs) f_logits, f_outputs = model(g_outputs) # d_loss d_loss_real = tf.losses.mean_squared_error( predictions=d_outputs_real, labels=tf.ones_like(d_outputs_real) * (1 - smooth)) d_loss_fake = tf.losses.mean_squared_error( predictions=d_outputs_fake, labels=tf.zeros_like(d_outputs_fake)) d_loss = d_loss_real + d_loss_fake f_pred = tf.arg_max(f_outputs, 1) f_accr = tf.reduce_mean( tf.cast(tf.equal(f_pred, tf.arg_max(labels, 1)), tf.float32)) # L_hinge # L_hinge = tf.reduce_mean(tf.maximum(0.,tf.sqrt(2*tf.nn.l2_loss(pert))-thres)) zeros = tf.zeros((tf.shape(pert)[0])) L_hinge = tf.reduce_mean( tf.maximum( zeros, tf.norm(tf.reshape(pert, (tf.shape(pert)[0], -1)), axis=1) - thres)) # L_adv real = tf.reduce_sum(f_outputs * labels, 1) other = tf.reduce_max((1 - labels) * f_outputs - labels * 10000, axis=1) L_adv = tf.reduce_max(tf.maximum(0., other - real)) # g_loss g_loss_fake = tf.losses.mean_squared_error( predictions=d_outputs_fake, labels=tf.ones_like(d_outputs_fake) * (1 - smooth)) g_loss = L_adv + alpha * g_loss_fake + beta * L_hinge vars = tf.trainable_variables() f_vars = [var for var in vars if var.name.startswith("mf")] g_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="g") d_vars = [var for var in vars if var.name.startswith("d")] with tf.control_dependencies(tf.get_collection(tf.GraphKeys)): g_optm = tf.train.AdamOptimizer().minimize(g_loss, var_list=g_vars) d_optm = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize( d_loss, var_list=d_vars) f_saver = tf.train.Saver(f_vars) g_saver = tf.train.Saver(g_vars) d_saver = tf.train.Saver(d_vars) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) batch_imgs, batch_labels = mnist.train.next_batch( batch_size=batch_size) batch_imgs = batch_imgs.reshape([-1, 28, 28, 1]) if train_f == 1: for e in range(epochs): cost_sum = 0 for i in range(mnist.train.num_examples // batch_size): batch_imgs, batch_labels = mnist.train.next_batch( batch_size) batch_imgs = batch_imgs.reshape([-1, 28, 28, 1]) _, cost = sess.run([optm, loss], feed_dict={ imgs: batch_imgs, labels: batch_labels }) cost_sum += cost print("%d/%d cost=%.4f" % (e + 1, epochs, cost_sum / batch_size)) f_saver.save(sess, "./checkpoint/advf.ckpt") else: f_saver.restore(sess, "./checkpoint/advf.ckpt") for e in range(epochs): g_cost_sum = 0 d_cost_sum = 0 L_Adv_sum = 0 L_hinge_sum = 0 total_batch = mnist.train.num_examples // batch_size for step in range(total_batch): fake_target = np.full((batch_size, ), 1) #定向 # for i in range(batch_size): fake_target.append(0) fake_labels = np.eye(10)[fake_target] # print(fake_labels) _, d_cost = sess.run([d_optm, d_loss], feed_dict={ imgs: batch_imgs, labels: fake_labels, is_train: True }) _, g_cost, adv_cost, hinge_cost = sess.run( [g_optm, g_loss, L_hinge, L_adv], feed_dict={ imgs: batch_imgs, labels: fake_labels, is_train: True }) g_cost_sum += g_cost d_cost_sum += d_cost L_Adv_sum += L_adv L_hinge_sum += L_hinge print( "%d/%d G:%.5f,D:%.5f,L_Adv=%.9f,L_hinge=%.9f" % (e + 1, epochs, g_cost / total_batch, d_cost / total_batch, adv_cost / total_batch, hinge_cost / total_batch)) 
g_saver.save(sess, "./checkpoint/gen.ckpt") d_saver.save(sess, "./checkpoint/d.ckpt") test_imgs, test_labels = mnist.test.next_batch(25) test_imgs = test_imgs.reshape([-1, 28, 28, 1]) samples = sess.run([g_outputs], feed_dict={ imgs: test_imgs, labels: test_labels, is_train: False })[0] plot_img(samples) print(sess.run(f_pred, feed_dict={imgs: samples, is_train: False}))
def evaluate(sess, X, Y):
    # evaluate the resulting trained model
    predicted = tf.cast(tf.arg_max(inference(X), 1), tf.int32)
    print "Accuracy: ", sess.run(
        tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32)))
def __init__(self, num_classes, word_vocab=None, char_vocab=None, POS_vocab=None, NER_vocab=None,
             dropout_rate=0.5, learning_rate=0.001, optimize_type='adam', lambda_l2=1e-5,
             with_word=True, with_char=True, with_POS=True, with_NER=True,
             char_lstm_dim=20, context_lstm_dim=100, aggregation_lstm_dim=200, is_training=True,
             filter_layer_threshold=0.2, MP_dim=50, context_layer_num=1, aggregation_layer_num=1,
             fix_word_vec=False, with_filter_layer=True, with_highway=False,
             with_lex_features=False, lex_dim=100, word_level_MP_dim=-1,
             sep_endpoint=False, end_model_combine=False, with_match_highway=False,
             with_aggregation_highway=False, highway_layer_num=1,
             with_lex_decomposition=False, lex_decompsition_dim=-1,
             with_left_match=True, with_right_match=True, with_full_match=True,
             with_maxpool_match=True, with_attentive_match=True, with_max_attentive_match=True,
             use_options=False, num_options=-1, with_no_match=False, verbose=False):
    # ======word representation layer======
    in_question_repres = []
    in_passage_repres = []
    self.question_lengths = tf.placeholder(tf.int32, [None])
    self.passage_lengths = tf.placeholder(tf.int32, [None])
    self.truth = tf.placeholder(tf.int32, [None])  # [batch_size]
    input_dim = 0
    if with_word and word_vocab is not None:
        self.in_question_words = tf.placeholder(tf.int32, [None, None])  # [batch_size, question_len]
        self.in_passage_words = tf.placeholder(tf.int32, [None, None])  # [batch_size, passage_len]
        # self.word_embedding = tf.get_variable("word_embedding", shape=[word_vocab.size()+1, word_vocab.word_dim], initializer=tf.constant(word_vocab.word_vecs), dtype=tf.float32)
        word_vec_trainable = True
        cur_device = '/gpu:0'
        if fix_word_vec:
            word_vec_trainable = False
            cur_device = '/cpu:0'
        print('!!!shape=', word_vocab.word_vecs.shape)
        with tf.device(cur_device):
            self.word_embedding = tf.get_variable(
                "word_embedding", trainable=word_vec_trainable,
                initializer=tf.constant(word_vocab.word_vecs), dtype=tf.float32)
        in_question_word_repres = tf.nn.embedding_lookup(
            self.word_embedding, self.in_question_words)  # [batch_size, question_len, word_dim]
        in_passage_word_repres = tf.nn.embedding_lookup(
            self.word_embedding, self.in_passage_words)  # [batch_size, passage_len, word_dim]
        in_question_repres.append(in_question_word_repres)
        in_passage_repres.append(in_passage_word_repres)
        input_shape = tf.shape(self.in_question_words)
        batch_size = input_shape[0]
        question_len = input_shape[1]
        input_shape = tf.shape(self.in_passage_words)
        passage_len = input_shape[1]
        input_dim += word_vocab.word_dim
    if with_POS and POS_vocab is not None:
        self.in_question_POSs = tf.placeholder(tf.int32, [None, None])  # [batch_size, question_len]
        self.in_passage_POSs = tf.placeholder(tf.int32, [None, None])  # [batch_size, passage_len]
        # self.POS_embedding = tf.get_variable("POS_embedding", shape=[POS_vocab.size()+1, POS_vocab.word_dim], initializer=tf.constant(POS_vocab.word_vecs), dtype=tf.float32)
        self.POS_embedding = tf.get_variable(
            "POS_embedding", initializer=tf.constant(POS_vocab.word_vecs), dtype=tf.float32)
        in_question_POS_repres = tf.nn.embedding_lookup(
            self.POS_embedding, self.in_question_POSs)  # [batch_size, question_len, POS_dim]
        in_passage_POS_repres = tf.nn.embedding_lookup(
            self.POS_embedding, self.in_passage_POSs)  # [batch_size, passage_len, POS_dim]
        in_question_repres.append(in_question_POS_repres)
        in_passage_repres.append(in_passage_POS_repres)
        input_shape = tf.shape(self.in_question_POSs)
        batch_size = input_shape[0]
        question_len = input_shape[1]
        input_shape = tf.shape(self.in_passage_POSs)
        passage_len = input_shape[1]
        input_dim += POS_vocab.word_dim
    if with_NER and NER_vocab is not None:
        self.in_question_NERs = tf.placeholder(tf.int32, [None, None])  # [batch_size, question_len]
        self.in_passage_NERs = tf.placeholder(tf.int32, [None, None])  # [batch_size, passage_len]
        # self.NER_embedding = tf.get_variable("NER_embedding", shape=[NER_vocab.size()+1, NER_vocab.word_dim], initializer=tf.constant(NER_vocab.word_vecs), dtype=tf.float32)
        self.NER_embedding = tf.get_variable(
            "NER_embedding", initializer=tf.constant(NER_vocab.word_vecs), dtype=tf.float32)
        in_question_NER_repres = tf.nn.embedding_lookup(
            self.NER_embedding, self.in_question_NERs)  # [batch_size, question_len, NER_dim]
        in_passage_NER_repres = tf.nn.embedding_lookup(
            self.NER_embedding, self.in_passage_NERs)  # [batch_size, passage_len, NER_dim]
        in_question_repres.append(in_question_NER_repres)
        in_passage_repres.append(in_passage_NER_repres)
        input_shape = tf.shape(self.in_question_NERs)
        batch_size = input_shape[0]
        question_len = input_shape[1]
        input_shape = tf.shape(self.in_passage_NERs)
        passage_len = input_shape[1]
        input_dim += NER_vocab.word_dim
    if with_char and char_vocab is not None:
        self.question_char_lengths = tf.placeholder(tf.int32, [None, None])  # [batch_size, question_len]
        self.passage_char_lengths = tf.placeholder(tf.int32, [None, None])  # [batch_size, passage_len]
        self.in_question_chars = tf.placeholder(tf.int32, [None, None, None])  # [batch_size, question_len, q_char_len]
        self.in_passage_chars = tf.placeholder(tf.int32, [None, None, None])  # [batch_size, passage_len, p_char_len]
        input_shape = tf.shape(self.in_question_chars)
        batch_size = input_shape[0]
        question_len = input_shape[1]
        q_char_len = input_shape[2]
        input_shape = tf.shape(self.in_passage_chars)
        passage_len = input_shape[1]
        p_char_len = input_shape[2]
        char_dim = char_vocab.word_dim
        # self.char_embedding = tf.get_variable("char_embedding", shape=[char_vocab.size()+1, char_vocab.word_dim], initializer=tf.constant(char_vocab.word_vecs), dtype=tf.float32)
        self.char_embedding = tf.get_variable(
            "char_embedding", initializer=tf.constant(char_vocab.word_vecs), dtype=tf.float32)
        in_question_char_repres = tf.nn.embedding_lookup(
            self.char_embedding, self.in_question_chars)  # [batch_size, question_len, q_char_len, char_dim]
        in_question_char_repres = tf.reshape(
            in_question_char_repres, shape=[-1, q_char_len, char_dim])
        question_char_lengths = tf.reshape(self.question_char_lengths, [-1])
        in_passage_char_repres = tf.nn.embedding_lookup(
            self.char_embedding, self.in_passage_chars)  # [batch_size, passage_len, p_char_len, char_dim]
        in_passage_char_repres = tf.reshape(
            in_passage_char_repres, shape=[-1, p_char_len, char_dim])
        passage_char_lengths = tf.reshape(self.passage_char_lengths, [-1])
        with tf.variable_scope('char_lstm'):
            # lstm cell
            char_lstm_cell = tf.contrib.rnn.BasicLSTMCell(char_lstm_dim)
            # dropout
            if is_training:
                char_lstm_cell = tf.contrib.rnn.DropoutWrapper(
                    char_lstm_cell, output_keep_prob=(1 - dropout_rate))
            char_lstm_cell = tf.contrib.rnn.MultiRNNCell([char_lstm_cell])
            # question representation
            question_char_outputs = my_rnn.dynamic_rnn(
                char_lstm_cell, in_question_char_repres,
                sequence_length=question_char_lengths,
                dtype=tf.float32)[0]  # [batch_size*question_len, q_char_len, char_lstm_dim]
            question_char_outputs = question_char_outputs[:, -1, :]
            question_char_outputs = tf.reshape(
                question_char_outputs, [batch_size, question_len, char_lstm_dim])
            tf.get_variable_scope().reuse_variables()
            # passage representation
            passage_char_outputs = my_rnn.dynamic_rnn(
                char_lstm_cell, in_passage_char_repres,
                sequence_length=passage_char_lengths,
                dtype=tf.float32)[0]  # [batch_size*passage_len, p_char_len, char_lstm_dim]
            passage_char_outputs = passage_char_outputs[:, -1, :]
            passage_char_outputs = tf.reshape(
                passage_char_outputs, [batch_size, passage_len, char_lstm_dim])
        in_question_repres.append(question_char_outputs)
        in_passage_repres.append(passage_char_outputs)
        input_dim += char_lstm_dim
    in_question_repres = tf.concat(in_question_repres, 2)  # [batch_size, question_len, dim]
    in_passage_repres = tf.concat(in_passage_repres, 2)  # [batch_size, passage_len, dim]
    if is_training:
        in_question_repres = tf.nn.dropout(in_question_repres, (1 - dropout_rate))
        in_passage_repres = tf.nn.dropout(in_passage_repres, (1 - dropout_rate))
    else:
        in_question_repres = tf.multiply(in_question_repres, (1 - dropout_rate))
        in_passage_repres = tf.multiply(in_passage_repres, (1 - dropout_rate))
    mask = tf.sequence_mask(self.passage_lengths, passage_len, dtype=tf.float32)  # [batch_size, passage_len]
    question_mask = tf.sequence_mask(
        self.question_lengths, question_len, dtype=tf.float32)  # [batch_size, question_len]
    # ======Highway layer======
    if with_highway:
        with tf.variable_scope("input_highway"):
            in_question_repres = match_utils.multi_highway_layer(
                in_question_repres, input_dim, highway_layer_num)
            tf.get_variable_scope().reuse_variables()
            in_passage_repres = match_utils.multi_highway_layer(
                in_passage_repres, input_dim, highway_layer_num)
    # ========Bilateral Matching=====
    if (not with_left_match) or (not with_right_match):
        if verbose:
            (match_representation, match_dim, self.all_repre) = match_utils.bilateral_match_func1(
                in_question_repres, in_passage_repres,
                self.question_lengths, self.passage_lengths, question_mask, mask,
                MP_dim, input_dim, with_filter_layer, context_layer_num,
                context_lstm_dim, is_training, dropout_rate, with_match_highway,
                aggregation_layer_num, aggregation_lstm_dim, highway_layer_num,
                with_aggregation_highway, with_lex_decomposition, lex_decompsition_dim,
                with_full_match, with_maxpool_match, with_attentive_match,
                with_max_attentive_match, with_left_match, with_right_match,
                verbose=verbose)
        else:
            (match_representation, match_dim) = match_utils.bilateral_match_func1(
                in_question_repres, in_passage_repres,
                self.question_lengths, self.passage_lengths, question_mask, mask,
                MP_dim, input_dim, with_filter_layer, context_layer_num,
                context_lstm_dim, is_training, dropout_rate, with_match_highway,
                aggregation_layer_num, aggregation_lstm_dim, highway_layer_num,
                with_aggregation_highway, with_lex_decomposition, lex_decompsition_dim,
                with_full_match, with_maxpool_match, with_attentive_match,
                with_max_attentive_match, with_left_match, with_right_match,
                verbose=verbose)
    else:
        (match_representation, match_dim) = match_utils.bilateral_match_func2(
            in_question_repres, in_passage_repres,
            self.question_lengths, self.passage_lengths, question_mask, mask,
            MP_dim, input_dim, with_filter_layer, context_layer_num,
            context_lstm_dim, is_training, dropout_rate, with_match_highway,
            aggregation_layer_num, aggregation_lstm_dim, highway_layer_num,
            with_aggregation_highway, with_lex_decomposition, lex_decompsition_dim,
            with_full_match, with_maxpool_match, with_attentive_match,
            with_max_attentive_match, with_left_match, with_right_match,
            with_no_match=with_no_match)
    # ========Prediction Layer=========
    w_0 = tf.get_variable("w_0", [match_dim, match_dim // 2], dtype=tf.float32)
    b_0 = tf.get_variable("b_0", [match_dim // 2], dtype=tf.float32)
    if use_options:
        w_1 = tf.get_variable("w_1", [match_dim // 2, 1], dtype=tf.float32)
        b_1 = tf.get_variable("b_1", [1], dtype=tf.float32)
    else:
        w_1 = tf.get_variable("w_1", [match_dim // 2, num_classes], dtype=tf.float32)
        b_1 = tf.get_variable("b_1", [num_classes], dtype=tf.float32)
    logits = tf.matmul(match_representation, w_0) + b_0
    logits = tf.tanh(logits)
    if is_training:
        logits = tf.nn.dropout(logits, (1 - dropout_rate))
    else:
        logits = tf.multiply(logits, (1 - dropout_rate))
    logits = tf.matmul(logits, w_1) + b_1
    self.final_logits = logits
    if use_options:
        logits = tf.reshape(logits, [-1, num_options])
        gold_matrix = tf.reshape(self.truth, [-1, num_options])
        self.prob = tf.nn.softmax(logits)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, tf.cast(self.truth, tf.int64), name='cross_entropy_per_example')
        # self.loss = tf.reduce_mean(cross_entropy, name='cross_entropy')
        # gold_matrix = tf.one_hot(self.truth, num_classes, dtype=tf.float32)
        # gold_matrix = tf.one_hot(self.truth, num_classes)
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=gold_matrix))
        # correct = tf.nn.in_top_k(logits, self.truth, 1)
        # self.eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(gold_matrix, 1))
        self.correct = correct
    else:
        self.prob = tf.nn.softmax(logits)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, tf.cast(self.truth, tf.int64), name='cross_entropy_per_example')
        # self.loss = tf.reduce_mean(cross_entropy, name='cross_entropy')
        gold_matrix = tf.one_hot(self.truth, num_classes, dtype=tf.float32)
        # gold_matrix = tf.one_hot(self.truth, num_classes)
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=gold_matrix))
        correct = tf.nn.in_top_k(logits, self.truth, 1)
        self.eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
    self.predictions = tf.arg_max(self.prob, 1)
    if optimize_type == 'adadelta':
        clipper = 50
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
        tvars = tf.trainable_variables()
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
        self.loss = self.loss + lambda_l2 * l2_loss
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clipper)
        self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
    elif optimize_type == 'sgd':
        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)  # Create a variable to track the global step.
        min_lr = 0.000001
        self._lr_rate = tf.maximum(
            min_lr,
            tf.train.exponential_decay(learning_rate, self.global_step, 30000, 0.98))
        self.train_op = tf.train.GradientDescentOptimizer(
            learning_rate=self._lr_rate).minimize(self.loss)
    elif optimize_type == 'ema':
        tvars = tf.trainable_variables()
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.loss)
        # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.9999)
        # Create the shadow variables, and add ops to maintain moving averages
        # of the trainable variables.
        maintain_averages_op = ema.apply(tvars)
        # Create an op that will update the moving averages after each training
        # step. This is what we will use in place of the usual training op.
        with tf.control_dependencies([train_op]):
            self.train_op = tf.group(maintain_averages_op)
    elif optimize_type == 'adam':
        clipper = 50
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        tvars = tf.trainable_variables()
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
        self.loss = self.loss + lambda_l2 * l2_loss
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clipper)
        self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
    extra_train_ops = []
    train_ops = [self.train_op] + extra_train_ops
    self.train_op = tf.group(*train_ops)
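# Minimal self-contained sketch (not from the original model class) of the
# ExponentialMovingAverage pattern used in the 'ema' branch above: the EMA update
# is placed under a control dependency on the training op, and the shadow value
# is read back with ema.average(). The variable and decay below are illustrative.
import tensorflow as tf

v = tf.Variable(0.0, name='v')
update_v = tf.assign_add(v, 1.0)                 # stands in for the optimizer step
ema = tf.train.ExponentialMovingAverage(decay=0.9)
with tf.control_dependencies([update_v]):
    train_op = tf.group(ema.apply([v]))          # EMA update runs after the "training" step
shadow_v = ema.average(v)                        # the moving-average copy of v

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        sess.run(train_op)
    print(sess.run([v, shadow_v]))               # shadow_v lags behind v (≈0.56 vs 3.0 here)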
b3 = tf.Variable(tf.random_normal([nb_classes]))  # b3: 10-dimensional vector

# Layer 4
a4 = tf.matmul(a3, W3) + b3
hypothesis = a4  # m * 10 (for Y) matrix

# Cross-Entropy = D(S, L) = - Σ L_i * log(S_i)
# cross_entropy = -tf.reduce_sum(Y * tf.log(hypothesis), axis=1)
#   element-wise (m * 10) .* (m * 10), summed along axis 1 => m * 1 vector
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y)
cost = tf.reduce_mean(cross_entropy)  # a single scalar
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Test hypothesis
is_correct = tf.equal(tf.arg_max(hypothesis, 1), tf.arg_max(Y, 1))  # m * 1
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Parameters
# epoch: one full pass over all m training examples (1 epoch = the whole dataset seen once)
training_epochs = 15
# batch size: how many of the m examples are fed per training step
batch_size = 100

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch_count = int(mnist.train.num_examples / batch_size)
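# A minimal NumPy check (not part of the snippet above) of the identity in the
# comment, D(S, L) = -Σ L_i * log(S_i) with S = softmax(logits), which is what
# tf.nn.softmax_cross_entropy_with_logits computes from raw logits.
# The arrays below are made-up examples.
import numpy as np

logits = np.array([[2.0, 1.0, 0.1]])
labels = np.array([[1.0, 0.0, 0.0]])  # one-hot ground truth

softmax = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
manual_xent = -np.sum(labels * np.log(softmax), axis=1)
print(manual_xent)  # ~[0.417], matching the TF op for the same inputs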
W = tf.Variable(tf.random_uniform([1, len(x_data)], -1.0, 1.0))
parm_list = [W]
saver = tf.train.Saver(parm_list)

h = tf.matmul(W, X)
hypothesis = tf.div(1., 1. + tf.exp(-h))  # sigmoid
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

rate = 0.03
optimizer = tf.train.GradientDescentOptimizer(rate)
train = optimizer.minimize(cost)

comp_pred = tf.equal(tf.arg_max(hypothesis, 1), tf.arg_max(Y, 1))
accuracy = tf.reduce_mean(tf.cast(comp_pred, dtype=tf.float32))

with tf.Session() as sess:
    saver.restore(sess, "/tmp/Training_model.ckpt")
    for step in range(500 + 1):  # if using user data, comment out this for-loop
        _, loss, acc = sess.run([train, cost, accuracy],
                                feed_dict={X: x_data, Y: y_data})
        if step % 10 == 0:
            print("step :", step)
            print("loss :", loss)
            print("acc :", acc)
    # sess.run(train, feed_dict={X: x_data, Y: y_data})  # if using user data, use this statement instead
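# Side note (an assumption about intent, not part of the original snippet): the
# hand-written sigmoid cross-entropy above can hit log(0) and produce NaNs. A
# numerically stable equivalent works directly on the pre-sigmoid logits h:
stable_cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=h))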
# output layer: softmax
with tf.name_scope("softmax"):
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    h_fc2 = Wx_plus_b(h_fc1_drop, W_fc2, b_fc2)
    y_conv = tf.nn.softmax(h_fc2)

# model training
with tf.name_scope('cross_entropy'):
    cross_entropy = -tf.reduce_sum(y_in * tf.log(y_conv))
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
with tf.name_scope('accuracy'):
    # with tf.name_scope('correct_prediction'):
    correct_prediction = tf.equal(tf.arg_max(y_conv, 1), tf.arg_max(y_in, 1))
    # with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session(graph=graph) as session:
    session.run(tf.initialize_all_variables())
    writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
    for i in range(learnStep):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x_in: batch[0],
                                                      y_in: batch[1],
                                                      keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy))
        train_step.run(feed_dict={x_in: batch[0], y_in: batch[1], keep_prob: 0.5})
# saver.restore(sess, './model.ckpt')
for i in range(1000):
    if start > 29200:
        start = 0
    start = start + batch_size
    ex, ey = pp.load_batch(start, start + batch_size)
    if len(ex) < 1:
        continue
    _, c = sess.run([optimizer, cost], feed_dict={x: ex, y: ey})
    epoch_loss += c
    writer.add_summary(c, epoch * 1000 + i)  # note: add_summary expects a serialized Summary, not a raw loss value
    # if i % 100 == 0:
    #     print("sub epoch ", i)
print("Epoch : ", ep, " loss ", epoch_loss)
saver.save(sess, './model.ckpt')

correct = tf.equal(tf.arg_max(prediction, 1), tf.arg_max(y, 1))
accu = tf.reduce_mean(tf.cast(correct, 'float'))
tx, ty = pp.load_batch(testing=1)
print('Accuracy : ', accu.eval({x: tx, y: ty}))
# print(prediction.eval({x: tx[0].reshape(-1, 400)}))  # returns the full output array
prediction = tf.arg_max(prediction, 1)
pred = prediction.eval({x: tx[1].reshape(-1, 400)})  # returns the predicted index
print(pred)


def test_nn(x):
    pre = nn_model(x)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, 'model.ckpt')
        img = cv2.imread('A.jpg', 0)  # test image
        # img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, img = cv2.threshold(img, 118, 255, 0)
# @Time   : 2019/10/11 0011 14:43
# @Author : zhu
# @File   : mytf_10.py
# @Task description:
import tensorflow as tf
import numpy as np
import warnings

warnings.filterwarnings("ignore")

t1 = tf.constant([[1, 2], [3, 4], [5, 6]])
t2 = tf.constant([1, 2, 5, 6])
t3 = tf.constant([[1, 2], [3, 4], [5, 6]])
t4 = [[6], [7]]
t5 = [[6, 7, 9, 78, 23], [6, 7, 9, 78, 23]]
t6 = tf.ones((6, ))

result_2 = tf.arg_max(t5, 1)  # tf.arg_max has no default dimension, so the axis must be passed explicitly
# t3 = tf.multiply(t1, t2)
result_1 = t1 - t3
# result_2 = tf.multiply(t1, t2)
# result_3 = tf.reduce_sum(tf.reduce_sum(result_2, axis=1), axis=0)
# t5 = tf.zeros([96, 30])

init = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init)
    print(result_2.eval())
    # print(np.shape(t_list))
    # print(np.shape(t_list1))
    # print(t_list1.eval())
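# Side note (not from the original script): tf.arg_max is the deprecated alias of
# tf.argmax. The modern spelling of the call above is shown below; for t5 both
# evaluate to [3 3], the per-row index of the largest value (78).
result_2_modern = tf.argmax(t5, axis=1)
with tf.Session() as session:
    print(session.run(result_2_modern))  # [3 3]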
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
    # y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")
    y_ = inference(x, None)
    y = inference(x, None, True)

    # used to store training cycles
    global_step = tf.Variable(0, trainable=False)

    # define EMA function to increase robustness when predicting
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # # forward propagation with moving average function
    # average_y = inference(x, variable_averages, weight1, biases1, weight2, biases2)
    average_y = inference(x, variable_averages, True)

    # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.arg_max(y_, 1))
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.arg_max(y_, 1))
    # calc cross_entropy mean for the current batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # calc L2 regularization loss
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    with tf.variable_scope("", reuse=True):
        regularization = regularizer(
            tf.get_variable("layer1/weights", [INPUT_NODE, LAYER1_NODE])) + regularizer(
                tf.get_variable("layer2/weights", [LAYER1_NODE, OUTPUT_NODE]))
    loss = cross_entropy_mean + regularization

    # learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ^ (global_step / decay_steps)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # combine backward propagation and EMA value modification
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name="train")

    # correct_prediction = tf.equal(tf.arg_max(average_y, 1), tf.arg_max(y_, 1))
    correct_prediction = tf.equal(tf.arg_max(y, 1), tf.arg_max(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # prepare validation dataset to stop optimization
        validation_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels
        }
        # define test dataset for final evaluation
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        # one slot per 1000 training steps
        validation_result = [0.0] * (TRAINING_STEPS // 1000)
        test_result = [0.0] * (TRAINING_STEPS // 1000)
        for i in range(TRAINING_STEPS):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validation_feed)
                validation_result[i // 1000] = validate_acc
                # print("after %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        test_result[i // 1000] = test_acc
        # print("after %d training step(s), test accuracy using average model is %g " % (i, test_acc))
        print(validation_result)
        print(test_result)

        saver = tf.train.Saver()
        saver.export_meta_graph("model.ckpt.meda.json", as_text=True)
        dispImg(validation_result, test_result, "with EMA")
        # img_vector = mnist.train.images[5]
        # img_length = int(np.sqrt(INPUT_NODE))
        # img = np.ndarray([img_length, img_length])
        # # print("image size: ", img_length, "*", img_length)
        # for c in range(INPUT_NODE):
        #     # print("image indices: ", c / img_length, "*", c % img_length)
        #     img[c / img_length][c % img_length] = img_vector[c]
        # plt.figure(num=2, figsize=(15, 8))
        # plt.imshow(img)
        plt.show()
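# Quick numeric illustration (made-up constants, not the ones defined in this
# file) of the decay schedule noted in the comment above:
#   learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps)
base_rate, decay_rate, decay_steps = 0.8, 0.99, 550
for step in (0, 550, 5500):
    print(step, base_rate * decay_rate ** (step / decay_steps))
# 0 -> 0.8, 550 -> 0.792, 5500 -> ~0.724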
h1_units = 300
w1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
w2 = tf.Variable(tf.zeros([h1_units, 10]))
b2 = tf.Variable(tf.zeros([10]))

x = tf.placeholder(tf.float32, [None, in_units])
keep_prob = tf.placeholder(tf.float32)

hidden1 = tf.nn.relu(tf.matmul(x, w1) + b1)
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
y = tf.nn.softmax(tf.matmul(hidden1_drop, w2) + b2)

y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)

tf.global_variables_initializer().run()
for i in range(3000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys, keep_prob: 0.75})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.arg_max(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({
    x: mnist.test.images,
    y_: mnist.test.labels,
    keep_prob: 1.0
}))
    tf.multiply(my_kernel, tf.multiply(b_vec_cross, y_target_cross)), [1, 2])
loss = tf.reduce_sum(tf.negative(tf.subtract(first_term, second_term)))
# loss += regulation_rate * tf.nn.l2_loss(b)

# Gaussian (RBF) prediction kernel
rA = tf.reshape(tf.reduce_sum(tf.square(x_data), 1), [-1, 1])
rB = tf.reshape(tf.reduce_sum(tf.square(prediction_grid), 1), [-1, 1])
pred_sq_dist = tf.add(
    tf.subtract(
        rA, tf.multiply(2., tf.matmul(x_data, tf.transpose(prediction_grid)))),
    tf.transpose(rB))
pred_kernel = tf.exp(tf.multiply(gamma, tf.abs(pred_sq_dist)))
# pred_kernel = tf.matmul(x_data, tf.transpose(x_data))

prediction_output = tf.matmul(tf.multiply(y_target, b), pred_kernel)
prediction = tf.arg_max(prediction_output, 0)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(prediction, tf.argmax(y_target, 0)), tf.float32))

global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           300, 0.5, staircase=True)
# optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
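# A small NumPy check (illustrative arrays, not the tensors above) of the
# pairwise squared-distance identity behind pred_sq_dist:
#   ||a - b||^2 = ||a||^2 - 2 a·b + ||b||^2
import numpy as np

A = np.random.rand(4, 2)   # stands in for x_data
B = np.random.rand(3, 2)   # stands in for prediction_grid
rA = np.sum(A ** 2, axis=1).reshape(-1, 1)
rB = np.sum(B ** 2, axis=1).reshape(-1, 1)
sq_dist = rA - 2.0 * A.dot(B.T) + rB.T

brute_force = ((A[:, None, :] - B[None, :, :]) ** 2).sum(axis=-1)
print(np.allclose(sq_dist, brute_force))  # True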
# define the cross-entropy cost function
# (a reasonable choice here; it converges fairly quickly)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# use gradient descent
# train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)  # 1e-3 = 0.001

# initialize the variables
init = tf.global_variables_initializer()

# the comparison results are stored in a list of booleans
correct_prediction = tf.equal(tf.argmax(y, 1), tf.arg_max(prediction, 1))
# argmax returns the position of the largest value in the one-hot row (the 1, not a 0)
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # cast(a, b) converts a to type b

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(20):
        for batch in range(n_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
        acc = sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y: mnist.test.labels
        })