def compute_accuracy(x, l, mask):
    """Compute model accuracy.

    Builds a streaming-accuracy update op for ``ch_model`` (and, when
    ``FLAGS.surrogate_attack`` is set, a second one for ``sur_ch_model``),
    resets the local metric variables and runs the update op(s) for
    ``FLAGS.eval_steps`` batches.

    Args:
        x: Input handed to each model's ``get_probs``.
        l: Label tensor; its dtype is reused for the argmax output.
        mask: Weights forwarded to ``tf.metrics.accuracy``.

    Returns:
        The value fetched by the last update step: a single accuracy, or a
        pair ``(model_acc, surrogate_acc)`` when ``FLAGS.surrogate_attack``
        is set.

    Raises:
        ValueError: If ``FLAGS.eval_steps`` is smaller than 1 (previously this
            surfaced as an unbound-local error on ``acc``).
    """
    if FLAGS.eval_steps < 1:
        raise ValueError(
            "FLAGS.eval_steps must be >= 1, got %r" % (FLAGS.eval_steps,))

    def _accuracy_update_op(model):
        # Class prediction = argmax over the last axis of the squeezed probs.
        preds = model.get_probs(x)
        preds = tf.squeeze(preds)
        preds = tf.argmax(preds, -1, output_type=l.dtype)
        # Index 1 of tf.metrics.accuracy is the update op.
        return tf.metrics.accuracy(l, preds, weights=mask)[1]

    acc_update_op = _accuracy_update_op(ch_model)
    if FLAGS.surrogate_attack:
        acc_update_op = tf.tuple(
            (acc_update_op, _accuracy_update_op(sur_ch_model)))

    # tf.metrics.* keep their counters in local variables; reset them first.
    # (tf.initialize_local_variables is the deprecated spelling of this call.)
    sess.run(tf.local_variables_initializer())

    acc = None
    for i in range(FLAGS.eval_steps):
        tf.logging.info(
            "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
        acc = sess.run(acc_update_op)

    if FLAGS.surrogate_attack:
        tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
        tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Build precision and recall curves from scored detections.

    When ``scores`` is a dict, every argument is treated as a dict sharing
    the keys of ``num_gbboxes`` and the function is applied per key; the
    result is then a pair of dicts ``(precision, recall)``.

    Otherwise the detections are ordered by decreasing score (``top_k`` with
    ``k=num_detections``), ``tp`` and ``fp`` are accumulated along axis 0 in
    that order, and the curves are obtained through ``_safe_div``.

    Returns:
        ``(precision_dict, recall_dict)`` for dict inputs, otherwise the list
        ``[precision, recall]`` produced by ``tf.tuple``.
    """
    if isinstance(scores, dict):
        precisions = {}
        recalls = {}
        for key in num_gbboxes.keys():
            key_scope = 'precision_recall_%s' % key
            precisions[key], recalls[key] = precision_recall(
                num_gbboxes[key], num_detections[key], tp[key], fp[key],
                scores[key], dtype, key_scope)
        return precisions, recalls

    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Indices that order the detections by decreasing score.
        _, order = tf.nn.top_k(scores, k=num_detections, sorted=True)
        sorted_tp = tf.gather(tp, order)
        sorted_fp = tf.gather(fp, order)
        # Running totals of true / false positives along the ranking.
        cum_tp = tf.cumsum(tf.cast(sorted_tp, dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(sorted_fp, dtype), axis=0)
        n_groundtruth = tf.cast(num_gbboxes, dtype)
        recall = _safe_div(cum_tp, n_groundtruth, 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
def _rev_layer_forward(xs, f, g, f_side_input, g_side_input, gate_outputs=False): """Forward for 1 reversible layer.""" x1, x2 = xs y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2)) y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1)) if gate_outputs: return tf.tuple([y1, y2]) else: return (y1, y2)
def testAcceptTensorsAsControlInputs(self):
    """tf.tuple must accept tensors (not only ops) as control inputs.

    The assign tensor is supplied as a control input, so evaluating the
    tuple's output has to run the assignment as a side effect.
    """
    with self.test_session():
        var = tf.Variable(0)
        assign = tf.assign(var, 1)
        t, = tf.tuple([tf.constant(0)], control_inputs=[assign])

        # Should trigger the assign.
        t.eval()

        # assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12's unittest.
        self.assertEqual(1, var.eval())
def precision_recall(num_gbboxes, tp, fp, scope=None):
    """Compute precision and recall from true/false positive indicators.

    ``tp`` and ``fp`` are cast to float32 and summed over axis 0. Recall is
    the true-positive total divided by ``num_gbboxes``; precision is the
    true-positive total divided by true plus false positives. Both divisions
    go through ``tfe_math.safe_divide``.

    Returns:
        The list ``[precision, recall]`` produced by ``tf.tuple``.
    """
    with tf.name_scope(scope, 'precision_recall'):
        true_pos = tf.reduce_sum(tf.cast(tp, tf.float32), axis=0)
        false_pos = tf.reduce_sum(tf.cast(fp, tf.float32), axis=0)
        n_groundtruth = tf.cast(num_gbboxes, tf.float32)
        recall = tfe_math.safe_divide(true_pos, n_groundtruth, 'recall')
        precision = tfe_math.safe_divide(
            true_pos, true_pos + false_pos, 'precision')
        return tf.tuple([precision, recall])
def __init__(self, x, size, selectTrain, sess, toTarget=None, ts=0.001):
    """Build a batch-normalisation layer over ``x``.

    Args:
        x: Input tensor; it is reshaped to ``[-1, 1, 1, size]`` for the
            normalisation op and back to ``[-1, size]`` afterwards.
        size: Number of features (length of mean/variance/beta/gamma).
        selectTrain: Boolean predicate tensor for ``tf.cond``. When true the
            batch statistics are used, otherwise the moving averages.
        sess: Session object, stored on the instance.
        toTarget: Optional other instance exposing ``beta`` and ``gamma``.
            When given, ``updateTarget`` moves this layer's ``beta``/``gamma``
            towards the tracked ones.
        ts: Interpolation factor used by the tracking update.

    Attributes created: ``xNorm`` (normalised output), ``update`` (op
    refreshing the moving averages with a 0.95 / 0.05 blend) and, when
    ``toTarget`` is given, ``updateBeta``, ``updateGamma``, ``updateTarget``.
    """
    self.sess = sess

    # Batch statistics along axis 0 (`moments` is defined outside this
    # method — presumably tf.nn.moments; confirm).
    self.mean_x_train, self.variance_x_train = moments(x, [0])

    # Moving averages of the batch statistics.
    self.mean_x_ma = tf.Variable(tf.zeros([size]))
    self.variance_x_ma = tf.Variable(tf.ones([size]))

    # Single handle that refreshes both moving averages: the variance assign
    # is the value, the mean assign is attached as a control input.
    self.update = tf.tuple(
        [self.variance_x_ma.assign(
            0.95 * self.variance_x_ma + 0.05 * self.variance_x_train)],
        control_inputs=[self.mean_x_ma.assign(
            0.95 * self.mean_x_ma + 0.05 * self.mean_x_train)])[0]

    self.mean_x_ma_update = tf.tuple(
        [self.mean_x_train], control_inputs=[])[0]
    self.printUp = tf.Print(
        self.mean_x_ma_update, [selectTrain], message="selectTrain value : ")
    self.variance_x_ma_update = tf.tuple(
        [self.variance_x_train], control_inputs=[])[0]

    def getxmau():
        return self.mean_x_ma_update

    def getxma():
        return self.mean_x_ma

    def getvxmau():
        return self.variance_x_ma_update

    def getvxma():
        return self.variance_x_ma

    # Batch statistics while training, moving averages otherwise.
    self.mean_x = tf.cond(selectTrain, getxmau, getxma)
    self.variance_x = tf.cond(selectTrain, getvxmau, getvxma)

    self.beta = tf.Variable(tf.zeros([size]))
    self.gamma = tf.Variable(tf.ones([size]))

    # The normalisation op is fed a 4-D tensor here, hence the reshape round
    # trip; 0.01 is the variance epsilon and True enables scaling by gamma.
    self.xNorm = tf.reshape(
        tf.nn.batch_norm_with_global_normalization(
            tf.reshape(x, [-1, 1, 1, size]),
            self.mean_x, self.variance_x, self.beta, self.gamma,
            0.01, True),
        [-1, size])

    # Identity check: `!= None` would go through any overloaded __ne__.
    if toTarget is not None:
        self.isTracking = toTarget
        self.updateBeta = self.beta.assign(
            self.beta * (1 - ts) + self.isTracking.beta * ts)
        self.updateGamma = self.gamma.assign(
            self.gamma * (1 - ts) + self.isTracking.gamma * ts)
        self.updateTarget = tf.group(self.updateBeta, self.updateGamma)
def create_graph(self):
    """Build the two-input network and store its softmax in ``self.readout``.

    ``self.inputs_a`` and ``self.inputs_b`` are each reshaped to 4x4x1 grids
    and passed through the same two 2x2 'VALID' conv+ReLU layers (16 then 32
    filters, weights shared between the two inputs). The flattened features
    of both inputs are concatenated and fed through two dense layers
    (1024 units, then 2) followed by a softmax.
    """
    unit_strides = [1, 1, 1, 1]

    def make_weights(shape):
        # Truncated-normal initialised weight variable.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def make_biases(shape):
        # Bias variable initialised to the constant 0.1.
        return tf.Variable(tf.constant(0.1, shape=shape))

    def conv_relu_pair(input_a, input_b, kernel_shape):
        # One conv layer applied to both inputs with shared parameters.
        kernel = make_weights(kernel_shape)
        bias = make_biases(kernel_shape[-1:])
        pre_a = tf.nn.conv2d(input_a, filter=kernel, strides=unit_strides,
                             padding='VALID') + bias
        pre_b = tf.nn.conv2d(input_b, filter=kernel, strides=unit_strides,
                             padding='VALID') + bias
        return tf.nn.relu(pre_a), tf.nn.relu(pre_b)

    grid_a = tf.reshape(self.inputs_a, [-1, 4, 4, 1])
    grid_b = tf.reshape(self.inputs_b, [-1, 4, 4, 1])

    act_a, act_b = conv_relu_pair(grid_a, grid_b, [2, 2, 1, 16])
    act_a, act_b = conv_relu_pair(act_a, act_b, [2, 2, 16, 32])

    flat_size = 2 * 2 * 32
    flat_a = tf.reshape(act_a, [-1, flat_size])
    flat_b = tf.reshape(act_b, [-1, flat_size])
    merged = tf.concat(1, [flat_a, flat_b])
    merged_synced = tf.tuple([merged])[0]

    hidden_w = make_weights([flat_size * 2, 1024])
    hidden_b = make_biases([1024])
    hidden = tf.matmul(merged_synced, hidden_w) + hidden_b

    out_w = make_weights([1024, 2])
    out_b = make_biases([2])
    logits = tf.matmul(hidden, out_w) + out_b

    self.readout = tf.nn.softmax(logits)
def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars,
                        g_side_input):
    """Backprop for 1 layer.

    Reconstructs the layer inputs from its outputs (``x2 = y2 - g(y1)``,
    ``x1 = y1 - f(x2)``) and computes the gradients with respect to those
    inputs, to the variables of ``f`` and ``g`` and to their side inputs.

    Args:
        ys: Pair ``(y1, y2)`` of layer outputs.
        grad_ys: Pair ``(grad_y1, grad_y2)`` of gradients w.r.t. ``ys``.
        f: Callable applied to ``x2`` (plus side input when non-empty).
        g: Callable applied to ``y1`` (plus side input when non-empty).
        f_vars: List of variables used by ``f``.
        f_side_input: Iterable of side-input tensors for ``f``; it is iterated
            unconditionally, so it must not be ``None``.
        g_vars: List of variables used by ``g``.
        g_side_input: Iterable of side-input tensors for ``g``; same remark.

    Returns:
        ``((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
        (grad_g_vars, grad_g_side))`` with every leaf passed through one
        ``tf.tuple`` call.
    """
    y1, y2 = ys
    grad_y1, grad_y2 = grad_ys

    # Reconstruct intermediates and inputs (x1, x2)
    # stop_gradients required on fn inputs to prevent infinite recursion into this
    # grad function on the calls to tf.gradients.
    y1_stop = tf.stop_gradient(y1)
    g_side_input = [tf.stop_gradient(t) for t in g_side_input]
    gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop)

    x2 = y2 - gy1
    x2_stop = tf.stop_gradient(x2)
    f_side_input = [tf.stop_gradient(t) for t in f_side_input]
    fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop)

    x1 = y1 - fx2

    # Compute gradients wrt to inputs
    # dL/dy2 * dG(y1)/y1
    grad_gy1_y2 = tf.gradients(gy1, y1_stop, grad_y2)[0]
    grad_x1 = grad_y1 + grad_gy1_y2
    # x2 receives gradient directly from y2 and through f(x2) via both the
    # grad_y1 path and the g(y1) path.
    grad_x2 = (
        tf.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 +
        tf.gradients(fx2, x2_stop, grad_gy1_y2)[0])

    # Compute gradients wrt to vars and side inputs in f and g
    # Each tf.gradients result is split at len(vars): variables first, side
    # inputs after.
    grads1 = tf.gradients(gy1, g_vars + g_side_input, grad_y2)
    grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):]
    grads2 = tf.gradients(fx2, f_vars + f_side_input, grad_y1)
    grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):]
    grads3 = tf.gradients(fx2, f_vars + f_side_input, grad_gy1_y2)
    grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):]
    # _acc_grads is defined elsewhere — presumably combines the two gradient
    # lists element-wise; confirm.
    grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2)
    grad_f_side = _acc_grads(grad_f_side1, grad_f_side2)

    # Put returns in a tuple to ensure a constant memory budget (i.e. don't want
    # the subsequent layer to start computing and consuming memory based on a
    # subset of these values).
    outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
               (grad_g_vars, grad_g_side))
    tupled = tf.tuple(tf.contrib.framework.nest.flatten(outputs))
    return tf.contrib.framework.nest.pack_sequence_as(outputs, tupled)
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Build precision and recall curves from scored detections.

    Detections are ordered by decreasing score (``top_k`` with
    ``k=n_detections``); ``tp`` and ``fp`` are cast to float64 and
    accumulated along axis 0 in that order. Recall divides the cumulative
    true positives by ``n_gbboxes``, precision by cumulative TP + FP, both
    through ``_safe_div``.

    Returns:
        The list ``[precision, recall]`` produced by ``tf.tuple``.
    """
    float_type = tf.float64
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Indices that order the detections by decreasing score.
        _, order = tf.nn.top_k(scores, k=n_detections, sorted=True)
        ranked_tp = tf.gather(tp, order)
        ranked_fp = tf.gather(fp, order)
        # Running totals along the ranking.
        cum_tp = tf.cumsum(tf.cast(ranked_tp, float_type), axis=0)
        cum_fp = tf.cumsum(tf.cast(ranked_fp, float_type), axis=0)
        n_groundtruth = tf.cast(n_gbboxes, float_type)
        recall = _safe_div(cum_tp, n_groundtruth, 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
def _get_step(self, inputs):
    """Build one iteration step and the mean squared change of ``Z``.

    NOTE(review): the ``inputs`` argument is never read; the tensors come
    from ``self.inputs`` instead — confirm this is intended.

    Side effects: sets ``self.S``, ``self.We``, ``self.step_FISTA``,
    ``self.theta_k``, ``self.Yk`` and ``self.dz``.

    Returns:
        ``(step, dz)`` where ``step`` is ``tf.tuple([Zk, tk, Yk])`` and
        ``dz`` is the mean over rows of the squared norm of ``Zk - Z``.
    """
    Z, Y, X, theta, lmbd = self.inputs
    n_rows, n_cols = self.D.shape
    scale = self.L

    with tf.name_scope("ISTA_iteration"):
        s_value = np.eye(n_rows, dtype=np.float32) - self.S0/scale
        self.S = tf.constant(s_value, shape=[n_rows, n_rows], name='S')
        self.We = tf.constant(self.D.T/scale, shape=[n_cols, n_rows],
                              dtype=tf.float32, name='We')

        pre_threshold = tf.matmul(Y, self.S) + tf.matmul(X, self.We)
        Zk = soft_thresholding(pre_threshold, lmbd/scale)
        self.step_FISTA = Zk

        # Alternative kept from the original source:
        #   tk = (tf.sqrt(theta*theta+4) - theta)*theta/2
        tk = (1 + tf.sqrt(1 + 4*theta*theta))/2
        self.theta_k = tk

        dZ = tf.subtract(Zk, Z)
        # Alternative kept from the original source:
        #   Yk = Zk + tk*(1/theta-1)*dZ
        self.Yk = Zk + (theta-1)/tk*dZ

        row_sq_norms = tf.reduce_sum(dZ*dZ, reduction_indices=[1])
        self.dz = tf.reduce_mean(row_sq_norms)

        step = tf.tuple([Zk, tk, self.Yk])
    return step, self.dz
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    The curves are padded (precision with 0 at both ends, recall with 0 and
    1) and precision is replaced by its reversed cumulative maximum. For
    every ``x`` in ``xvals`` the result is the minimum of that precision over
    the entries whose recall is ``<= x``.

    Args:
        xvals: Python list of floats;
        precision: 1D Tensor decreasing.
        recall: 1D Tensor increasing.
    Return:
        list of precision values, one per entry of ``xvals``.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]):
        # Add bounds values to precision and recall.
        padded_precision = tf.concat([[0.], precision, [0.]], axis=0)
        padded_recall = tf.concat([[0.], recall, [1.]], axis=0)
        envelope = tfe_math.cummax(padded_precision, reverse=True)

        def _precision_at(threshold):
            within = tf.less_equal(padded_recall, threshold)
            return tf.reduce_min(tf.boolean_mask(envelope, within))

        return tf.tuple([_precision_at(x) for x in xvals])
def build_train_MSR_face_graph_multi_gpu(batch_size=64,
                                         num_gpus=4,
                                         latent_dims=1024,
                                         lr_g=1e-4,
                                         lr_c=5e-5,
                                         clamp_lower=-0.01,
                                         clamp_upper=0.01,
                                         use_gradient_penalty=True,
                                         stddev=0.02,
                                         norm_val=10):
    """Build the multi-GPU generator/critic training graph.

    One tower is built per GPU. Each tower dequeues an image batch, runs the
    generator on uniform noise, scores generated and real images with the
    critic and contributes its critic / generator gradients. The per-tower
    gradients are averaged and applied with two RMSProp optimizers.

    Args:
        batch_size: Images per tower.
        num_gpus: Number of towers (``/gpu:0`` .. ``/gpu:num_gpus-1``).
        latent_dims: Size of the generator's noise vector.
        lr_g: Generator learning rate.
        lr_c: Critic learning rate.
        clamp_lower: Lower clip bound for critic variables (only used when
            ``use_gradient_penalty`` is false).
        clamp_upper: Upper clip bound, same remark.
        use_gradient_penalty: Add the gradient-norm penalty to the critic
            loss instead of clipping the critic variables.
        stddev: Standard deviation of the weight initializers.
        norm_val: Weight of the gradient penalty term.

    Returns:
        ``(apply_gradient_g, apply_gradient_c, total_c_loss, phase)`` where
        ``phase`` is the boolean placeholder passed to the generator as
        ``for_train``.
    """
    from data_utils import get_cartoon_faces

    with tf.device('/cpu:0'):
        phase = tf.placeholder(tf.bool)
        opt_g = tf.train.RMSPropOptimizer(lr_g)
        opt_c = tf.train.RMSPropOptimizer(lr_c)

        real_img = get_cartoon_faces(batch_size)
        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [real_img], capacity=2 * num_gpus)

        tower_grads_c = []
        tower_grads_g = []
        tower_c_losses = []

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpus):
                image_batch = batch_queue.dequeue()
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)):
                        z = tf.random_uniform(
                            [batch_size, latent_dims], -1, 1)
                        with tf.variable_scope('generator'):
                            generate_img = generator(
                                z, [4, 4, 1024], [64, 64, 3], tf.tanh,
                                tf.random_normal_initializer(stddev=stddev),
                                kernel_size=5,
                                for_train=phase)
                            # NOTE(review): the original nesting of this call
                            # is not recoverable from the collapsed source;
                            # it is kept inside the generator scope so the
                            # first tower can still create critic variables.
                            tf.get_variable_scope().reuse_variables()

                        # The first tower creates the critic variables on the
                        # generated batch; every other call reuses them.
                        fake_logit = critic(
                            generate_img, 64, 4,
                            tf.truncated_normal_initializer(stddev=stddev),
                            kernel_size=5,
                            reuse=(i >= 1))
                        true_logit = critic(
                            image_batch, 64, 4,
                            tf.truncated_normal_initializer(stddev=stddev),
                            kernel_size=5,
                            reuse=True)
                        # Share all variables with the towers built next.
                        tf.get_variable_scope().reuse_variables()

                        theta_g = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope='generator')
                        theta_c = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic')

                        c_loss = tf.reduce_mean(
                            fake_logit - true_logit, name='c_loss')
                        g_loss = tf.reduce_mean(-fake_logit, name='g_loss')

                        if use_gradient_penalty:
                            # Penalise the critic's gradient norm, evaluated
                            # at random interpolates of generated and real
                            # images, for deviating from 1.
                            alpha = tf.random_uniform(
                                shape=[batch_size, 1, 1, 1],
                                minval=0.,
                                maxval=1.)
                            x_hat = generate_img * alpha + (
                                1.0 - alpha) * image_batch
                            d_hat = critic(
                                x_hat, 64, 4,
                                tf.truncated_normal_initializer(
                                    stddev=stddev), 5, True)
                            gradients = tf.gradients(d_hat, [x_hat])[0]
                            ddx = tf.sqrt(
                                tf.reduce_sum(
                                    tf.square(gradients), axis=[1, 2, 3]))
                            ddx = tf.reduce_mean(
                                tf.square(ddx - tf.constant(1, tf.float32))
                            ) * tf.constant(norm_val, tf.float32)
                            c_loss = c_loss + ddx

                        tower_c_losses.append(c_loss)
                        tower_grads_c.append(
                            opt_c.compute_gradients(c_loss, var_list=theta_c))
                        tower_grads_g.append(
                            opt_g.compute_gradients(g_loss, var_list=theta_g))
                        # A leftover debug print + exit(0) used to sit here;
                        # it terminated the process inside the first tower,
                        # so the function could never return.

        average_grads_c = average_gradients(tower_grads_c)
        average_grads_g = average_gradients(tower_grads_g)
        total_c_loss = average_loss(tower_c_losses)

        tf.summary.scalar("critic_loss", total_c_loss)
        # Summaries use the images of the last tower built.
        tf.summary.image('img', generate_img, max_outputs=10)
        variable_summaries(generate_img, 'generated_img')

        apply_gradient_c = opt_c.apply_gradients(average_grads_c)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # add dependency of updating moving statistics of batch normalization
        with tf.control_dependencies(update_ops):
            apply_gradient_g = opt_g.apply_gradients(average_grads_g)

        if not use_gradient_penalty:
            theta_c = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic')
            # NOTE(review): the assign ops are created outside the
            # control_dependencies block below, so only the tf.tuple outputs
            # are ordered after the critic update — confirm this is enough.
            clipped_var_c = [
                tf.assign(var,
                          tf.clip_by_value(var, clamp_lower, clamp_upper))
                for var in theta_c
            ]
            # merge the clip operations on critic variables
            with tf.control_dependencies([apply_gradient_c]):
                apply_gradient_c = tf.tuple(clipped_var_c)

        return apply_gradient_g, apply_gradient_c, total_c_loss, phase
def get_state_update_op(state_variables, new_states):
    """Build an op that stores ``new_states`` into ``state_variables``.

    Args:
        state_variables: Sequence of pairs of variables (indexable with
            ``[0]`` and ``[1]``), one pair per layer.
        new_states: Sequence of pairs of tensors holding the values to store,
            aligned with ``state_variables``.

    Returns:
        The ``tf.tuple`` of all assign ops. Running it performs every
        assignment; the returned values themselves are not meant to be used.
    """
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        # Assign (not compare): `==` only produced comparison results and
        # never wrote the new state into the variables.
        update_ops.extend([state_variable[0].assign(new_state[0]),
                           state_variable[1].assign(new_state[1])])
    return tf.tuple(update_ops)
def __call__(self, dataset, moving_params=None):
    """Build the parser graph for ``dataset`` and return its outputs.

    The inputs are embedded, passed through ``self.n_recur`` recurrent
    layers and an MLP whose output is split into arc and relation parts for
    dependents and heads. Arc logits come from a bilinear classifier,
    relation logits from a conditional bilinear classifier conditioned on
    the gold heads (``moving_params is None``) or on the predicted heads.

    Variables are reused whenever ``moving_params`` is given.

    Returns:
        A dict with the combined probabilities, predictions, correctness
        counts, accuracy, loss and the intermediate tensors.
    """
    vocabs, inputs, targets = dataset.vocabs, dataset.inputs, dataset.targets
    reuse = moving_params is not None

    # Mask of tokens whose word id is greater than the ROOT id.
    is_kept = tf.greater(inputs[:, :, 0], vocabs[0].ROOT)
    self.tokens_to_keep3D = tf.expand_dims(tf.to_float(is_kept), 2)
    kept_per_sentence = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
    self.sequence_lengths = tf.reshape(kept_per_sentence, [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    word_inputs, pret_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    embed_inputs = self.embed_concat(word_inputs + pret_inputs)

    top_recur = embed_inputs
    for layer in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % layer, reuse=reuse):
            top_recur, _ = self.RNN(top_recur)

    with tf.variable_scope('MLP', reuse=reuse):
        dep_mlp, head_mlp = self.MLP(
            top_recur, self.class_mlp_size + self.attn_mlp_size, n_splits=2)
        # First attn_mlp_size features feed the arc classifier, the rest the
        # relation classifier.
        arc_size = self.attn_mlp_size
        dep_arc_mlp = dep_mlp[:, :, :arc_size]
        dep_rel_mlp = dep_mlp[:, :, arc_size:]
        head_arc_mlp = head_mlp[:, :, :arc_size]
        head_rel_mlp = head_mlp[:, :, arc_size:]

    with tf.variable_scope('Arcs', reuse=reuse):
        arc_logits = self.bilinear_classifier(dep_arc_mlp, head_arc_mlp)
        arc_output = self.output(arc_logits, targets[:, :, 1])
        if moving_params is not None:
            predictions = arc_output['predictions']
        else:
            predictions = targets[:, :, 1]

    with tf.variable_scope('Rels', reuse=reuse):
        rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(
            dep_rel_mlp, head_rel_mlp, len(vocabs[2]), predictions)
        rel_output = self.output(rel_logits, targets[:, :, 2])
        rel_output['probabilities'] = self.conditional_probabilities(
            rel_logits_cond)

    probabilities = tf.tuple(
        [arc_output['probabilities'], rel_output['probabilities']])
    stacked_predictions = tf.pack(
        [arc_output['predictions'], rel_output['predictions']])
    correct = arc_output['correct'] * rel_output['correct']
    n_correct = tf.reduce_sum(correct)
    accuracy = n_correct / self.n_tokens
    loss = arc_output['loss'] + rel_output['loss']
    if self.word_l2_reg > 0:
        # NOTE(review): `word_loss` is not defined anywhere in this method —
        # confirm it exists at module level.
        loss += word_loss

    return {
        'probabilities': probabilities,
        'predictions': stacked_predictions,
        'correct': correct,
        'tokens': arc_output['tokens'],
        'n_correct': n_correct,
        'n_tokens': self.n_tokens,
        'accuracy': accuracy,
        'loss': loss,
        'embed': embed_inputs,
        'recur': top_recur,
        'dep_arc': dep_arc_mlp,
        'head_dep': head_arc_mlp,
        'dep_rel': dep_rel_mlp,
        'head_rel': head_rel_mlp,
        'arc_logits': arc_logits,
        'rel_logits': rel_logits,
    }
def gridlstm_def(self, rnn_input, seq_len):
    """Build the GridLSTM layer plus ops to keep and to reset its state.

    Args:
        rnn_input: Input passed to ``tf.nn.dynamic_rnn``.
        seq_len: Sequence lengths passed to ``tf.nn.dynamic_rnn``.

    Returns:
        ``(rnn_outputs, gridrnn_keep_state_op, gridrnn_state_zero_op)``.
        ``rnn_outputs`` is transposed with ``[1, 0, 2]`` when
        ``self.time_major`` is false. The two ops are ``tf.tuple`` results
        whose values are not meant to be used.

    NOTE(review): the scope nesting below was reconstructed from a source
    whose indentation was lost — confirm against the original file.
    """
    with tf.variable_scope('GridLSTM'):
        def gridlstm_cell():
            return tf.contrib.rnn.GridLSTMCell(self.grid_num_units,
                                               use_peepholes=self.use_peepholes,
                                               feature_size=self.grid_feature_size,
                                               frequency_skip=self.grid_frequency_skip,
                                               num_frequency_blocks=[self.num_frequency_blocks],
                                               state_is_tuple=self.state_is_tuple,
                                               reuse=tf.get_variable_scope().reuse)
        cell = gridlstm_cell()
        # The string below is disabled code kept by the original author.
        '''
        state_variables = []
        for state_c, state_h in cell.zero_state(self.batch_size, tf.float32):
            state_variables.append(tf.contrib.rnn.LSTMStateTuple(
                tf.Variable(state_c, trainable=False),
                tf.Variable(state_h, trainable=False)))
        # Return as a tuple, so that it can be fed to dynamic_rnn as an initial state
        rnn_tuple_state = tuple(state_variables)
        '''
        # One non-trainable zero variable; the same object fills every slot
        # of the initial state tuple below.
        state_value = tf.Variable(
            np.zeros((self.batch_size, self.grid_num_units),dtype=np.float32),
            trainable=False, dtype=tf.float32)
        #state_value = tf.constant(
        #    np.zeros((self.batch_size,64),dtype=np.float32),
        #    dtype=tf.float32)
        gridrnn_tuple_state = cell.state_tuple_type(
            *([state_value,state_value] * self.num_frequency_blocks))
        # Build the RNN
        with tf.name_scope("GridLSTM"):
            rnn_outputs, new_states = tf.nn.dynamic_rnn(cell=cell, inputs=rnn_input,
                                                        sequence_length=seq_len,
                                                        initial_state=gridrnn_tuple_state,
                                                        dtype=tf.float32,
                                                        time_major=self.time_major)
        update_ops = []
        # NOTE(review): every element of gridrnn_tuple_state is `state_value`
        # itself, so `[0]` / `[1]` index into that variable rather than
        # selecting a (c, h) pair — confirm this is intended.
        for state_variable, new_state in zip(gridrnn_tuple_state, new_states):
            # Assign the new state to the state variables on this layer
            update_ops.extend([state_variable[0].assign(new_state[0]),
                               state_variable[1].assign(new_state[1])])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        gridrnn_keep_state_op = tf.tuple(update_ops)
        # Define an op to reset the hidden state to zeros
        update_ops = []
        for state_variable in gridrnn_tuple_state:
            # Assign the new state to the state variables on this layer
            update_ops.extend([state_variable[0].assign(tf.zeros_like(state_variable[0])),
                               state_variable[1].assign(tf.zeros_like(state_variable[1]))])
        # Return a tuple in order to combine all update_ops into a single operation.
        # The tuple's actual value should not be used.
        gridrnn_state_zero_op = tf.tuple(update_ops)
        if not self.time_major:
            rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2]) # [time, batch_size, cell_outdim]
        return rnn_outputs, gridrnn_keep_state_op, gridrnn_state_zero_op
        # NOTE(review): the statements below follow a return and appear to be
        # unreachable; `batch_size` is also not defined in this method.
        print(batch_size,self.proj_dim,self.output_size,seq_len.shape)
        logits = self.AffineTransform(rnn_outputs)
        return logits, gridrnn_keep_state_op, gridrnn_state_zero_op
def __init__(self, hidden_size = 75, embedding_size = 300, is_training= True):
    """Build the question/passage reading graph.

    Creates placeholders for the embedded question and passage and for the
    answer start/stop indices, encodes both sequences with LSTMs, runs a
    per-example attention loop over the encodings ("q-p_attention"), a
    second attention loop over its result ("self-matching"), and an output
    layer producing two attention distributions over passage positions
    (``predictions[0]`` and ``predictions[1]``). Finally defines
    ``self.loss``, ``self.accuracy_start`` and ``self.accuracy_stop``.

    Args:
        hidden_size: Number of units of every LSTM cell and side length of
            the attention weight matrices.
        embedding_size: Last dimension of the two representation
            placeholders.
        is_training: Only referenced from disabled code in this method.

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation was lost — confirm scopes against the original file.
    """
    self.start_index = tf.placeholder(tf.int32, [None]) #[batch_size]
    self.stop_index = tf.placeholder(tf.int32, [None]) #[batch_size]
    #self.dropout_rate = tf.placeholder(tf.int32 , [1])
    # NOTE(review): input_dim is accumulated below but not read afterwards
    # outside disabled code.
    input_dim = 0
    with tf.name_scope("word-rep"):
        self.question_repres = tf.placeholder(tf.float32, [None, None, embedding_size]) # [batch_size, question_len, word_dim]
        self.passage_repres = tf.placeholder(tf.float32, [None, None, embedding_size]) # [batch_size, passage_len, word_dim]
        # get_length is defined elsewhere — presumably returns the number of
        # non-padding positions per example; confirm.
        self.question_lengths = get_length(self.question_repres) #[batch_size]
        self.passage_lengths = get_length(self.passage_repres) #[batch_size]
        input_shape = tf.shape(self.question_repres)
        batch_size = input_shape[0]
        batch_size = tf.cast(batch_size, tf.int32)
        question_len = input_shape[1]
        input_shape = tf.shape(self.passage_repres)
        passage_len = input_shape[1]
        input_dim += input_shape[2]
        # The string below is disabled character-embedding code kept by the
        # original author.
        """
        if with_char and char_vocab is not None:
            self.question_char_lengths = tf.placeholder(tf.int32, [None,None]) # [batch_size, question_len]
            self.passage_char_lengths = tf.placeholder(tf.int32, [None,None]) # [batch_size, passage_len]
            self.question_chars = tf.placeholder(tf.int32, [None, None, None]) # [batch_size, question_len, q_char_len]
            self.passage_chars = tf.placeholder(tf.int32, [None, None, None]) # [batch_size, passage_len, p_char_len]
            input_shape = tf.shape(self.question_chars)
            batch_size = input_shape[0]
            question_len = input_shape[1]
            q_char_len = input_shape[2]
            input_shape = tf.shape(self.passage_chars)
            passage_len = input_shape[1]
            p_char_len = input_shape[2]
            char_dim = char_vocab.word_dim
            self.char_embedding = tf.get_variable("char_embedding", initializer=tf.constant(char_vocab.word_vecs), dtype=tf.float32)
            question_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.question_chars) # [batch_size, question_len, q_char_len, char_dim]
            question_char_repres = tf.reshape(question_char_repres, shape=[-1, q_char_len, char_dim])
            question_char_lengths = tf.reshape(self.question_char_lengths, [-1])
            passage_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.passage_chars) # [batch_size, passage_len, p_char_len, char_dim]
            passage_char_repres = tf.reshape(passage_char_repres, shape=[-1, p_char_len, char_dim])
            passage_char_lengths = tf.reshape(self.passage_char_lengths, [-1])
            with tf.variable_scope('char_lstm'):
                # lstm cell
                char_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(char_lstm_dim)
                # dropout
                if is_training: char_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(char_lstm_cell, output_keep_prob=(1 - dropout_rate))
                char_lstm_cell = tf.nn.rnn_cell.MultiRNNCell([char_lstm_cell])
                # question_representation
                question_char_outputs = my_rnn.dynamic_rnn(char_lstm_cell, question_char_repres, sequence_length=question_char_lengths,dtype=tf.float32)[0] # [batch_size*question_len, q_char_len, char_lstm_dim]
                question_char_outputs = question_char_outputs[:,-1,:]
                question_char_outputs = tf.reshape(question_char_outputs, [batch_size, question_len, char_lstm_dim])
                tf.get_variable_scope().reuse_variables()
                # passage representation
                passage_char_outputs = my_rnn.dynamic_rnn(char_lstm_cell, passage_char_repres, sequence_length=passage_char_lengths,dtype=tf.float32)[0] # [batch_size*question_len, q_char_len, char_lstm_dim]
                passage_char_outputs = passage_char_outputs[:,-1,:]
                passage_char_outputs = tf.reshape(passage_char_outputs, [batch_size, passage_len, char_lstm_dim])
            question_repres.append(question_char_outputs)
            passage_repres.append(passage_char_outputs)
            input_dim += char_lstm_dim
        question_repres = tf.concat(2, question_repres) # [batch_size, question_len, dim]
        passage_repres = tf.concat(2, passage_repres) # [batch_size, passage_len, dim]
        """
    #if is_training:
    #    self.question_repres = tf.nn.dropout(self.question_repres, (1 - self.dropout_rate))
    #    self.passage_repres = tf.nn.dropout(self.passage_repres, (1 - self.dropout_rate))
    #else:
    #    self.question_repres = tf.mul(self.question_repres, (1 - self.dropout_rate))
    #    self.passage_repres = tf.mul(self.passage_repres, (1 - self.dropout_rate))
    passage_mask = tf.sequence_mask(self.passage_lengths, passage_len, dtype=tf.float32) # [batch_size, passage_len]
    question_mask = tf.sequence_mask(self.question_lengths, question_len, dtype=tf.float32) # [batch_size, question_len]
    # - sequence length helper function
    def seq_len(seq):
        # Counts the non-zero entries along the last axis.
        seq_bool = tf.sign(tf.abs(seq))
        return tf.reduce_sum(seq_bool, axis=-1)
    with tf.name_scope("q-p_encoder"):
        with tf.variable_scope("passage-encoder"):
            #W = tf.Variable(tf.truncated_normal(shape = [], stddev=0.05),name = "w")
            #b = tf.Variable(tf.constant(0.1, shape=[]),name="b")
            fcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            # NOTE(review): bcell is created but only fcell is passed to
            # dynamic_rnn in this scope.
            bcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            u_p,_ = tf.nn.dynamic_rnn(fcell, inputs = self.passage_repres,dtype= tf.float32, sequence_length = self.passage_lengths)
        with tf.variable_scope("question-encoder"):
            #W = tf.Variable(tf.truncated_normal(shape, stddev=0.05),name = "w")
            #b = tf.Variable(tf.constant(0.1, shape=[]),name="b")
            fcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            # NOTE(review): same remark — bcell is unused here.
            bcell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            u_q,_ = tf.nn.dynamic_rnn(fcell, inputs =self.question_repres,dtype= tf.float32, sequence_length = self.question_lengths)
    # i : batch_number , k : question_len_number
    #unstacked_u_q = tf.unstack(u_q, axis = 0,num = 10)
    #unstacked_u_p = tf.unstack(u_p,axis = 0,num = 10)
    #print(unstacked_u_q)
    with tf.name_scope("q-p_attention"):
        lstm_m_cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size)
        def match_attention(k, q_i, p_i, len_q_i, state, batch_tensor):
            # Body of the inner while_loop: attends over the first len_q_i
            # rows of q_i using row k of p_i and the LSTM hidden state, then
            # feeds [p_i[k], attended q] to lstm_m_cell and records the
            # output at index k.
            p_i_k = tf.reshape(p_i[k], [1, -1])
            q_i_k = tf.slice(q_i, begin=[0,0], size=[len_q_i, hidden_size])
            with tf.variable_scope('attn_weights'):
                w_s = tf.get_variable(shape=[hidden_size, hidden_size], name='w_s')
                w_t = tf.get_variable(shape=[hidden_size, hidden_size], name='w_t')
                w_m = tf.get_variable(shape=[hidden_size, hidden_size], name='w_m')
                w_e = tf.get_variable(shape=[hidden_size, 1], name='w_e')
            m_lstm_state = tf.reshape(state.h,[1,-1])
            sum_m = tf.matmul(q_i_k, w_s) + tf.matmul(p_i_k, w_t) + tf.matmul(m_lstm_state, w_m)
            s_k = tf.matmul(tf.tanh(sum_m), w_e)
            # Normalise exp(scores) so the weights sum to one.
            exps = tf.reshape(tf.exp(s_k), [len_q_i])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 0), [1])
            # NOTE(review): the weights have length len_q_i but multiply the
            # un-sliced q_i — this relies on q_i having len_q_i rows; confirm.
            a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)
            a_k = tf.reshape(a_k, [1,hidden_size])
            m_k = tf.concat([p_i_k , a_k], axis=1)
            with tf.variable_scope('lstm_m_step'):
                out, next_state = lstm_m_cell(inputs=m_k, state=state)
            batch_tensor = batch_tensor.write(k,out)
            k = tf.add(k,1)
            return k, q_i, p_i, len_q_i, next_state, batch_tensor
        def match_sentence(i, h_m_ta):
            # Body of the outer while_loop: processes example i and writes
            # its stacked per-step outputs into h_m_ta at index i.
            #p_emb_i, h_emb_i = u_q[i], u_p[i]
            p_i = u_p[i] #q_i :[question_len,input_dim] , p_i:[passage_len,input_dim]
            q_i = u_q[i]
            len_q_i, len_p_i = seq_len(question_mask[i]), seq_len(passage_mask[i])
            len_q_i = tf.cast(len_q_i, tf.int32)
            len_p_i = tf.cast(len_p_i, tf.int32)
            state = lstm_m_cell.zero_state(batch_size=1, dtype=tf.float32)
            batch_tensor = tf.TensorArray(dtype=tf.float32, size=tf.cast(len_q_i, tf.int32))
            # inner loop
            # NOTE(review): the loop runs len_q_i steps while match_attention
            # indexes the passage (p_i[k]) with the loop counter; len_p_i is
            # not used — confirm which length was intended.
            k = tf.constant(0)
            c = lambda a, x, y, z, s, u: tf.less(a, tf.cast(len_q_i, tf.int32))
            b = lambda a,x,y,z,s,u : match_attention(a,x,y,z,s,u)
            res = tf.while_loop(cond=c, body=b, loop_vars=(k, q_i, p_i, len_q_i, state, batch_tensor))
            temp = tf.squeeze(res[-1].stack(),axis = 1)
            h_m_ta = h_m_ta.write(i, temp)
            i = tf.add(i,1)
            return i, h_m_ta
        with tf.variable_scope('lstm_matching'):
            h_m_ta = tf.TensorArray(dtype=tf.float32, size=batch_size)
            #h_m_ta = np.array([10,15,75])
            c = lambda x ,y: tf.less(x, batch_size)
            b = lambda x ,y: match_sentence(x,y)
            i = tf.constant(0)
            h_m_res = tf.while_loop(cond=c, body=b, loop_vars = (i, h_m_ta))
            v_p = h_m_res[-1].stack()
    with tf.name_scope("self-matching"):
        bilstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
        def self_match_attention(t,p_i,len_p_i,state,batch_val):
            # Body of the inner while_loop: attends over p_i using its own
            # row t, then feeds [attended, p_i[t]] to bilstm_cell.
            v_p_t = tf.reshape(p_i[t],[1,-1])
            v_p = p_i
            with tf.variable_scope("w"):
                w_v_p = tf.get_variable(shape = [hidden_size, hidden_size], name = "w_v_p")
                w_v_p_ = tf.get_variable(shape = [hidden_size, hidden_size], name = "w_v_p_")
                w_v_e = tf.get_variable(shape = [hidden_size,1], name = "w_v_e")
            # NOTE(review): bilstm_state is computed but not used below.
            bilstm_state = tf.reshape(state.h,[1,-1])
            sum_m = tf.matmul(v_p,w_v_p)
            sum_m += tf.matmul(v_p_t,w_v_p_)
            s_t = tf.matmul(tf.tanh(sum_m),w_v_e)
            exps = tf.reshape(tf.exp(s_t), [len_p_i])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 0), [1])
            a_t = tf.reduce_sum(p_i* tf.reshape(alphas, [len_p_i, 1]), 0)
            a_t = tf.reshape(a_t, [1,hidden_size])
            m_t = tf.concat([a_t, v_p_t], axis=1)
            with tf.variable_scope('lstm_m_step'):
                out, next_state = bilstm_cell(inputs=m_t, state=state)
            batch_val = batch_val.write(t,out)
            t = tf.add(t,1)
            return t,p_i,len_p_i,next_state,batch_val
        def self_match_sentence(i,h):
            # Body of the outer while_loop for the self-matching pass.
            p_i = v_p[i]
            len_p_i = tf.cast(seq_len(passage_mask[i]),tf.int32)
            state = bilstm_cell.zero_state(batch_size =1, dtype = tf.float32)
            # NOTE(review): the TensorArray is created with size=1 (no
            # dynamic_size argument) but is written at index t for every
            # loop step up to len_p_i — confirm.
            batch_val = tf.TensorArray(dtype=tf.float32, size=1)
            t = tf.constant(0)
            c = lambda a, x, y, z, s : tf.less(a, len_p_i)
            b = lambda a, x, y, z, s : self_match_attention(a, x, y, z, s)
            res = tf.while_loop(cond = c, body =b, loop_vars = (t,p_i,len_p_i,state,batch_val))
            tem = tf.squeeze(res[-1].stack(),axis=1)
            h = h.write(i,tem)
            i = tf.add(i,1)
            return i,h
        with tf.name_scope("lstm_self_matching"):
            h = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x,y : tf.less(x,tf.cast(batch_size, tf.int32))
            b = lambda x,y : self_match_sentence(x,y)
            i = tf.constant(0)
            res = tf.while_loop(cond = c, body = b, loop_vars = (i,h))
            h_p = res[-1].stack()
            print(h_p)
    with tf.variable_scope("output_layer"):
        with tf.name_scope("intial_state"):
            with tf.variable_scope("par"):
                w_v_q = tf.get_variable(shape = [hidden_size, hidden_size],name = 'w_v_q')
                w_u_q = tf.get_variable(shape = [hidden_size, hidden_size],name = 'w_u_q')
                # NOTE(review): the first dimension is hard-coded to 15.
                V_r_q = tf.get_variable(shape = [15, hidden_size],name = 'V_r_q') #15 : question_len
                e = tf.get_variable(shape = [hidden_size,1],name = 'e')
            shape_u_q = tf.shape(u_q)
            sum_m = tf.reshape(tf.matmul(tf.reshape(u_q,[-1,hidden_size]),w_u_q),shape_u_q)
            sum_m += tf.matmul(V_r_q,w_v_q)
            s = tf.matmul(tf.reshape(tf.tanh(sum_m),[-1,hidden_size]),e) # [bs*len,1]
            # Attention over question positions; the weighted sum of u_q is
            # the initial state of the answer network.
            exps = tf.reshape(tf.exp(s), [-1, question_len])
            alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
            initial_s = tf.reduce_sum(u_q * tf.reshape(alphas, [-1, question_len, 1]), 1) #[batch_size,hidden_size]
            # NOTE(review): c_ is not used afterwards.
            c_ = tf.zeros(shape = tf.shape(initial_s), dtype = tf.float32)
        with tf.name_scope("answer_recurrent_network"):
            answer_lstm = tf.contrib.rnn.BasicLSTMCell(hidden_size)
            predictions = []
            shape_h_p = tf.shape(h_p)
            with tf.variable_scope('wi'):
                w_h_p = tf.get_variable(shape = [hidden_size,hidden_size], name = "w_h_p")
                w_h_a = tf.get_variable(shape = [hidden_size,hidden_size], name = "w_h_a")
                # NOTE(review): w_h_e is created but the scores below use `e`.
                w_h_e = tf.get_variable(shape = [hidden_size,1], name = "w_h_e")
            # Two steps: the first attention distribution is appended as
            # predictions[0], the second as predictions[1].
            for i in range(2):
                if(i==0):
                    sum_m = tf.reshape(tf.reshape(tf.matmul(tf.reshape(h_p,[-1,hidden_size]),w_h_p),shape_h_p) + tf.matmul(initial_s,w_h_a),[-1,hidden_size])
                    s = tf.matmul(tf.tanh(sum_m),e)
                    exps = tf.reshape(tf.exp(s), [-1, passage_len])
                    alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
                    predictions.append(alphas)
                    alphas = tf.reshape(alphas, [-1,passage_len,1])
                    #a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)
                    input_a = tf.reduce_sum(h_p*alphas, 1)
                    # The same tensor is used for both entries of the state
                    # pair handed to the LSTM on the first step.
                    initial_s = tf.tuple([initial_s,initial_s])
                else:
                    sum_m = tf.reshape(tf.reshape(tf.matmul(tf.reshape(h_p,[-1,hidden_size]),w_h_p),shape_h_p) + tf.matmul(initial_s.h,w_h_a),[-1,hidden_size])
                    s = tf.matmul(tf.tanh(sum_m),e)
                    exps = tf.reshape(tf.exp(s), [-1, passage_len])
                    alphas = exps / tf.reshape(tf.reduce_sum(exps, 1), [-1, 1])
                    predictions.append(alphas)
                    alphas = tf.reshape(alphas, [-1,passage_len,1])
                    #a_k = tf.reduce_sum(q_i* tf.reshape(alphas, [len_q_i, 1]), 0)
                    input_a = tf.reduce_sum(h_p*alphas, 1)
                _, initial_s = answer_lstm.call(input_a ,initial_s)
    with tf.name_scope("loss"):
        pred_start = predictions[0] # [batch_size, passage_len]
        pred_end = predictions[1] # [batch_size , passage_len]
        def calc_loss(pred, ind):
            # NOTE(review): this iterates over `pred` and calls float() on
            # its elements in Python, and compares a Python index with `ind`
            # (a placeholder at the call sites below) — confirm this works
            # with graph tensors.
            loss = 0.0
            for batch in pred:
                for i,val in enumerate(batch):
                    if(i==ind):
                        loss+= tf.log(float(val))
                    else:
                        loss+= tf.log(1-float(val))
            return loss
        self.loss = calc_loss(pred_start, self.start_index) + calc_loss(pred_end, self.stop_index)
    with tf.name_scope("accuracy"):
        correct_start = tf.equal(tf.argmax(pred_start, 1), self.start_index)
        self.accuracy_start = tf.reduce_mean(tf.cast(correct_start, 'float'))
        # NOTE(review): `pred_stop` is not defined in this method (the second
        # prediction is named `pred_end` above) — confirm.
        correct_stop = tf.equal(tf.argmax(pred_stop, 1), self.stop_index)
        self.accuracy_stop = tf.reduce_mean(tf.cast(correct_stop, 'float'))
def loop_body(i, *args):
    """Run one optimisation step on ``tiled_z`` and advance the counter.

    The loss is the squared error between the generator output for
    ``tiled_z`` and ``tiled_image_batch``, averaged over axes 1-3 per image
    and summed over images. Extra loop variables in ``args`` are ignored.

    Returns:
        ``tf.tuple`` of ``[i + 1, tiled_z, per_image_loss]`` gated on the
        minimisation op.
    """
    i += 1
    generated = g.callable_generator(tiled_z, False)
    squared_error = tf.square(generated - tiled_image_batch)
    per_image_loss = tf.reduce_mean(squared_error, axis=[1, 2, 3])
    total_loss = tf.reduce_sum(per_image_loss)
    train_step = optimizer.minimize(total_loss, var_list=[tiled_z])
    loop_outputs = [i, tiled_z, per_image_loss]
    return tf.tuple(loop_outputs, control_inputs=[train_step])
def _run_network_test(self, network_fun, inputs, inf_type=spn.InferenceType.MARGINAL, log=False, on_gpu=True):
    """Build, time and check ``self.num_networks`` stacked copies of one network.

    Args:
        network_fun: Callable that builds a network; its ``__name__`` labels the
            result and the profile file.
        inputs: Array fed to the indicator leaf on every run.
        inf_type: ``spn.InferenceType`` used to build the network.
        log: Whether the network is evaluated in log space (the output is
            exponentiated before comparison).
        on_gpu: Build on ``/gpu:0`` when true, otherwise on ``/cpu:0``.

    Returns:
        An ``OpTestResult`` with sizes, timings and the correctness flag.
    """
    # Preparations
    op_name = network_fun.__name__
    if on_gpu:
        device_name = '/gpu:0'
    else:
        device_name = '/cpu:0'

    # Print
    inference_label = "MPE" if inf_type == spn.InferenceType.MPE else "MARGINAL"
    header = "--> %s: on_gpu=%s, inputs_shape=%s, inference=%s, log=%s" % (
        op_name, on_gpu, inputs.shape, inference_label, log)
    print2(header, self.file)

    # Reference output the graph result is compared against
    true_out = self._true_output(network_fun, inputs, self.num_input_vals,
                                 self.num_mixtures, self.num_subsets, inf_type)

    # Create graph
    tf.reset_default_graph()
    with tf.device(device_name):
        # Create input
        inputs_pl = spn.IndicatorLeaf(num_vars=self.num_input_vars,
                                      num_vals=self.num_input_vals, name="iv_x")
        # Create networks, stacking one on top of the other, although each
        # network remains unconnected and independent of each other.
        build_started = time.time()
        root, init_network, network = network_fun(
            inputs_pl, self.num_input_vals, self.num_mixtures,
            self.num_subsets, inf_type, log)
        for _ in range(self.num_networks - 1):
            # Passing the previous output through tf.tuple makes the next
            # network wait for it, which stacks the networks while each one
            # still consumes the original input.
            previous_output = tf.tuple([network])[0]
            root, init_network, network = network_fun(
                inputs_pl, self.num_input_vals, self.num_mixtures,
                self.num_subsets, inf_type, log, previous_output)
        setup_time = time.time() - build_started

    # Get num of SPN ops
    spn_size = root.get_num_nodes() * self.num_networks
    # Get num of graph ops
    tf_size = len(tf.get_default_graph().get_operations())

    # Run op multiple times
    output_correct = True
    session_config = tf.ConfigProto(allow_soft_placement=False,
                                    log_device_placement=self.log_devs)
    with tf.Session(config=session_config) as sess:
        # Initialize weights of all the sum node types in the graph
        init_started = time.time()
        init_network.run()
        weights_init_time = time.time() - init_started

        run_times = []
        # Create feed dictionary
        feed = {inputs_pl: inputs}
        for _ in range(self.num_runs):
            # Run
            run_started = time.time()
            out = sess.run(network, feed_dict=feed)
            run_times.append(time.time() - run_started)
            # Test value
            try:
                np.testing.assert_array_almost_equal(
                    (np.exp(out) if log else out), true_out)
            except AssertionError:
                output_correct = False
                self.test_failed = True

        if self.profile:
            # Add additional options to trace the session execution
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            out = sess.run(network, feed_dict=feed, options=options,
                           run_metadata=run_metadata)

            # Create the Timeline object, and write it to a json file
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            if not os.path.exists(self.profiles_dir):
                os.makedirs(self.profiles_dir)

            if inf_type == spn.InferenceType.MPE:
                inference_suffix = "_MPE-LOG" if log else "_MPE"
            else:
                inference_suffix = "_MARGINAL-LOG" if log else "_MARGINAL"
            file_name = op_name
            file_name += "_GPU" if on_gpu else "_CPU"
            file_name += inference_suffix

            trace_path = '%s/timeline_value_%s.json' % (self.profiles_dir, file_name)
            with open(trace_path, 'w') as f:
                f.write(chrome_trace)

    # Return stats
    return OpTestResult(op_name, on_gpu, spn_size, tf_size, setup_time,
                        weights_init_time, run_times, output_correct)
def create_gradient_clipping(loss,optm,vars,clipVal=1.0):
    """Return ``loss`` gated on an update step that uses value-clipped gradients.

    Args:
        loss: Tensor to minimise; also the value that is returned.
        optm: Optimizer providing ``compute_gradients`` / ``apply_gradients``.
        vars: Variables to differentiate with respect to.
        clipVal: Each gradient element is clipped to ``[-clipVal, clipVal]``.

    Returns:
        A tensor carrying the value of ``loss`` that, when evaluated, also runs
        the clipped-gradient update.
    """
    gradients, variables = zip(*optm.compute_gradients(loss, var_list=vars))

    # Variables without a gradient keep their ``None`` entry untouched.
    clipped = []
    for gradient in gradients:
        if gradient is not None:
            gradient = tf.clip_by_value(gradient, -clipVal, clipVal)
        clipped.append(gradient)

    update_op = optm.apply_gradients(zip(clipped, variables))
    gated = tf.tuple([loss], control_inputs=[update_op])
    return gated[0]
def loop_fn(time, cell_output, cell_state, loop_state):
    """
    Loop function that allows to control input to the rnn cell and manipulate cell outputs.

    Reads ``max_time``, ``cell``, ``batch_size``, ``output_dim``, ``output_ta``,
    ``inputs_ta`` and ``rate_x`` from the enclosing scope.

    NOTE(review): as this block stands, ``next_input`` is never assigned before the
    ``return`` and the ``t > 0`` branch assigns nothing at all (its statements are
    commented out). The source looks like it has lost lines -- restore them from
    version control before relying on this function.

    :param time: current time step
    :param cell_output: output from previous time step or None if time == 0
    :param cell_state: cell state from previous time step
    :param loop_state: custom loop state to share information between different iterations of this loop fn
    :return: tuple consisting of
      finished: scalar tensor, True once ``time >= max_time`` holds for every element
        (``tf.reduce_all`` over the element-wise comparison)
      next_input: input to next time step
      next_cell_state: cell state forwarded to next time step
      emit_output: The first return argument of raw_rnn. This is not necessarily the output of the RNN cell,
        but could e.g. be the output of a dense layer attached to the rnn layer.
      next_loop_state: loop state forwarded to the next time step
    """
    elements_finished = (time >= max_time)
    finished = tf.reduce_all(elements_finished)
    if cell_output is None:
        ''' time == 0, used for initialization before first call to cell This is just to defined the desired shape of the tensors '''
        next_cell_state = cell.zero_state(batch_size, tf.float32)
        ''' the emit_output in this case tells TF how future emits look For the first call to loop_fn the emit_output corresponds to the emit_structure which is then used to determine the size of the zero_tensor for the emit_ta (defaults to cell.output_size). '''
        # Three emitted tensors of size [output_dim]; the commented-out code in
        # the other branch suggests (mean, var, current_z) -- TODO confirm.
        emit_output = tf.tuple([
            tf.zeros([output_dim]),
            tf.zeros([output_dim]),
            tf.zeros([output_dim])
        ])
        # tf.zeros([config.batch_size, output_dim], dtype=tf.float32)
        # tf.zeros([output_dim])
        next_loop_state = output_ta
        ''' this is the initial step, i.e. there is no output from a previous time step, what we feed here can highly depend on the data. In this case we just assign the actual input in the first time step. '''
        init_z = tf.zeros((batch_size, output_dim), dtype=tf.float32)
        #init_z = tf.random_normal((config.batch_size, output_dim), 0, 1, dtype=tf.float32)
        # First input: dropout-regularised x at this step concatenated with a zero latent.
        x_time = tf.layers.dropout(inputs_ta.read(time), rate=rate_x)
        next_in = tf.concat([x_time, init_z], 1)
    else:
        ''' t > 0, called right after call to cell, i.e. cell_output is the output from time t-1. here you can do whatever ou want with cell_output before assigning it to emit_output. In this case, we don't do anything pass the last state to the next '''
        # NOTE(review): everything below is commented out in the visible source, so this
        # branch leaves next_cell_state / emit_output / next_loop_state unassigned.
        # next_cell_state = cell_state
        # emit_output = tf.tuple([mean, var, current_z])
        # next_in = tf.cond(finished,lambda: tf.zeros([batch_size, rnn_input_dim], dtype=tf.float32),
        # next_loop_state = loop_state.write(time - 1,tf.concat([cell_state[0], cell_state[1]],1))
    # next_input = tf.cond(finished, lambda: tf.zeros([batch_size, rnn_input_dim], dtype=tf.float32), lambda: next_in)
    # next_input.set_shape([None, rnn_input_dim])
    return (finished, next_input, next_cell_state, emit_output, next_loop_state)
def train(dataset, initial_ckpt, learning_rate, logs_path, max_training_iters, save_step, display_step,
          global_step, iter_mean_grad=1, batch_size=1, momentum=0.9, resume_training=False, config=None,
          finetune=1):
    """Train the lesion-detection network.

    Gradients are accumulated over ``iter_mean_grad`` batches before each
    weight update. Every ``display_step`` steps one validation batch is
    evaluated and the losses/accuracies are reported on stderr.

    Args:
        dataset: Reference to a Dataset object instance
        initial_ckpt: Path to the checkpoint to initialize the network (may be the parent network or
            pre-trained Imagenet)
        learning_rate: Value for the learning rate. It can be a number or a learning rate tensor.
        logs_path: Path to store the checkpoints and the train/test summaries
        max_training_iters: Number of training iterations
        save_step: A checkpoint will be created every save_step steps
        display_step: Information of the training will be displayed every display_step steps
        global_step: Reference to a Variable that keeps track of the training steps
        iter_mean_grad: Number of gradient computations that are averaged before updating the weights
        batch_size: Number of 80x80x3 patches per batch
        momentum: Value of the momentum parameter for the Momentum optimizer
        resume_training: Boolean to try to restore from a previous checkpoint (True) or not (False)
        config: Reference to a Configuration object used in the creation of a Session
        finetune: Selects the type of training, 0 for the parent network and 1 for finetuning
    Returns:
        None
    """
    model_name = os.path.join(logs_path, "det_lesion.ckpt")
    if config is None:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True

    tf.logging.set_verbosity(tf.logging.INFO)

    # Prepare the input data
    input_image = tf.placeholder(tf.float32, [batch_size, 80, 80, 3])
    input_label = tf.placeholder(tf.float32, [batch_size])
    is_training = tf.placeholder(tf.bool, shape=())

    tf.summary.histogram('input_label', input_label)

    # Create the network
    with slim.arg_scope(det_lesion_arg_scope()):
        net, end_points = det_lesion_resnet(input_image, is_training_option=is_training)

    # Initialize weights from pre-trained model
    if finetune == 0:
        init_weights = load_resnet_imagenet(initial_ckpt)

    # Define loss
    with tf.name_scope('losses'):
        loss, output, target = binary_cross_entropy(net, input_label)
        total_loss = loss + tf.add_n(tf.losses.get_regularization_losses())
        tf.summary.scalar('losses/total_loss', total_loss)
        tf.summary.histogram('losses/output', output)
        tf.summary.histogram('losses/target', target)

    # Define optimization method
    with tf.name_scope('optimization'):
        tf.summary.scalar('learning_rate', learning_rate)
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
        #optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(total_loss)

        # Drop variables without a gradient once, up front, so that the
        # accumulators, the accumulate ops and the averaged gradients all
        # stay aligned with the same (gradient, variable) pairs.
        pairs_with_grad = [(grad, var) for grad, var in grads_and_vars if grad is not None]

        with tf.name_scope('grad_accumulator'):
            # One accumulator per gradient, typed after that gradient.
            grad_accumulator = [tf.ConditionalAccumulator(grad.dtype) for grad, _ in pairs_with_grad]

        with tf.name_scope('apply_gradient'):
            grad_accumulator_ops = []
            for accumulator, (var_grad, var) in zip(grad_accumulator, pairs_with_grad):
                var_name = str(var.name).split(':')[0]
                if "weights" in var_name:
                    aux_layer_lr = 1.0
                elif "biases" in var_name:
                    aux_layer_lr = 2.0
                else:
                    # Any other variable is left unscaled instead of silently
                    # reusing the multiplier of the previous variable.
                    aux_layer_lr = 1.0
                grad_accumulator_ops.append(
                    accumulator.apply_grad(var_grad * aux_layer_lr, local_step=global_step))

        with tf.name_scope('take_gradients'):
            mean_grads_and_vars = [
                (accumulator.take_grad(iter_mean_grad), var)
                for accumulator, (_, var) in zip(grad_accumulator, pairs_with_grad)
            ]
            apply_gradient_op = optimizer.apply_gradients(mean_grads_and_vars, global_step=global_step)

    with tf.name_scope('metrics'):
        acc_op = my_accuracy(net, input_label)
        tf.summary.scalar('metrics/accuracy', acc_op)

    # Make evaluating the loss also run the collected update ops.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        tf.logging.info('Gathering update_ops')
        with tf.control_dependencies(tf.tuple(update_ops)):
            total_loss = tf.identity(total_loss)

    merged_summary_op = tf.summary.merge_all()

    # Initialize variables
    init = tf.global_variables_initializer()

    with tf.Session(config=config) as sess:
        print('Init variable')
        sess.run(init)

        # op to write logs to Tensorboard
        logs_path_train = os.path.join(logs_path, 'train')
        logs_path_test = os.path.join(logs_path, 'test')
        summary_writer = tf.summary.FileWriter(logs_path_train, graph=tf.get_default_graph())
        test_writer = tf.summary.FileWriter(logs_path_test)

        # Create saver to manage checkpoints
        saver = tf.train.Saver(max_to_keep=None)

        last_ckpt_path = tf.train.latest_checkpoint(logs_path)
        if last_ckpt_path is not None and resume_training:
            # Load last checkpoint
            print('Initializing from previous checkpoint...')
            saver.restore(sess, last_ckpt_path)
            step = global_step.eval() + 1
        else:
            # Load pre-trained model
            if finetune == 0:
                print('Initializing from pre-trained imagenet model...')
                init_weights(sess)
            else:
                print('Initializing from pre-trained model...')
                # Restore only the weight/bias variables from the parent checkpoint.
                var_list = []
                for var in tf.global_variables():
                    var_type = var.name.split('/')[-1]
                    if 'weights' in var_type or 'bias' in var_type:
                        var_list.append(var)
                saver_res = tf.train.Saver(var_list=var_list)
                saver_res.restore(sess, initial_ckpt)
            step = 1
        sess.run(interp_surgery(tf.global_variables()))
        print('Weights initialized')

        print('Start training')
        while step < max_training_iters + 1:
            # Average the gradient
            for iter_steps in range(0, iter_mean_grad):
                batch_image, batch_label, x_bb_train, y_bb_train, ids_train = \
                    dataset.next_batch(batch_size, 'train', 0.5)
                batch_image_val, batch_label_val, x_bb_val, y_bb_val, ids_val = \
                    dataset.next_batch(batch_size, 'val', 0.5)
                image = preprocess_img(batch_image, x_bb_train, y_bb_train, ids_train)
                label = batch_label
                val_image = preprocess_img(batch_image_val, x_bb_val, y_bb_val)
                label_val = batch_label_val
                run_res = sess.run([total_loss, merged_summary_op, acc_op] + grad_accumulator_ops,
                                   feed_dict={input_image: image, input_label: label, is_training: True})
                batch_loss = run_res[0]
                summary = run_res[1]
                acc = run_res[2]
                if step % display_step == 0:
                    val_run_res = sess.run([total_loss, merged_summary_op, acc_op],
                                           feed_dict={input_image: val_image, input_label: label_val,
                                                      is_training: False})
                    val_batch_loss = val_run_res[0]
                    val_summary = val_run_res[1]
                    val_acc = val_run_res[2]

            # Apply the gradients
            sess.run(apply_gradient_op)

            # Save summary reports
            summary_writer.add_summary(summary, step)
            if step % display_step == 0:
                test_writer.add_summary(val_summary, step)

            # Display training status (``file`` belongs to print, not to format)
            if step % display_step == 0:
                print("{} Iter {}: Training Loss = {:.4f}".format(datetime.now(), step, batch_loss),
                      file=sys.stderr)
                print("{} Iter {}: Validation Loss = {:.4f}".format(datetime.now(), step, val_batch_loss),
                      file=sys.stderr)
                print("{} Iter {}: Training Accuracy = {:.4f}".format(datetime.now(), step, acc),
                      file=sys.stderr)
                print("{} Iter {}: Validation Accuracy = {:.4f}".format(datetime.now(), step, val_acc),
                      file=sys.stderr)

            # Save a checkpoint
            if step % save_step == 0:
                save_path = saver.save(sess, model_name, global_step=global_step)
                print("Model saved in file: %s" % (save_path))

            step += 1

        # Make sure the final state is saved when the loop did not end on a save step.
        if (step - 1) % save_step != 0:
            save_path = saver.save(sess, model_name, global_step=global_step)
            print("Model saved in file: %s" % (save_path))

        print('Finished training.')
def _compute_gradients(self, cost):
    """Computes gradients.

    When ``config.manual_gradients`` is off this defers to the parent class.
    Otherwise the gradient graph is built by hand: the classifier head is
    differentiated first, then the residual units are walked from last to
    first, reconstructing each unit's input and collecting its weight
    gradients, and finally the initial convolution/batch-norm variables are
    differentiated.

    Args:
        cost: Loss function. Only used by the parent-class path; the manual
            path rebuilds the cross-entropy cost from the saved activations.

    Returns:
        grads_and_vars: List of tuple of gradients and variables. Always a
            real list, so it can be iterated more than once.
    """
    config = self.config
    if not config.manual_gradients:
        return super(RevNetModel, self)._compute_gradients(cost)
    log.warning("Manually building gradient graph.")
    tf.get_variable_scope().reuse_variables()
    num_stages = len(self.config.num_residual_units)
    beta_final = tf.get_variable("unit_last/final_bn/beta")
    gamma_final = tf.get_variable("unit_last/final_bn/gamma")
    w_final = tf.get_variable("logit/w")
    b_final = tf.get_variable("logit/b")
    filters = [ff for ff in self.config.filters]  # Copy filter config.
    if config.use_bottleneck:
        res_func = self._bottleneck_residual_backward
        # For CIFAR-10 it's [16, 16, 32, 64] => [16, 64, 128, 256]
        for ii in range(1, len(filters)):
            filters[ii] *= 4
    else:
        res_func = self._residual_backward

    grads_list = []
    vars_list = []
    var_final = [beta_final, gamma_final, w_final, b_final]

    # Re-run the classifier head on the last saved activations, cut off from
    # the forward graph, to get gradients w.r.t. those activations.
    h1, h2 = self._saved_hidden[-1]
    h1, h2 = tf.stop_gradient(h1), tf.stop_gradient(h2)
    h = _concat([h1, h2], axis=3)
    with tf.variable_scope("unit_last"):
        h = self._batch_norm("final_bn", h, add_ops=False)
        h = self._relu("final_relu", h)

    h = self._global_avg_pool(h)
    with tf.variable_scope("logit"):
        logits = self._fully_connected(h, config.num_classes)
    with tf.variable_scope("costs"):
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.label)
        cost = tf.reduce_mean(xent, name="xent")

    _grads = tf.gradients(cost, [h1, h2] + var_final, gate_gradients=True)
    dh1, dh2 = _grads[0], _grads[1]
    _grads = _grads[2:]
    # Injected dependency.
    with tf.control_dependencies(_grads):
        h_grad = (tf.identity(dh1), tf.identity(dh2))
    grads_list.extend(_grads)
    # grads_list.extend(_grads[2:])
    vars_list.extend(var_final)

    h1, h2 = self._saved_hidden[-1]
    h1, h2 = tf.stop_gradient(h1), tf.stop_gradient(h2)
    h = (h1, h2)

    # New version, using single for-loop.
    # ``ss`` is the current stage, ``ii`` the unit index within that stage.
    ss = num_stages - 1
    ii = config.num_residual_units[ss] - 1
    nlayers = sum(config.num_residual_units)
    for _ in range(nlayers):
        no_activation = False
        if ii == 0:
            in_filter = filters[ss]
            stride = self._stride_arr(self.config.strides[ss])
            if ss == 0:
                no_activation = True
        else:
            in_filter = filters[ss + 1]
            stride = self._stride_arr(1)
        out_filter = filters[ss + 1]

        with tf.variable_scope("unit_{}_{}".format(ss + 1, ii)):
            # Reconstruct input.
            if ii == 0:
                h = self._saved_hidden[ss]
            else:
                h = res_func(h, out_filter)

            # Rerun the layer, and get gradients.
            h_grad, w_list, w_grad = self._residual_grad(
                h,
                h_grad,
                in_filter,
                out_filter,
                stride,
                no_activation=no_activation)
            grads_list.extend(w_grad)
            vars_list.extend(w_list)

        # Counter.
        if ii == 0:
            ss -= 1
            ii = config.num_residual_units[ss] - 1
        else:
            ii -= 1

    h_grad = _concat(h_grad, axis=3)
    w_init = tf.get_variable("init/init_conv/w")
    beta_init = tf.get_variable("init/init_bn/beta")
    gamma_init = tf.get_variable("init/init_bn/gamma")
    var_init = [beta_init, gamma_init, w_init]
    _grads = tf.gradients(h, var_init, h_grad)
    grads_list.extend(_grads)
    vars_list.extend(var_init)

    # Add weight decay.
    def add_wd(x):
        grad, w = x[0], x[1]
        assert self._wd_hidden > 0.0, "Not applying weight decay"
        if w.name.endswith("w:0") and self._wd_hidden > 0.0:
            log.info("Adding weight decay {:.4e} for variable {}".format(
                self._wd_hidden, x[1].name))
            return grad + self._wd_hidden * w, w
        else:
            return grad, w

    # Always gate gradients to avoid unwanted behaviour.
    # Materialise the pairs: a bare ``map`` is a one-shot iterator on
    # Python 3 and would be empty on a second pass.
    return [add_wd(pair) for pair in zip(tf.tuple(grads_list), vars_list)]
def __init__(self, *args, **kwargs):
    """Build the TFRecord input pipeline and expose the staged batch tensors.

    All arguments are forwarded to the parent constructor, which is expected
    to provide ``data_dir``, ``batch_size``, ``with_labels``, ``output_size``,
    ``c_dim`` and ``format``.

    After construction:
        self.images: image batch read back from the staging area.
        self.labels: label batch (only when ``with_labels`` is true).
        self.image_producer_op: op that puts the next batch into the staging area.
        self.shape: per-image shape in the configured data format.
    """
    super(DataFlow, self).__init__(*args, **kwargs)
    self.pattern = 'tf_records_train/train*'
    cpu_device = '/cpu:0'

    # Preprocessing
    with tf.device(cpu_device):
        file_pattern = os.path.join(self.data_dir, self.pattern)
        record_input = RecordInput(file_pattern=file_pattern,
                                   seed=Record_seed,
                                   parallelism=32,
                                   buffer_size=4000,
                                   batch_size=self.batch_size,
                                   shift_ratio=0,
                                   name='record_input')
        records = record_input.get_yield_op()
        # One scalar serialized record per example in the batch.
        records = tf.split(records, self.batch_size, 0)
        records = [tf.reshape(record, []) for record in records]

        images = []
        labels = []
        for value in records:
            if self.with_labels:
                image, label = self.parse_example_proto_and_process(value)
                labels.append(label)
            else:
                image = self.parse_example_proto_and_process(value)
            images.append(image)

        if self.with_labels:
            # ``tf.parallel_stack`` takes (values, name); the stray ``0`` that
            # used to be passed here ended up as the op name.
            labels = tf.parallel_stack(labels)
            labels = tf.reshape(labels, [self.batch_size])

        images = tf.parallel_stack(images)
        images = tf.reshape(images, shape=[
            self.batch_size, self.output_size, self.output_size, self.c_dim
        ])
        if self.format == 'NCHW':
            images = tf.transpose(images, [0, 3, 1, 2])

        images_shape = images.get_shape()
        if self.with_labels:
            labels_shape = labels.get_shape()
            image_producer_stage = StagingArea(
                dtypes=[tf.float32, tf.int32],
                shapes=[images_shape, labels_shape])
            image_producer_op = image_producer_stage.put([images, labels])
            image_producer_stage_get = image_producer_stage.get()
            # Reading the staged batch also triggers the next put.
            images_and_labels = tf.tuple(
                [image_producer_stage_get[0], image_producer_stage_get[1]],
                control_inputs=[image_producer_op])
            images = images_and_labels[0]
            labels = images_and_labels[1]
        else:
            image_producer_stage = StagingArea(dtypes=[tf.float32],
                                               shapes=[images_shape])
            image_producer_op = image_producer_stage.put([images])
            image_producer_stage_get = image_producer_stage.get()[0]
            images = tf.tuple([image_producer_stage_get],
                              control_inputs=[image_producer_op])[0]

    self.images = images
    self.image_producer_op = image_producer_op
    if self.format == 'NCHW':
        self.shape = [self.c_dim, self.output_size, self.output_size]
    elif self.format == 'NHWC':
        self.shape = [self.output_size, self.output_size, self.c_dim]
    if self.with_labels:
        self.labels = labels
def _run_op_test(self, op_fun, inputs, indices=None, latent_indicators=None,
                 inf_type=spn.InferenceType.MARGINAL, log=False, on_gpu=True):
    """Run a single test for a single op.

    Builds ``self.num_ops`` stacked copies of the op on one device, runs them
    ``self.num_runs`` times, compares the first fetched output against the
    reference output and optionally writes a Chrome trace.

    Args:
        op_fun: Callable that builds the op; its ``__name__`` labels the result.
        inputs: 2-D array fed to the raw leaf; ``inputs.shape[1]`` sets the
            number of input variables.
        indices: Optional indices forwarded to ``op_fun``.
        latent_indicators: Optional values fed to the latent indicator leaves.
        inf_type: ``spn.InferenceType`` used to build the op.
        log: Whether the op is built in log space.
        on_gpu: Build on ``/gpu:0`` when true, otherwise on ``/cpu:0``.

    Returns:
        An ``OpTestResult`` with the graph size, timings and correctness flag.
    """
    # Preparations
    op_name = op_fun.__name__
    device_name = '/gpu:0' if on_gpu else '/cpu:0'

    # Print
    print2(
        "--> %s: on_gpu=%s, inputs_shape=%s, indices=%s, latent_indicators=%s, inference=%s, log=%s"
        % (op_name, on_gpu, inputs.shape,
           ("No" if indices is None else "Yes"),
           ("No" if latent_indicators is None else "Yes"),
           ("MPE" if inf_type == spn.InferenceType.MPE else "MARGINAL"),
           log),
        self.file)

    input_size = inputs.shape[1]

    # Compute true output
    true_out = self._true_output(op_fun, inputs, indices, latent_indicators)

    # Create graph
    tf.reset_default_graph()
    with tf.device(device_name):
        # Create input
        inputs_pl = spn.RawLeaf(num_vars=input_size)
        # Create IndicatorLeaf
        if latent_indicators is None:
            latent_indicators_pl = [None for _ in range(self.num_sums)]
        elif op_fun is Ops.sum:
            # One single-variable indicator leaf per sum node.
            latent_indicators_pl = [
                spn.IndicatorLeaf(num_vars=1, num_vals=input_size)
                for _ in range(self.num_sums)
            ]
        else:
            # Ops.par_sums and Ops.sums share one leaf with a variable per
            # sum. The former test ``op_fun is Ops.par_sums or Ops.sums`` was
            # always true, so every op other than Ops.sum already took this
            # path; that behaviour is kept, just made explicit.
            latent_indicators_pl = [
                spn.IndicatorLeaf(num_vars=self.num_sums, num_vals=input_size)
            ]

        # Create ops
        start_time = time.time()
        init_ops, ops = op_fun(inputs_pl, indices, latent_indicators_pl,
                               self.num_sums, inf_type, log)
        for _ in range(self.num_ops - 1):
            # The tuple ensures that the next op waits for the output
            # of the previous op, effectively stacking the ops
            # but using the original input every time
            init_ops, ops = op_fun(inputs_pl, indices, latent_indicators_pl,
                                   self.num_sums, inf_type, log,
                                   tf.tuple([ops[-1]])[0])
        setup_time = time.time() - start_time

    # Get num of graph ops
    graph_size = len(tf.get_default_graph().get_operations())

    # Run op multiple times
    output_correct = True
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=False,
            log_device_placement=self.log_devs)) as sess:
        # Initialize weights of all the sum nodes in the graph
        start_time = time.time()
        init_ops.run()

        run_times = []
        # Create feed dictionary
        feed = {inputs_pl: inputs}
        if latent_indicators is not None:
            for iv_pl in latent_indicators_pl:
                feed[iv_pl] = latent_indicators

        for _ in range(self.num_runs):
            # Run
            start_time = time.time()
            out = sess.run(ops, feed_dict=feed)
            run_times.append(time.time() - start_time)
            # Test value
            try:
                np.testing.assert_array_almost_equal(out[0], true_out)
            except AssertionError:
                output_correct = False
                self.test_failed = True

        if self.profile:
            # Add additional options to trace the session execution
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            out = sess.run(ops, feed_dict=feed, options=options,
                           run_metadata=run_metadata)

            # Create the Timeline object, and write it to a json file
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            if not os.path.exists(self.profiles_dir):
                os.makedirs(self.profiles_dir)

            file_name = op_name
            file_name += ("_GPU" if on_gpu else "_CPU")
            if inf_type == spn.InferenceType.MPE:
                file_name += ("_MPE-LOG" if log else "_MPE")
            else:
                file_name += ("_MARGINAL-LOG" if log else "_MARGINAL")
            if indices is not None:
                file_name += "_Indices"
            if latent_indicators is not None:
                file_name += "_IVS"

            with open('%s/timeline_path_%s.json' % (self.profiles_dir, file_name), 'w') as f:
                f.write(chrome_trace)

    # Return stats
    return OpTestResult(op_name, on_gpu, graph_size,
                        ("No" if indices is None else "Yes"),
                        ("No" if latent_indicators is None else "Yes"),
                        setup_time, run_times, output_correct)
def buildModel(self, inputShape):
    """Build the multi-layer convolutional sparse-coding graph.

    For every layer this creates a dictionary ``V1_W``, activation variables
    (``V1_A``, ``V1_Y`` and their ``old*`` copies), a reconstruction of the
    layer input, error/sparsity terms and statistics. It then defines the
    total loss, the activation update ops (``optimizerA0`` / ``optimizerA``),
    the dictionary update op (``optimizerW``), visualisation tensors and
    summaries. Everything is stored on ``self``; nothing is returned.

    Args:
        inputShape: Indexable of three sizes. The first two are divided by the
            Y and X strides respectively and the third is the layer-0 input
            feature count (so presumably (height, width, channels) -- TODO
            confirm).
    """
    #Ops are placed on the configured device
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.imageShape = (self.batchSize, inputShape[0], inputShape[1], inputShape[2])
            self.inputImage = node_variable(self.imageShape, "inputImage")

        # Per-layer containers, indexed by layer number.
        self.V1_W = []
        self.normalize_W = []
        self.V1_A = []
        self.V1_Y = []
        self.oldA = []
        self.oldY = []
        self.randV1 = []
        self.resetV1 = []
        self.resetY = []
        self.recon = []
        self.error = []
        self.reconError = []
        self.sparseError = []
        self.scaledInput = []
        self.nnz = []
        self.errorStd = []
        self.l1_mean = []
        self.t_errorStd = []
        self.t_l1_mean = []
        self.log_V1_A = []
        self.WShape = []
        self.VShape = []
        self.inShape = []

        for l in range(self.numLayers):
            # Input feature count: image channels for layer 0, otherwise the
            # number of elements of the previous layer.
            if l == 0:
                numInF = inputShape[2]
            else:
                numInF = self.numV[l-1]

            # Walk the strides of layers 0..l to get this layer's activation
            # size (V_Y, V_X) and its input size (V_Y_Prev, V_X_Prev).
            V_Y = float(inputShape[0])
            V_X = float(inputShape[1])
            for i in range(l+1):
                V_Y_Prev = V_Y
                V_X_Prev = V_X
                # Each stride must divide the current spatial size exactly.
                assert(int(V_Y) % self.VStrideY[i] == 0)
                assert(int(V_X) % self.VStrideX[i] == 0)
                V_Y = V_Y/self.VStrideY[i]
                V_X = V_X/self.VStrideX[i]

            V_Y = int(V_Y)
            V_Y_Prev = int(V_Y_Prev)
            V_X = int(V_X)
            V_X_Prev = int(V_X_Prev)

            self.WShape.append((self.patchSizeY[l], self.patchSizeX[l], numInF, self.numV[l]))
            self.VShape.append((self.batchSize, V_Y, V_X, self.numV[l]))
            self.inShape.append((self.batchSize, V_Y_Prev, V_X_Prev, numInF))

            with tf.name_scope("Dictionary"):
                self.V1_W.append(weight_variable_xavier(self.WShape[l], "V1_W"+str(l), conv=True))

            with tf.name_scope("weightNorm"):
                # L2 norm over the first three weight axes, one value per output element.
                self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W[l]), reduction_indices=[0, 1, 2], keep_dims=True))
                self.normalize_W.append(self.V1_W[l].assign(self.V1_W[l]/(self.normVals+1e-8)))

            with tf.name_scope("FISTA"):
                #Soft threshold
                self.V1_A.append(weight_variable(self.VShape[l], "V1_A"+str(l), 1e-3))
                self.V1_Y.append(weight_variable(self.VShape[l], "V1_Y"+str(l), 1e-3))
                self.oldA.append(weight_variable(self.VShape[l], "oldA"+str(l), 1e-3))
                self.oldY.append(weight_variable(self.VShape[l], "oldY"+str(l), 1e-3))
                # NOTE(review): the second positional argument of tf.Variable is
                # `trainable`, not `name`, so "T"/"oldT" are probably not applied as
                # names here. Renaming would change checkpoint keys -- confirm before
                # touching. T/oldT are also re-created for every layer; only the last
                # pair stays referenced on self.
                self.T = tf.Variable(1.0, "T")
                self.oldT = tf.Variable(1.0, "oldT")
                self.randV1.append(tf.truncated_normal(self.VShape[l], mean=0, stddev=1e-3))

                #Reassign nodes
                self.resetV1.append(self.V1_A[l].assign(self.randV1[l]))
                self.resetY.append(self.V1_Y[l].assign(self.V1_A[l]))
                self.resetT = self.T.assign(1.0)

            with tf.name_scope("Recon"):
                assert(self.VStrideY[l] >= 1)
                assert(self.VStrideX[l] >= 1)
                #We build index tensor in numpy to gather
                self.recon.append(conv2d_oneToMany(self.V1_A[l], self.V1_W[l], self.inShape[l], "recon", self.VStrideY[l], self.VStrideX[l]))

            with tf.name_scope("Error"):
                #Scale inputImage
                # Layer 0 reconstructs the image; deeper layers reconstruct the
                # activations of the layer below. Scaling is currently disabled.
                if(l == 0):
                    #self.scaledInput.append(self.inputImage/np.sqrt(self.patchSizeX[0]*self.patchSizeY[0]*inputShape[2]))
                    self.scaledInput.append(self.inputImage)
                else:
                    #self.scaledInput.append(self.V1_A[l-1]/np.sqrt(self.patchSizeX[l]*self.patchSizeY[l]*self.numV[l-1]))
                    self.scaledInput.append(self.V1_A[l-1])
                self.error.append(self.scaledInput[l] - self.recon[l])

            with tf.name_scope("Loss"):
                # Per-example sums over axes 1-3, averaged over the batch axis.
                self.reconError.append(tf.reduce_mean(tf.reduce_sum(tf.square(self.error[l]), reduction_indices=[1, 2, 3])))
                self.sparseError.append(tf.reduce_mean(tf.reduce_sum(tf.abs(self.V1_A[l]), reduction_indices=[1, 2, 3])))

            with tf.name_scope("stats"):
                # Fraction of non-zero activations.
                self.nnz.append(tf.reduce_mean(tf.cast(tf.not_equal(self.V1_A[l], 0), tf.float32)))
                # Standard deviation of the error relative to that of the input.
                eStd = tf.sqrt(tf.reduce_mean(tf.square(self.error[l] - tf.reduce_mean(self.error[l]))))
                inStd = tf.sqrt(tf.reduce_mean(tf.square(self.scaledInput[l] - tf.reduce_mean(self.scaledInput[l]))))
                self.errorStd.append(eStd/inStd)
                self.l1_mean.append(tf.reduce_mean(tf.abs(self.V1_A[l])))

                #For log of activities
                self.log_V1_A.append(tf.log(tf.abs(self.V1_A[l])+1e-15))

        with tf.name_scope("Loss"):
            #Define loss
            # Half the summed reconstruction errors plus thresh-weighted sparsity terms.
            self.reconLoss = self.reconError[0]/2
            for l in range(1, self.numLayers):
                self.reconLoss += self.reconError[l]/2

            self.loss = self.reconLoss
            for l in range(self.numLayers):
                self.loss += self.thresh[l] * self.sparseError[l]

        with tf.name_scope("Opt"):
            ##Define optimizer
            #self.reconGrad = self.learningRateA * tf.gradients(self.reconLoss, self.V1_A)
            self.reconGrads = tf.gradients(self.reconLoss, self.V1_A)

            #Store old values in tensors
            #This is to avoid updating a variable too early to affect new values
            assignList = []
            for l in range(self.numLayers):
                assignList.append(self.oldA[l].assign(self.V1_A[l]))
                assignList.append(self.oldY[l].assign(self.V1_Y[l]))
            assignList.append(self.oldT.assign(self.T))
            self.optimizerA0 = tf.tuple(assignList)

            optimizerList = []
            # NOTE(review): textbook FISTA uses (1 + sqrt(1 + 4*t^2)) / 2; the inner
            # "1 +" is absent here. Confirm whether that is intentional.
            newT = (1+tf.sqrt(4*tf.square(self.oldT)))/2
            for l in range(self.numLayers):
                # Shrink the magnitude of the gradient step by thresh*lr; the sign is
                # taken from oldA (not from the stepped value) -- TODO confirm intent.
                newA = tf.nn.relu(tf.abs(self.oldY[l] - self.learningRateA[l] * self.reconGrads[l]) - self.thresh[l]*self.learningRateA[l]) * tf.sign(self.oldA[l])
                newY = newA + ((self.oldT-1)/(newT+1e-8))*(newA-self.oldA[l])
                #We update actual variables
                optimizerList.append(self.V1_Y[l].assign(newY))
                optimizerList.append(self.V1_A[l].assign(newA))
            optimizerList.append(self.T.assign(newT))
            self.optimizerA = tf.tuple(optimizerList)

            # One Adadelta step per layer dictionary, grouped into a single op.
            optWList = []
            for l in range(self.numLayers):
                optWList.append(tf.train.AdadeltaOptimizer(self.learningRateW[l], epsilon=1e-6).minimize(self.loss, var_list= [self.V1_W[l]] ))
            self.optimizerW = tf.group(*optWList)

        with tf.name_scope("ReconVis"):
            self.visRecon = []
            self.t_visRecon = []
            for l in range(self.numLayers):
                outRecon = self.recon[l]
                for ll in range(l)[::-1]:
                    #Project the reconstruction down through the lower layers
                    outRecon = conv2d_oneToMany(outRecon, self.V1_W[ll], self.inShape[ll], "recon_"+str(l)+"_"+str(ll), self.VStrideY[ll], self.VStrideX[ll])
                self.visRecon.append(outRecon)

        with tf.name_scope("WeightVis"):
            self.visWeight = []
            for l in range(self.numLayers):
                # Move the output-element axis to the front, then project each
                # element's weights down through the lower layers.
                outWeight = tf.transpose(self.V1_W[l], [3, 0, 1, 2])
                numN = self.WShape[l][3]
                numY = self.WShape[l][0]
                numX = self.WShape[l][1]
                numF = self.WShape[l][2]
                for ll in range(l)[::-1]:
                    # Spatial size after a VALID transposed convolution with layer ll.
                    numY = self.WShape[ll][0] + (numY-1) * self.VStrideY[ll]
                    numX = self.WShape[ll][1] + (numX-1) * self.VStrideX[ll]
                    numF = self.WShape[ll][2]
                    inShape = (numN, numY, numX, numF)
                    outWeight = conv2d_oneToMany(outWeight, self.V1_W[ll], inShape, "weight_"+str(l)+"_"+str(ll), self.VStrideY[ll], self.VStrideX[ll], padding="VALID")
                self.visWeight.append(outWeight)

    #Summaries
    # Each self.s_* / self.h_* attribute is overwritten on every layer, so only the
    # last layer's summary op stays referenced on self.
    self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
    self.h_input = tf.histogram_summary('inputImage', self.inputImage, name="input")
    for l in range(self.numLayers):
        self.s_recon = tf.scalar_summary('recon error' + str(l), self.reconError[l], name="reconError")
        self.s_errorStd= tf.scalar_summary('errorStd' + str(l), self.errorStd[l], name="errorStd")
        self.s_l1= tf.scalar_summary('l1 sparsity' + str(l), self.sparseError[l], name="sparseError")
        self.s_l1_mean = tf.scalar_summary('l1 mean' + str(l), self.l1_mean[l], name="l1Mean")
        self.s_s_nnz = tf.scalar_summary('nnz' + str(l), self.nnz[l], name="nnz")

        self.h_input = tf.histogram_summary('scaledInput'+str(l), self.scaledInput[l], name="input")
        self.h_recon = tf.histogram_summary('recon' + str(l), self.recon[l], name="recon")
        self.h_v1_w = tf.histogram_summary('V1_W' + str(l), self.V1_W[l], name="V1_W")

        self.h_v1_a = tf.histogram_summary('V1_A' + str(l), self.V1_A[l], name="V1_A")
        self.h_log_v1_a = tf.histogram_summary('Log_V1_A' + str(l), self.log_V1_A[l], name="Log_V1_A")
def train():
    """Train the multi-tower model for ``FLAGS.max_steps`` steps.

    Builds one tower per GPU (``FLAGS.num_gpus``), passing a ``reuse`` flag to
    ``tower_loss`` for every tower after the first, averages the per-tower
    gradients, applies them together with an exponential moving average of
    the trainable variables, and then runs the training loop.

    Every 10 steps the loss, accumulated top-1/top-5 accuracy and throughput
    are logged; every 100 steps the accumulated accuracies are passed to
    ``save_accuracy`` and the summaries of the last tower are written.

    Raises:
        AssertionError: if the loss value becomes NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals
        # the number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Decay the learning rate exponentially based on the number of steps.
        lr = create_learning_rate_scheduler(
            global_step, dataset=MTVSOData(subset='train'))

        # Create an optimizer that performs gradient descent.
        opt = create_optimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads, tower_logits, tower_labels, tower_losses = [], [], [], []
        reuse = None
        # tf.variable_scope outside the loop is needed for the code to work on
        # TensorFlow versions >=0.12
        # https://github.com/tensorflow/tensorflow/issues/6220#issuecomment-266425068
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        # Calculate the loss for one tower. This function
                        # constructs the entire model but shares the
                        # variables across all towers.
                        loss, logits, labels = tower_loss(scope, reuse)

                        # Reuse variables for the next tower.
                        reuse = True

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)

                        # Calculate the gradients for the batch of data on
                        # this tower.
                        grads = opt.compute_gradients(
                            loss,
                            var_list=get_variables(
                                ["visual_fc", "linear_anp", "fusion"]))

                        # Keep track of the outputs across all towers.
                        tower_grads.append(grads)
                        tower_logits.append(logits)
                        tower_labels.append(labels)
                        tower_losses.append(loss)

        # Concatenate the outputs of all towers
        logits_op = concat(tower_logits, 0, 'concat_logits')
        labels_op = concat(tower_labels, 0, 'concat_labels')
        loss_op = tf.reduce_mean(tower_losses)

        # Update BN's moving_mean and moving_variance
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            tf.logging.info('Gathering update_ops')
            with tf.control_dependencies(tf.tuple(update_ops)):
                loss_op = tf.identity(loss_op)

        # Track the loss of all towers
        summaries.append(tf_.scalar_summary('combined_loss', loss_op))

        # Compute top-1 accuracy
        top1_accuracy_op = top_k_accuracy(logits_op, labels_op, k=1)
        # Compute top-5 accuracy
        top5_accuracy_op = top_k_accuracy(logits_op, labels_op, k=5)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf_.scalar_summary('learning_rate', lr))

        # Add histograms for trainable variables and gradients.
        maybe_track_vars_and_gradients(grads, summaries)

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Build the summary operation from the last tower summaries ONCE.
        # Building it inside the training loop would add a new merge op to
        # the graph every time summaries are written.
        summary_op = tf_.merge_summary(summaries)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have
        # GPU implementations.
        sess = tf.InteractiveSession(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        if FLAGS.resume_training:
            # Restore model weights in the case that we are resuming training
            restore_model(sess, saver)
        else:
            # If it is not resuming training, simply load the weights of the
            # noun and adjective resnet
            restore_model(sess, saver,
                          current_scope="resnet_nouns_v1_50",
                          checkpoint_scope='resnet_v1_50')
            restore_model(sess, saver,
                          current_scope="resnet_adjectives_v1_50",
                          checkpoint_scope='resnet_v1_50')

        # Manually set the learning rate if there is no learning rate decay
        # and we are resuming training
        overwrite_learning_rate(sess, lr)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf_.summary_writer(FLAGS.train_dir, sess.graph)

        accumulated_top1_accuracy_10_steps, accumulated_top1_accuracy_100_steps = 0., 0.
        accumulated_top5_accuracy_10_steps, accumulated_top5_accuracy_100_steps = 0., 0.

        format_str = ('%s: step %d, loss = %.2f, top-1 = %.3f%%, top-5 = %.3f%% '
                      '(%.1f examples/sec; %.3f sec/batch)')

        for step in range(FLAGS.max_steps):
            # Relies on the InteractiveSession being the default session.
            g_step = global_step.eval()
            start_time = time.time()
            _, loss_value, top1_accuracy_value, top5_accuracy_value = sess.run(
                [train_op, loss_op, top1_accuracy_op, top5_accuracy_op])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            accumulated_top1_accuracy_10_steps += top1_accuracy_value
            accumulated_top1_accuracy_100_steps += top1_accuracy_value
            accumulated_top5_accuracy_10_steps += top5_accuracy_value
            accumulated_top5_accuracy_100_steps += top5_accuracy_value

            # The first step is slower since we have to wait until the
            # examples queue has over min_examples, so we will not log the
            # throughput at step 0.
            # NOTE(review): step 0 is still accumulated above, so the first
            # 10-step and 100-step windows contain one extra sample.
            if step == 0:
                continue

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                tf.logging.info(format_str % (
                    datetime.datetime.now(), g_step, loss_value,
                    accumulated_top1_accuracy_10_steps * 10,
                    accumulated_top5_accuracy_10_steps * 10,
                    examples_per_sec, sec_per_batch))
                accumulated_top1_accuracy_10_steps = 0.
                accumulated_top5_accuracy_10_steps = 0.

            if step % 100 == 0:
                save_accuracy(g_step,
                              accumulated_top1_accuracy_100_steps,
                              accumulated_top5_accuracy_100_steps)
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, g_step - 1)
                accumulated_top1_accuracy_100_steps = 0.
                accumulated_top5_accuracy_100_steps = 0.

            # Save the model checkpoint periodically.
            maybe_save_model(sess, saver, step, global_step)

            # Evaluate the model periodically
            maybe_submit_evaluation_job(step)
)).batch(1).prefetch(2) test_dataset = tf.data.Dataset.from_tensor_slices(test).map( lambda f: tuple(tf.py_func(parse_fn, [f], [tf.float64, tf.float64]) )).batch(1).repeat() train_iterator = train_dataset.make_one_shot_iterator() test_iterator = test_dataset.make_one_shot_iterator() handle = tf.placeholder(tf.string, shape=[]) iter = tf.data.Iterator.from_string_handle(handle, train_dataset.output_types, train_dataset.output_shapes) next_el = iter.get_next() next_el = tf.tuple( [tf.squeeze(next_el[0], [0]), tf.squeeze(next_el[1], [0])]) train_handle, test_handle = sess.run( [train_iterator.string_handle(), test_iterator.string_handle()]) # initialize the iterator # sess.run([test_iterator.initializer]) # simulate training for i in range(EPOCHS): if i % 3 == 0: # run validation out = sess.run(next_el, feed_dict={handle: test_handle}) print("test out: {}".format(out)) try:
def build_graph():
    """Build the WGAN training graph from module-level configuration.

    Reads the module-level settings ``mode``, ``is_mlp``, ``is_adam``,
    ``batch_size``, ``z_dim``, ``channel``, ``lam``, ``clamp_lower``,
    ``clamp_upper`` and the two learning rates.

    * ``mode == 'gp'`` adds a gradient penalty, weighted by ``lam``, to the
      critic loss.
    * ``mode == 'regular'`` clips every critic variable to
      ``[clamp_lower, clamp_upper]`` after each critic update.

    Returns:
        A tuple ``(opt_g, opt_c, real_data)``: the generator train op, the
        critic train op (in 'regular' mode, the list of clipped-variable
        tensors returned by ``tf.tuple``) and the real-image placeholder.

    Raises:
        NotImplementedError: if ``mode`` is neither 'gp' nor 'regular'.
    """
    # Validate up front so an unsupported mode does not leave a half-built
    # graph behind.
    if mode not in ('gp', 'regular'):
        raise NotImplementedError('Only two modes')

    # Alternative input: z = tf.placeholder(tf.float32, shape=(batch_size, z_dim))
    noise_dist = tf.contrib.distributions.Normal(0., 1.)
    z = noise_dist.sample((batch_size, z_dim))
    generator = generator_mlp if is_mlp else generator_conv
    critic = critic_mlp if is_mlp else critic_conv
    with tf.variable_scope('generator'):
        train = generator(z)
    real_data = tf.placeholder(
        dtype=tf.float32, shape=(batch_size, 32, 32, channel))
    true_logit = critic(real_data)
    fake_logit = critic(train, reuse=True)
    c_loss = tf.reduce_mean(fake_logit - true_logit)

    # Strings must be compared by value: `mode is 'gp'` only succeeds when
    # both sides happen to be the same object, which silently disabled the
    # penalty/clipping for any mode string built at runtime.
    if mode == 'gp':
        alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
        alpha = alpha_dist.sample((batch_size, 1, 1, 1))
        interpolated = real_data + alpha * (train - real_data)
        inte_logit = critic(interpolated, reuse=True)
        gradients = tf.gradients(inte_logit, [interpolated, ])[0]
        grad_l2 = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((grad_l2 - 1) ** 2)
        tf.summary.scalar("gp_loss", gradient_penalty)
        tf.summary.scalar("grad_norm", tf.nn.l2_loss(gradients))
        c_loss += lam * gradient_penalty

    g_loss = tf.reduce_mean(-fake_logit)
    tf.summary.scalar("g_loss", g_loss)
    tf.summary.scalar("c_loss", c_loss)
    tf.summary.image("img", train, max_outputs=10)

    theta_g = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
    theta_c = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic')

    # The same optimizer factory is used for both networks.
    if is_adam is True:
        optimizer_factory = partial(
            tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
    else:
        optimizer_factory = tf.train.RMSPropOptimizer

    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_g = ly.optimize_loss(
        loss=g_loss,
        learning_rate=learning_rate_ger,
        optimizer=optimizer_factory,
        variables=theta_g,
        global_step=counter_g,
        summaries=['gradient_norm'])

    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_c = ly.optimize_loss(
        loss=c_loss,
        learning_rate=learning_rate_dis,
        optimizer=optimizer_factory,
        variables=theta_c,
        global_step=counter_c,
        summaries=['gradient_norm'])

    if mode == 'regular':
        # Control dependencies only attach to ops *created inside* the
        # context, so the clip/assign ops must be built here; otherwise
        # clipping is not ordered after the critic update.
        with tf.control_dependencies([opt_c]):
            clipped_var_c = [
                tf.assign(var,
                          tf.clip_by_value(var, clamp_lower, clamp_upper))
                for var in theta_c
            ]
            # merge the clip operations on critic variables
            opt_c = tf.tuple(clipped_var_c)

    return opt_g, opt_c, real_data
def _build_train_graph(self):
    """Build the WGAN training graph and expose its access points on ``self``.

    Creates the input placeholders, generator and critic outputs, the
    Wasserstein-distance based losses, RMSProp train ops for both networks,
    the weight-clipping step for the critic, and the summary ops.

    Sets ``self.X``, ``self.z``, ``self.D_train_op``, ``self.G_train_op``,
    ``self.fake_sample``, ``self.global_step``, ``self.summary_op`` and
    ``self.all_summary_op``.
    """
    with tf.variable_scope(self.name):
        X = tf.placeholder(tf.float32, [None] + self.shape)
        z = tf.placeholder(tf.float32, [None, self.z_dim])
        global_step = tf.Variable(0, name='global_step', trainable=False)

        G = self._generator(z)
        C_real = self._critic(X)
        C_fake = self._critic(G, reuse=True)

        W_dist = tf.reduce_mean(C_real - C_fake)
        C_loss = -W_dist
        G_loss = tf.reduce_mean(-C_fake)

        C_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/critic/')
        G_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/generator/')

        C_update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, scope=self.name + '/critic/')
        G_update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, scope=self.name + '/generator/')

        # In the paper, the critic network is trained n_critic times for each
        # training step. Here the learning rate is scaled instead.
        with tf.control_dependencies(C_update_ops):
            C_train_op = tf.train.RMSPropOptimizer(
                learning_rate=self.D_lr * self.n_critic
            ).minimize(C_loss, var_list=C_vars)
        with tf.control_dependencies(G_update_ops):
            G_train_op = tf.train.RMSPropOptimizer(
                learning_rate=self.G_lr
            ).minimize(G_loss, var_list=G_vars, global_step=global_step)

        # Weight clipping.
        # Open question from the original author: is it right to also clip
        # the gamma of batch_norm?
        #   ver 1 (used): clip all critic variables, gamma included.
        #   ver 2: skip variables with 'gamma' in the op name -- noted as
        #          "does not work".
        #   ver 3: clip gamma to [-1, 1] and the rest to [-0.01, 0.01] --
        #          noted as "works but strange".
        #
        # Control dependencies only attach to ops *created inside* the
        # context, so the clip/assign ops are built here; created outside,
        # they are not ordered after the critic update.
        with tf.control_dependencies([C_train_op]):
            C_clips = [
                tf.assign(var, tf.clip_by_value(var, -0.01, 0.01))
                for var in C_vars
            ]
            C_train_op = tf.tuple(C_clips)

        # summaries
        # per-step summary
        self.summary_op = tf.summary.merge([
            tf.summary.scalar('G_loss', G_loss),
            tf.summary.scalar('C_loss', C_loss),
            tf.summary.scalar('W_dist', W_dist)
        ])

        # sparse-step summary
        tf.summary.image('fake_sample', G, max_outputs=self.FAKE_MAX_OUTPUT)
        self.all_summary_op = tf.summary.merge_all()

        # accessible points
        self.X = X
        self.z = z
        self.D_train_op = C_train_op  # compatibility for train.py
        self.G_train_op = G_train_op
        self.fake_sample = G
        self.global_step = global_step
# Input pipeline: turn the index pairs in `ind_pairs_train` into batched
# tensors by running `input_parser` on each pair through tf.py_func.
train_pairs = tf.constant(ind_pairs_train)
tr_data = tf.data.Dataset.from_tensor_slices(train_pairs)
# Single-threaded variant kept for reference:
# tr_data = tr_data.map(lambda pair: tf.py_func(input_parser,[pair],tf.double))
tr_data = tr_data.map(lambda pair: tf.py_func(input_parser, [pair], tf.double), num_parallel_calls=12)
tr_data = tr_data.batch(batchsize)
tr_data = tr_data.prefetch(batchsize)
# Re-initializable iterator: `tr_init_op` must be run before `next_element`
# is evaluated.
iterator = tf.data.Iterator.from_structure(tr_data.output_types, tr_data.output_shapes)
next_element = iterator.get_next()
tr_init_op = iterator.make_initializer(tr_data)
# Split the batch into three equal parts along axis 3.
# NOTE(review): presumably axis 3 is the channel axis of a stacked image
# triplet -- confirm against input_parser.
im1, im2, im3 = tf.split(next_element, 3, 3)
triplet_batch = tf.tuple((im1, im2, im3))
# --------------------------------------------------
print('model')
# --------------------------------------------------
from Models import Model
model = Model(nchannels, imcropsize, testIdx)
print('reslearn: ', model.residualLearning)
# --------------------------------------------------
print('train')
# --------------------------------------------------
# Created after the model so that the variables existing at this point are
# covered by the saver.
saver = tf.train.Saver()
def _inference(self, memories, sentences, answers, keep_prob, mem_idx, sent_lexical_features, mem_lexical_features):
    """Build the tagging graph: bi-GRU encoders, memory cell and CRF loss.

    Memories and sentences are embedded with ``self._emb``, concatenated with
    their lexical features and encoded by the same pair of dropout-wrapped
    GRU cells (the second encoder call reuses the variables of the first).
    The projected sentence encoding is then run through a
    ``MemoryNetworkNERCell`` over the projected memory encoding, followed by
    two linear layers and a CRF log-likelihood.

    Args:
        memories: token ids of the memory; looked up in ``self._emb``.
        sentences: token ids of the sentences; looked up in ``self._emb``.
        answers: tag indices passed to ``crf_log_likelihood``.
        keep_prob: input/output keep probability of the GRU dropout wrappers.
        mem_idx: per-token index; expanded by one trailing axis and fed to
            the memory cell next to the sentence encoding.
        sent_lexical_features: concatenated to the sentence embeddings on
            axis 2.
        mem_lexical_features: concatenated to the memory embeddings on axis 2.

    Returns:
        ``(sent_len, mlp2tag, log_likelihood, transition_params,
        mem_rnn_link)``.
    """
    with tf.variable_scope(self._name):
        # Forward / backward GRU cells, shared by the memory and the
        # sentence encoder below.
        memory_rnn_cell_fw = tf.contrib.rnn.GRUCell(
            self._rnn_memory_hidden_size)
        memory_rnn_cell_fw = tf.contrib.rnn.DropoutWrapper(
            memory_rnn_cell_fw, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
        memory_rnn_cell_bw = tf.contrib.rnn.GRUCell(
            self._rnn_memory_hidden_size)
        memory_rnn_cell_bw = tf.contrib.rnn.DropoutWrapper(
            memory_rnn_cell_bw, input_keep_prob=keep_prob, output_keep_prob=keep_prob)

        mem_len = self._seq_len(memories)  # [None]
        sent_len = self._seq_len(sentences)  # [None]

        sent_emb = tf.nn.embedding_lookup(self._emb, sentences)  # [None, sentence_size, emb_size]
        # m_emb = tf.nn.embedding_lookup(self._weight_matrices[0], memories)
        m_emb = tf.nn.embedding_lookup(self._emb, memories)  # [None, memory_size, emb_size]
        # NOTE(review): c_emb is built but never used below.
        c_emb = tf.nn.embedding_lookup(self._emb, memories)  # [None, memory_size, emb_size]

        sent_emb = tf.concat(values=[sent_emb, sent_lexical_features], axis=2)  # [None, sentence_size, emb_size + lexical_features_size]
        m_emb = tf.concat(values=[m_emb, mem_lexical_features], axis=2)  # [None, memory_size, emb_size + lexical_features_size]
        c_emb = tf.concat(values=[c_emb, mem_lexical_features], axis=2)  # [None, memory_size, emb_size + lexical_features_size]

        # NOTE(review): only the first bidirectional RNN is assumed to be
        # built inside this scope block; the scope object is reused below for
        # the sentence encoder. Confirm the nesting against the original file.
        with tf.variable_scope("memory_rnn") as m_sentence_rnn_scope:
            (m_rnn_fw, m_rnn_bw), (_, _) = tf.nn.bidirectional_dynamic_rnn(
                memory_rnn_cell_fw, memory_rnn_cell_bw, m_emb, dtype=tf.float32, sequence_length=mem_len, scope=m_sentence_rnn_scope, swap_memory=True, )
        # m_rnn_f/bw: [None, memory_size, rnn_memory_hidden_size]
        # m_rnn_state_f/bw: [None, rnn_memory_hidden_size]

        # Projection of the two memory RNN directions.
        Wm_memory_rnn_fw = tf.get_variable(
            initializer=self._init, shape=self._rnn_memory_Ws_shape, name="W_memory_rnn_fw", )
        Wm_memory_rnn_bw = tf.get_variable(
            initializer=self._init, shape=self._rnn_memory_Ws_shape, name="W_memory_rnn_bw", )
        bm_memory_rnn = tf.get_variable(
            initializer=self._init, shape=self._rnn_memory_bs_shape, name="b_memory_rnn")
        m_rnn_output = self._nonlin(
            self._tensor_dot(m_rnn_fw, Wm_memory_rnn_fw) + self._tensor_dot(m_rnn_bw, Wm_memory_rnn_bw) + bm_memory_rnn)  # [None, memory_size, emb_size]
        m = m_rnn_output

        # sent_emb: [None, sentence_size, emb_size]
        # Separate projection variables for the sentence encoder.
        W_sent_rnn_fw = tf.get_variable(
            initializer=self._init, shape=self._rnn_memory_Ws_shape, name="W_sentence_rnn_fw", )
        W_sent_rnn_bw = tf.get_variable(
            initializer=self._init, shape=self._rnn_memory_Ws_shape, name="W_sentence_rnn_bw", )
        b_sent_rnn = tf.get_variable(initializer=self._init, shape=self._rnn_memory_bs_shape, name="b_sentence_rnn")

        # Switch the captured "memory_rnn" scope to reuse mode so that the
        # sentence encoder shares the GRU variables created above.
        m_sentence_rnn_scope.reuse_variables()
        (sent_rnn_fw, sent_rnn_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            memory_rnn_cell_fw, memory_rnn_cell_bw, sent_emb, dtype=tf.float32, sequence_length=sent_len, scope=m_sentence_rnn_scope, swap_memory=True, )
        # sent_rnn_f/bw: [None, memory_size, rnn_memory_hidden_size]
        # sent_rnn_state_f/bw: [None, rnn_memory_hidden_size]
        # NOTE(review): the two shape comments here say "memory_size" but the
        # input is the sentence batch -- presumably sentence_size; confirm.
        sent_rnn_output = self._nonlin(
            self._tensor_dot(sent_rnn_fw, W_sent_rnn_fw) + self._tensor_dot(sent_rnn_bw, W_sent_rnn_bw) + b_sent_rnn)  # [None, memory_size, emb_size]
        sent_emb = sent_rnn_output

        # The projected memory encoding `m` is passed twice to the cell.
        mem_rnn_cell = MemoryNetworkNERCell(
            self._memory_size, self._embedding_feature_size, m, m, return_link=True, )
        mem_idx_expanded = tf.expand_dims(input=mem_idx, axis=-1, name="doc_start_index_reshaped")
        (mem_rnn_output, mem_rnn_link), mem_rnn_state = tf.nn.dynamic_rnn(
            mem_rnn_cell, tf.tuple([sent_emb, mem_idx_expanded]), dtype=tf.float32, sequence_length=sent_len)
        # mem_rnn_output: [None, max_seq_len, hidden_size]
        # mem_rnn_link: [None, max_seq_len, max_seq_len]
        # mem_rnn_state: [None, hidden_size]

        rnn2mlp = self._tensor_dot(mem_rnn_output, self.RNN) + self.RNN_b  # [None, sentence_size, mlp_hidden_size]
        mlp2tag = self._tensor_dot(rnn2mlp, self.RNN2TAG) + self.RNN2TAG_b  # [None, sentence_size, answer_size]

        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
            mlp2tag, answers, sent_len)

    return sent_len, mlp2tag, log_likelihood, transition_params, mem_rnn_link
def train(imPath,logPath,modelPath,pmPath,nTrain,nValid,nTest,restoreVariables,nSteps,gpuIndex,testPMIndex):
    """Train, validate and test the UNet2D model on a folder of TIFF images.

    Images are read from ``<imPath>/I%05d_Img.tif`` and their annotations
    from ``<imPath>/I%05d_Ant.tif``. A random permutation of the
    ``nTrain + nValid + nTest`` samples is split into the three sets.

    Args:
        imPath: folder containing the image / annotation TIFF files.
        logPath: summary log folder; it is deleted and recreated.
        modelPath: folder for the checkpoint, dataset statistics and
            hyper-parameters (created if missing).
        pmPath: folder where the test output images are written.
        nTrain, nValid, nTest: number of samples in each split.
        restoreVariables: if true, dataset mean / st. dev. and the model
            variables are loaded from ``modelPath`` instead of being
            computed / initialized.
        nSteps: number of training steps.
        gpuIndex: value written to ``CUDA_VISIBLE_DEVICES``.
        testPMIndex: class channel written to the test output images.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '%d' % gpuIndex

    outLogPath = logPath
    trainWriterPath = pathjoin(logPath, 'Train')
    validWriterPath = pathjoin(logPath, 'Valid')
    outModelPath = pathjoin(modelPath, 'model.ckpt')
    outPMPath = pmPath

    batchSize = UNet2D.hp['batchSize']
    imSize = UNet2D.hp['imSize']
    nChannels = UNet2D.hp['nChannels']
    nClasses = UNet2D.hp['nClasses']

    # --------------------------------------------------
    # data
    # --------------------------------------------------

    print('loading data, computing mean / st dev')
    if not os.path.exists(modelPath):
        os.makedirs(modelPath)
    if restoreVariables:
        datasetMean = loadData(pathjoin(modelPath, 'datasetMean.data'))
        datasetStDev = loadData(pathjoin(modelPath, 'datasetStDev.data'))
    else:
        # Average of the per-image mean and of the per-image st. dev.
        datasetMean = 0
        datasetStDev = 0
        for iSample in range(nTrain+nValid+nTest):
            I = im2double(tifread('%s/I%05d_Img.tif' % (imPath, iSample)))
            datasetMean += np.mean(I)
            datasetStDev += np.std(I)
        datasetMean /= (nTrain+nValid+nTest)
        datasetStDev /= (nTrain+nValid+nTest)
        saveData(datasetMean, pathjoin(modelPath, 'datasetMean.data'))
        saveData(datasetStDev, pathjoin(modelPath, 'datasetStDev.data'))

    perm = np.arange(nTrain+nValid+nTest)
    np.random.shuffle(perm)

    def loadSplit(nSamples, offset):
        """Load `nSamples` samples starting at position `offset` of `perm`."""
        images = np.zeros((nSamples, imSize, imSize, nChannels))
        labels = np.zeros((nSamples, imSize, imSize, nClasses))
        for iSample in range(nSamples):
            index = perm[offset+iSample]
            im = im2double(tifread('%s/I%05d_Img.tif' % (imPath, index)))
            # Only channel 0 is filled with the normalized image.
            images[iSample,:,:,0] = (im-datasetMean)/datasetStDev
            im = tifread('%s/I%05d_Ant.tif' % (imPath, index))
            # Annotation value i+1 marks class i (one-hot along last axis).
            for i in range(nClasses):
                labels[iSample,:,:,i] = (im == i+1)
        return images, labels

    Train, LTrain = loadSplit(nTrain, 0)
    Valid, LValid = loadSplit(nValid, nTrain)
    Test, LTest = loadSplit(nTest, nTrain+nValid)

    # --------------------------------------------------
    # optimization
    # --------------------------------------------------

    tfLabels = tf.placeholder("float", shape=[None,imSize,imSize,nClasses], name='labels')

    globalStep = tf.Variable(0, trainable=False)
    learningRate0 = 0.01
    decaySteps = 1000
    decayRate = 0.95
    learningRate = tf.train.exponential_decay(learningRate0, globalStep, decaySteps, decayRate, staircase=True)

    with tf.name_scope('optim'):
        # Pixel-wise cross entropy between labels and the network output.
        loss = tf.reduce_mean(-tf.reduce_sum(tf.multiply(tfLabels, tf.log(UNet2D.nn)), 3))
        updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # optimizer = tf.train.MomentumOptimizer(1e-3,0.9)
        optimizer = tf.train.MomentumOptimizer(learningRate, 0.9)
        # optimizer = tf.train.GradientDescentOptimizer(learningRate)
        with tf.control_dependencies(updateOps):
            optOp = optimizer.minimize(loss, global_step=globalStep)

    with tf.name_scope('eval'):
        # Per class: 1 - (correctly predicted labelled pixels / labelled pixels).
        error = []
        for iClass in range(nClasses):
            labels0 = tf.reshape(tf.to_int32(tf.slice(tfLabels,[0,0,0,iClass],[-1,-1,-1,1])),[batchSize,imSize,imSize])
            predict0 = tf.reshape(tf.to_int32(tf.equal(tf.argmax(UNet2D.nn,3),iClass)),[batchSize,imSize,imSize])
            correct = tf.multiply(labels0, predict0)
            nCorrect0 = tf.reduce_sum(correct)
            nLabels0 = tf.reduce_sum(labels0)
            error.append(1-tf.to_float(nCorrect0)/tf.to_float(nLabels0))
        errors = tf.tuple(error)

    # --------------------------------------------------
    # inspection
    # --------------------------------------------------

    with tf.name_scope('scalars'):
        tf.summary.scalar('avg_cross_entropy', loss)
        for iClass in range(nClasses):
            tf.summary.scalar('avg_pixel_error_%d' % iClass, error[iClass])
        tf.summary.scalar('learning_rate', learningRate)
    with tf.name_scope('images'):
        split0 = tf.slice(UNet2D.nn,[0,0,0,0],[-1,-1,-1,1])
        split1 = tf.slice(UNet2D.nn,[0,0,0,1],[-1,-1,-1,1])
        if nClasses > 2:
            split2 = tf.slice(UNet2D.nn,[0,0,0,2],[-1,-1,-1,1])
        tf.summary.image('pm0', split0)
        tf.summary.image('pm1', split1)
        if nClasses > 2:
            tf.summary.image('pm2', split2)
    merged = tf.summary.merge_all()

    # --------------------------------------------------
    # session
    # --------------------------------------------------

    saver = tf.train.Saver()
    # config parameter needed to save variables when using GPU
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    trainWriter = None
    validWriter = None
    # try/finally so the session and the writers are released even when
    # training or testing raises.
    try:
        if os.path.exists(outLogPath):
            shutil.rmtree(outLogPath)
        trainWriter = tf.summary.FileWriter(trainWriterPath, sess.graph)
        validWriter = tf.summary.FileWriter(validWriterPath, sess.graph)

        if restoreVariables:
            saver.restore(sess, outModelPath)
            print("Model restored.")
        else:
            sess.run(tf.global_variables_initializer())

        # --------------------------------------------------
        # train
        # --------------------------------------------------

        batchData = np.zeros((batchSize,imSize,imSize,nChannels))
        batchLabels = np.zeros((batchSize,imSize,imSize,nClasses))

        def sampleBatch(data, labels):
            """Fill the batch buffers with `batchSize` random samples."""
            order = np.arange(data.shape[0])
            np.random.shuffle(order)
            for j in range(batchSize):
                batchData[j,:,:,:] = data[order[j],:,:,:]
                batchLabels[j,:,:,:] = labels[order[j],:,:,:]

        for i in range(nSteps):
            # train
            sampleBatch(Train, LTrain)
            summary, _ = sess.run([merged, optOp], feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 1})
            trainWriter.add_summary(summary, i)

            # validation
            sampleBatch(Valid, LValid)
            summary, es = sess.run([merged, errors], feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 0})
            validWriter.add_summary(summary, i)

            e = np.mean(es)
            print('step %05d, e: %f' % (i, e))

            if i == 0:
                # A restored model only gets overwritten by a better one; a
                # fresh model is always saved at step 0.
                if restoreVariables:
                    lowestError = e
                else:
                    lowestError = np.inf

            if np.mod(i, 100) == 0 and e < lowestError:
                lowestError = e
                print("Model saved in file: %s" % saver.save(sess, outModelPath))

        # --------------------------------------------------
        # test
        # --------------------------------------------------

        if not os.path.exists(outPMPath):
            os.makedirs(outPMPath)

        for i in range(nTest):
            j = np.mod(i, batchSize)

            batchData[j,:,:,:] = Test[i,:,:,:]
            batchLabels[j,:,:,:] = LTest[i,:,:,:]

            # Run the network when the buffer is full or on the last sample;
            # only the first j+1 buffer entries belong to this batch.
            if j == batchSize-1 or i == nTest-1:
                output = sess.run(UNet2D.nn, feed_dict={UNet2D.tfData: batchData, tfLabels: batchLabels, UNet2D.tfTraining: 0})

                for k in range(j+1):
                    pm = output[k,:,:,testPMIndex]
                    gt = batchLabels[k,:,:,testPMIndex]
                    im = np.sqrt(normalize(batchData[k,:,:,0]))
                    # Side by side: input image | probability map | ground truth.
                    imwrite(np.uint8(255*np.concatenate((im,np.concatenate((pm,gt),axis=1)),axis=1)),'%s/I%05d.png' % (outPMPath,i-j+k+1))

        # --------------------------------------------------
        # save hyper-parameters, clean-up
        # --------------------------------------------------

        saveData(UNet2D.hp, pathjoin(modelPath, 'hp.data'))
    finally:
        if trainWriter is not None:
            trainWriter.close()
        if validWriter is not None:
            validWriter.close()
        sess.close()
def fit(
        self,
        ids_train,
        ids_test,
        y_train,
        y_test,
        dense_train=None,
        dense_test=None,
        lr=0.001,
        N_EPOCH=50,
        batch_size=200,
        early_stopping_rounds=20,
):
    """Build the prediction graph, train it and restore the best weights.

    The enabled ``use_*`` flags on ``self`` select which prediction layers
    are summed into ``self.pred``. Training runs for at most ``N_EPOCH``
    epochs with Adam and stops early when the test score has not improved
    for ``early_stopping_rounds`` epochs. The trainable variables of the
    best epoch are assigned back before returning.

    Args:
        ids_train, ids_test: id features (objects with ``.columns``,
            ``.copy()`` and ``.loc``); copies are taken, the inputs are not
            mutated.
        y_train, y_test: targets fed to the ``[None, 1]`` label placeholder.
        dense_train, dense_test: optional dense features, used when
            ``self.dense_features_size > 0``.
        lr: Adam learning rate.
        N_EPOCH: maximum number of epochs.
        batch_size: batch size passed to ``batcher``.
        early_stopping_rounds: patience in epochs.

    Returns:
        The best test score: the mean test loss, or the test AUC when
        ``self.metric_type`` is set.

    Raises:
        Exception: if ``self.loss_type`` is not supported.
    """
    start_time = time.time()
    # [bug fix] mutable prevention 19/06/27
    ids_train = ids_train.copy()
    ids_test = ids_test.copy()
    self.batch_size = batch_size

    # Data preprocessing: the label encoding of every id feature must start
    # where the previous feature's encoding ended. The function inputs are
    # expected to start from 0 for every feature.
    if self.hash_size is None:
        for i, column in enumerate(ids_train.columns):
            if i >= 1:
                offset = sum(self.features_sizes[:i])
                ids_train.loc[:, column] = ids_train[column] + offset
                ids_test.loc[:, column] = ids_test[column] + offset
    if self.attention_FM or self.use_AutoInt:
        # Stored as instance attributes; used by get_attention to obtain
        # the attention values.
        self.ids_train, self.ids_test, self.y_train, self.y_test = ids_train, ids_test, y_train, y_test

    self.ids = tf.placeholder(tf.int32, [None, self.fields])
    self.dense_inputs = tf.placeholder(tf.float32,
                                       [None, self.dense_features_size])
    self.y = tf.placeholder(tf.float32, [None, 1])
    self.L2_reg = 0
    self.dropout_keeprate_holder = tf.placeholder(tf.float32)

    embed_L2 = 0
    if self.use_FM or self.use_MLP or self.use_AutoInt:
        self.embedding, embed_L2 = self.Embedding(
            self.ids, self.embedding_weights)  # (None,fields,k)
    if self.use_SE:
        self.embeddingSE = self.SELayer(self.embedding, self.SE_weights)

    self.pred = 0
    if self.use_LR:
        # bug detected. LR didn't keepdims
        self.pred = self.LR(self.ids, self.w, self.b)
    if self.use_MLR:
        print("use Mix of LR.")
        self.pred += self.MLR(self.ids, self.MLR_u, self.MLR_w)

    # only one FM will be used.
    if self.use_NFM:
        print("use NFM")
        self.pred += self.NFM(self.embedding, self.NFM_weights)
    elif self.use_BiFM:
        if self.use_SE:
            cross_term = tf.concat([
                self.Bilinear_FM(
                    self.embedding, self.bilinear_weights, se_emb=False),
                self.Bilinear_FM(
                    self.embeddingSE, self.bilinear_weights, se_emb=True),
            ], axis=-1)  # N,c,2k
            if self.use_FiBiNet:
                print("use FiBiNet")
                # deep backend
                cross_term = tf.reshape(
                    cross_term, [-1, self.c * self.k * 2])  # None,2ck
                cross_term = tf.nn.relu(
                    tf.matmul(cross_term, self.FiBiNet_weights['W1']) +
                    self.FiBiNet_weights['b1'])
                self.pred += (
                    tf.matmul(cross_term, self.FiBiNet_weights['W2']) +
                    self.FiBiNet_weights['b2'])
            else:
                print("use Fibifm")
                self.pred += tf.expand_dims(
                    tf.reduce_sum(cross_term, axis=[1, 2]), axis=1)  # N,1
        else:
            print("use bifm")
            cross_term = self.Bilinear_FM(
                self.embedding, self.bilinear_weights, se_emb=False)  # N,c,k
            self.pred += tf.expand_dims(
                tf.reduce_sum(cross_term, axis=[1, 2]), axis=1)  # N,1
    elif self.use_FM and not self.attention_FM and not self.use_CFM:
        print("use FM")
        if len(self.FM_ignore_interaction) == 0:
            self.pred += self.FM2(self.embedding)
        if len(self.FM_ignore_interaction) > 0:
            self.pred += self.FMDE(self.embedding)
    elif self.use_FM and self.attention_FM:
        print("use AFM")
        afm_out, reg = self.AFM(self.embedding, self.AFM_weights)
        self.pred += afm_out
        self.L2_reg += reg
    elif self.use_FM and self.use_CFM:
        print("use CFM")
        cfm_out, reg = self.CFM(self.embedding, self.CFM_weights)
        self.pred += cfm_out
        self.L2_reg += reg

    if self.use_AutoInt:
        self.y_deep = self.embedding
        for _l in range(self.autoint_params['autoint_layers']):
            self.y_deep = self.AutoInt(
                self.y_deep, self.AutoInt_weights, layer=_l)  # N,f,d
        self.pred += tf.matmul(
            tf.reshape(self.y_deep, shape=[
                -1, self.fields * self.autoint_d * self.autoint_head
            ]), self.AutoInt_weights['W_out']) + self.AutoInt_weights['b_out']

    if self.use_CrossNet_layers > 0:
        # combine crossnet with DNN
        MLP_in = tf.reshape(self.embedding,
                            [-1, self.fields * self.k])  # (N,f*k)
        if self.dense_features_size > 0:
            MLP_in = tf.concat([MLP_in, self.dense_inputs],
                               axis=1)  # (N,f*k+dense)
        self.MLP_out = self.MLP(MLP_in, self.weights, self.bias,
                                return_pred=False)  # (None,last_layers)
        self.CrossNet_out = self.CrossNet(
            tf.expand_dims(MLP_in, axis=-1),
            self.CrossNet_weights)  # (None,f*k+d)
        self.pred += tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(
                tf.concat([self.MLP_out, self.CrossNet_out], axis=1))
    elif self.use_MLP:
        # DNN prediction added in parallel
        MLP_in = tf.reshape(self.embedding,
                            [-1, self.fields * self.k])  # (N,f*k)
        if self.dense_features_size > 0:
            MLP_in = tf.concat([MLP_in, self.dense_inputs],
                               axis=1)  # (N,f*k+dense)
        self.pred += self.MLP(MLP_in, self.weights, self.bias)

    # NOTE(review): self.pred starts as 0, so this assertion can never fire.
    assert self.pred is not None, "must have one predicion layer"

    if self.loss_type == 'rmse':
        self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.y - self.pred)))
    elif self.loss_type == 'mse':
        self.loss = tf.reduce_mean(tf.square(self.y - self.pred))
    elif self.loss_type in ['binary_crossentropy', 'binary', 'logloss']:
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y,
                                                    logits=self.pred))
    else:
        raise Exception("Loss type %s not supported" % self.loss_type)

    # todo EMBEDL2 coef
    self.loss += self.lambda_l2 * self.L2_reg  # + embed_L2*1e-5
    self.optimizer = tf.train.AdamOptimizer(lr).minimize(self.loss)

    if self.metric_type is not None:
        assert self.metric_type == 'auc'
        assert self.loss_type in [
            'binary_crossentropy', 'binary', 'logloss'
        ]
        # tf.auc mode: remove sklearn auc part
        # self.loss=tf.metrics.auc(labels=self.y,predictions=tf.nn.sigmoid(self.pred))

    self.sess = self._init_session()
    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())

    def make_feed(bx, bx_dense, keep_rate, by=None):
        """Feed dict for one batch; labels/dense inputs only when needed."""
        feed = {self.ids: bx, self.dropout_keeprate_holder: keep_rate}
        if by is not None:
            feed[self.y] = by
        if self.dense_features_size > 0:
            feed[self.dense_inputs] = bx_dense
        return feed

    def snapshot_weights():
        """Current values of all trainable variables, keyed by name."""
        return {
            v.name: v.eval(self.sess)
            for v in tf.trainable_variables()
        }

    def restore_best_weights():
        """Assign the best recorded weights back to the variables."""
        self.sess.run(
            tf.tuple([
                tf.assign(var, best_weights[var.name])
                for var in tf.trainable_variables()
            ]))

    cur_best_rounds = 0
    # By default a smaller loss is better; with a metric, larger is better.
    is_greater_better = self.metric_type is not None
    cur_min_loss = 1e8 if not is_greater_better else -1e8
    best_weights = snapshot_weights()

    for epoch in range(N_EPOCH):
        train_loss = 0.
        n_train_batches = 0
        y_preds_train = []
        # id input + dense input
        for bx, bx_dense, by in batcher(ids_train,
                                        y_train,
                                        X_dense=dense_train,
                                        batch_size=batch_size,
                                        hash_size=self.hash_size):
            _, l = self.sess.run(
                [self.optimizer, self.loss],
                feed_dict=make_feed(bx, bx_dense, self.dropout_keeprate, by))
            train_loss += l
            n_train_batches += 1
            if self.metric_type:
                # Predictions are taken after the update, without dropout.
                y_preds_train.append(
                    self.sess.run(self.pred,
                                  feed_dict=make_feed(bx, bx_dense, 1.0)))
        # Average over the batches actually processed. Dividing by
        # int(n / batch_size) raised ZeroDivisionError for n < batch_size
        # and mis-scaled the mean when the last batch was partial.
        train_loss /= max(n_train_batches, 1)

        if self.coldStartAvg:
            if epoch == 0:
                print("Cold Start Averaging start")
            self.coldStartAvgTool()

        # todo movielens afm rounded
        test_loss = 0.
        n_test_batches = 0
        y_preds = []
        for bx, bx_dense, by in batcher(ids_test,
                                        y_test,
                                        X_dense=dense_test,
                                        batch_size=batch_size,
                                        hash_size=self.hash_size):
            l = self.sess.run(self.loss,
                              feed_dict=make_feed(bx, bx_dense, 1.0, by))
            test_loss += l
            n_test_batches += 1
            if self.metric_type:
                y_preds.append(
                    self.sess.run(self.pred,
                                  feed_dict=make_feed(bx, bx_dense, 1.0)))
        test_loss /= max(n_test_batches, 1)

        # Disabled experiment kept for reference: bound the predictions to
        # [-1, 1] and override test_loss with the RMSE of the bounded values.
        #   y_pred=np.concatenate(y_preds, axis=0).reshape((-1))
        #   predictions_bounded = np.maximum(y_pred, np.ones(len(y_pred)) * -1)
        #   predictions_bounded = np.minimum(predictions_bounded, np.ones(len(y_pred)) * 1)
        #   test_loss = np.sqrt(np.mean(np.square(y_test.reshape(predictions_bounded.shape)- predictions_bounded)))

        # sklearn auc mode
        if self.metric_type:
            # override the losses with the AUC scores
            self.y_pred_train = np.concatenate(y_preds_train, axis=0)
            self.y_pred = np.concatenate(y_preds, axis=0)
            train_loss = roc_auc_score(y_train, self.y_pred_train)
            test_loss = roc_auc_score(y_test, self.y_pred)

        metrics_ = 'loss' if self.metric_type is None else 'auc'
        print("epoch:%s train_%s:%s test_%s:%s" %
              (epoch + 1, metrics_, train_loss, metrics_, test_loss))

        if isBetter(test_loss, cur_min_loss, is_greater_better):
            cur_min_loss = test_loss
            cur_best_rounds = epoch + 1
            best_weights = snapshot_weights()
        if epoch + 1 - cur_best_rounds >= early_stopping_rounds:
            print(
                "[Early Stop]Early Stopping because not improved for %s rounds"
                % early_stopping_rounds)
            restore_best_weights()
            best_score = cur_min_loss
            print("[Early Stop]Best Score:", best_score, ' at round ',
                  cur_best_rounds)
            print(
                "Train finish. Fit time:%.2f seconds. Epoch time:%.2f seconds"
                % (time.time() - start_time,
                   (time.time() - start_time) / (epoch + 1)))
            return best_score
        # auc reset op
        self.sess.run(tf.local_variables_initializer())

    restore_best_weights()
    best_score = cur_min_loss
    print("[Epoch Maxi]Best Score:", best_score, ' at round ',
          cur_best_rounds)
    print("Train finish. Fit time:%.2f seconds. Epoch time:%.2f seconds" %
          (time.time() - start_time,
           (time.time() - start_time) / max(N_EPOCH, 1)))
    return best_score
def __call__(self, dataset, moving_params=None):
    """Assemble the parser graph for `dataset` and collect its outputs.

    Word and tag embeddings are concatenated, run through `self.n_recur`
    recurrent layers and, when `self.n_mlp > 0`, through MLP stacks that
    yield separate dependent, head and relation representations.  The
    parse logits come from `self.bilinear_classifier` and the relation
    logits from `self.linear_classifier`.

    Args:
      dataset: object exposing `vocabs`, `inputs` and `targets`.
      moving_params: forwarded to the embedding lookups and stored on
        `self`; when it is not None all variable scopes are opened with
        `reuse=True`.

    Returns:
      A dict of tensors keyed by 'probabilities', 'predictions',
      'correct', 'tokens', 'n_correct', 'n_tokens', 'accuracy', 'loss',
      'embed', 'recur', 'dep', 'head', 'rel', 'parse_logits' and
      'rel_logits'.
    """
    vocabs = dataset.vocabs
    inputs = dataset.inputs
    targets = dataset.targets
    share_vars = moving_params is not None

    # 1.0 wherever the word id is greater than the ROOT id, 0.0 elsewhere.
    keep_mask = tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT))
    self.tokens_to_keep3D = tf.expand_dims(keep_mask, 2)
    kept_per_row = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
    self.sequence_lengths = tf.reshape(kept_per_row, [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    # Embedding lookups for words (two input columns) and tags.
    word_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    tag_inputs = vocabs[1].embedding_lookup(
        inputs[:, :, 2], moving_params=self.moving_params)

    # Recurrent stack.
    hidden = self.embed_concat(word_inputs, tag_inputs)
    for layer in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % layer, reuse=share_vars):
            hidden, _ = self.RNN(hidden)

    # MLP stacks; without any MLP layer all three views share the RNN output.
    if self.n_mlp <= 0:
        dep_mlp = head_mlp = rel_mlp = hidden
    else:
        with tf.variable_scope('MLP0', reuse=share_vars):
            dep_mlp, head_mlp, rel_mlp = self.MLP(hidden, n_splits=3)
        for depth in xrange(1, self.n_mlp):
            with tf.variable_scope('DepMLP%d' % depth, reuse=share_vars):
                dep_mlp = self.MLP(dep_mlp)
            with tf.variable_scope('HeadMLP%d' % depth, reuse=share_vars):
                head_mlp = self.MLP(head_mlp)
            with tf.variable_scope('RelMLP%d' % depth, reuse=share_vars):
                rel_mlp = self.MLP(rel_mlp)

    # Classifiers.
    with tf.variable_scope('Parses', reuse=share_vars):
        parse_logits = self.bilinear_classifier(
            dep_mlp, head_mlp, add_bias1=True)
        parse_output = self.output(parse_logits, targets[:, :, 1])
    with tf.variable_scope('Rels', reuse=share_vars):
        rel_logits = self.linear_classifier(rel_mlp, len(vocabs[2]))
        rel_output = self.output(rel_logits, targets[:, :, 2])

    # Combined outputs, built in the same order as the dict keys below.
    probabilities = tf.tuple(
        [parse_output['probabilities'], rel_output['probabilities']])
    predictions = tf.pack(
        [parse_output['predictions'], rel_output['predictions']])
    correct = parse_output['correct'] * rel_output['correct']
    n_correct = tf.reduce_sum(correct)
    accuracy = n_correct / self.n_tokens
    loss = parse_output['loss'] + rel_output['loss']
    embed = tf.pack([word_inputs, tag_inputs])

    return {
        'probabilities': probabilities,
        'predictions': predictions,
        'correct': correct,
        'tokens': parse_output['tokens'],
        'n_correct': n_correct,
        'n_tokens': self.n_tokens,
        'accuracy': accuracy,
        'loss': loss,
        'embed': embed,
        'recur': hidden,
        'dep': dep_mlp,
        'head': head_mlp,
        'rel': rel_mlp,
        'parse_logits': parse_logits,
        'rel_logits': rel_logits,
    }
def buildModel(self, inputShape):
    """Build the FISTA sparse-coding graph and its summary ops.

    Creates, as attributes on `self`: the input node, the dictionary
    `V1_W` with its normalisation op, the activation state (`V1_A`,
    `V1_Y`, `T` and their `old*` snapshots), the reconstruction, the loss,
    the activation/dictionary update ops, statistics and summaries.

    Args:
        inputShape: indexable with three entries; entries 0 and 1 must be
            divisible by `self.VStrideY` and `self.VStrideX` respectively
            (presumably (rows, cols, channels) -- confirm against callers).

    Raises:
        AssertionError: if a divisibility check or a stride >= 1 check fails.
    """
    assert (inputShape[0] % self.VStrideY == 0)
    assert (inputShape[1] % self.VStrideX == 0)
    V_Y = int(inputShape[0] / self.VStrideY)
    V_X = int(inputShape[1] / self.VStrideX)
    self.imageShape = (self.batchSize, inputShape[0], inputShape[1],
                       inputShape[2])
    # NOTE(review): the third entry is hard-coded to 3 while the input
    # scaling below uses inputShape[2] -- confirm they are meant to agree.
    self.WShape = (self.patchSizeY, self.patchSizeX, 3, self.numV)
    self.VShape = (self.batchSize, V_Y, V_X, self.numV)

    # Running on GPU
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            # Get convolution variables as placeholders
            self.inputImage = node_variable(self.imageShape, "inputImage")
            # Scale inputImage
            self.scaled_inputImage = self.inputImage / np.sqrt(
                self.patchSizeX * self.patchSizeY * inputShape[2])

        with tf.name_scope("Dictionary"):
            self.V1_W = sparse_weight_variable(self.WShape, "V1_W")

        with tf.name_scope("weightNorm"):
            self.normVals = tf.sqrt(
                tf.reduce_sum(tf.square(self.V1_W),
                              reduction_indices=[0, 1, 2],
                              keep_dims=True))
            self.normalize_W = self.V1_W.assign(
                self.V1_W / (self.normVals + 1e-8))

        with tf.name_scope("FISTA"):
            # Soft threshold
            self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)
            self.V1_Y = weight_variable(self.VShape, "V1_Y", 1e-3)
            # The second positional parameter of tf.Variable is
            # `trainable`, not `name`; pass the name by keyword so the
            # variable is actually called "T" (resp. "oldT").  This changes
            # the variable names stored in checkpoints.
            self.T = tf.Variable(1.0, name="T")
            self.oldA = weight_variable(self.VShape, "oldA", 1e-3)
            self.oldY = weight_variable(self.VShape, "oldY", 1e-3)
            self.oldT = tf.Variable(1.0, name="oldT")
            self.randV1 = tf.truncated_normal(self.VShape, mean=0,
                                              stddev=1e-3)
            # Reassign nodes
            self.resetV1 = self.V1_A.assign(self.randV1)
            self.resetT = self.T.assign(1.0)
            self.resetY = self.V1_Y.assign(self.V1_A)

        with tf.name_scope("Recon"):
            assert (self.VStrideY >= 1)
            assert (self.VStrideX >= 1)
            # We build index tensor in numpy to gather
            self.recon = conv2d_oneToMany(self.V1_A, self.V1_W,
                                          self.imageShape, "recon",
                                          self.VStrideY, self.VStrideX)

        with tf.name_scope("Error"):
            self.error = self.scaled_inputImage - self.recon

        with tf.name_scope("Loss"):
            self.reconError = tf.reduce_mean(
                tf.reduce_sum(tf.square(self.error),
                              reduction_indices=[1, 2, 3]))
            self.l1Sparsity = tf.reduce_mean(
                tf.reduce_sum(tf.abs(self.V1_A),
                              reduction_indices=[1, 2, 3]))
            # Define loss
            self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity

        with tf.name_scope("Opt"):
            self.reconGrad = self.learningRateA * tf.gradients(
                self.reconError, [self.V1_A])[0]

            # Store old values in tensors.
            # This is to avoid updating a variable too early to affect new
            # values.
            self.optimizerA0 = tf.tuple([
                self.oldA.assign(self.V1_A),
                self.oldT.assign(self.T),
                self.oldY.assign(self.V1_Y),
            ])

            # NOTE(review): textbook soft-thresholding takes the sign of the
            # thresholded argument (oldY - reconGrad), not of oldA -- confirm
            # this is intentional.
            self.newA = tf.nn.relu(
                tf.abs(self.oldY - self.reconGrad) -
                self.thresh * self.learningRateA) * tf.sign(self.oldA)
            # NOTE(review): this evaluates to oldT + 0.5 for positive oldT;
            # textbook FISTA uses (1 + sqrt(1 + 4 * t^2)) / 2 -- confirm.
            self.newT = (1 + tf.sqrt(4 * tf.square(self.oldT))) / 2
            self.newY = self.newA + (
                (self.oldT - 1) / (self.newT + 1e-8)) * (self.newA -
                                                         self.oldA)

            # We update actual variables
            self.optimizerA1 = self.V1_Y.assign(self.newY)
            self.optimizerA2 = self.T.assign(self.newT)
            self.optimizerA3 = self.V1_A.assign(self.newA)
            self.optimizerA = tf.tuple(
                [self.optimizerA1, self.optimizerA2, self.optimizerA3])

            self.optimizerW = tf.train.AdadeltaOptimizer(
                self.learningRateW, epsilon=1e-6).minimize(
                    self.loss, var_list=[self.V1_W])

        with tf.name_scope("stats"):
            self.nnz = tf.reduce_mean(
                tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))
            self.errorStd = tf.sqrt(
                tf.reduce_mean(
                    tf.square(self.error - tf.reduce_mean(self.error)))
            ) * np.sqrt(self.patchSizeY * self.patchSizeX * inputShape[2])
            self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))
            self.weightImages = tf.transpose(self.V1_W, [3, 0, 1, 2])
            # For log of activities
            self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-15)

    # Summaries
    self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
    self.s_recon = tf.scalar_summary('recon error', self.reconError,
                                     name="reconError")
    self.s_errorStd = tf.scalar_summary('errorStd', self.errorStd,
                                        name="errorStd")
    self.s_l1 = tf.scalar_summary('l1 sparsity', self.l1Sparsity,
                                  name="l1Sparsity")
    self.s_l1_mean = tf.scalar_summary('l1 mean', self.l1_mean,
                                       name="l1Mean")
    self.s_s_nnz = tf.scalar_summary('nnz', self.nnz, name="nnz")

    self.h_input = tf.histogram_summary('input', self.inputImage,
                                        name="input")
    self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
    self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")
    self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
    self.h_log_v1_a = tf.histogram_summary('Log_V1_A', self.log_V1_A,
                                           name="Log_V1_A")
    self.h_normVals = tf.histogram_summary('normVals', self.normVals,
                                           name="normVals")
def _rcn_head(self, inputs, image_shape, nms_threshold, rpn_thresholds,
              rcn_batch, batch_size, name='rcn_head', **kwargs):
    """Build the RCN classification head on top of the RPN outputs.

    Selects anchors with non-max suppression (sub-sampled through
    `self.create_bbox_batch` while training), crops the feature maps with
    ROI pooling, classifies each crop with a convolutional block and
    computes `self.rcn_loss`.

    Parameters
    ----------
    inputs : sequence of (feature_maps, rpn_reg, rpn_cls)
    image_shape : divisor of `self.map_shape` used to obtain the ROI factor
    nms_threshold : IoU threshold handed to `non_max_suppression`
    rpn_thresholds : indexable; element 1 is used as the score threshold
    rcn_batch : forwarded to `self.create_bbox_batch`
    batch_size : forwarded to `non_max_suppression`
    name : variable scope name
    kwargs : forwarded to `conv_block`; must contain 'data_format'

    Returns
    -------
    tuple
        (`tf.tuple` of the arg-maxed classifications, loss tensor)
    """
    labels_of_anchors = self.anchors_placeholders['labels']
    feature_maps, rpn_reg, rpn_cls = inputs
    anchor_count = self.n_anchors

    with tf.variable_scope(name):
        nms_indices = non_max_suppression(rpn_reg, rpn_cls, batch_size,
                                          anchor_count,
                                          iou_threshold=nms_threshold,
                                          score_threshold=rpn_thresholds[1],
                                          nonempty=True)

        # Only sub-sample the proposals into a batch while training.
        roi_indices = tf.cond(
            self.is_training,
            lambda: self.create_bbox_batch(nms_indices, rcn_batch),
            lambda: nms_indices)

        rois, roi_labels = self._get_rois_and_labels(rpn_reg,
                                                     labels_of_anchors,
                                                     roi_indices)

        for roi in rois:
            tf.add_to_collection('roi', roi)
        for label in roi_labels:
            tf.add_to_collection('targets', label)

        roi_factor = np.array(self.map_shape / image_shape)

        rois = self.stop_gradient_tuple(rois)
        roi_labels = self.stop_gradient_tuple(roi_labels)

        cropped = roi_pooling_layer(feature_maps, rois, factor=roi_factor,
                                    shape=(7, 7),
                                    data_format=kwargs['data_format'])
        indices, cropped, roi_labels = self._stack_tuple(cropped, roi_labels)  # pylint: disable=unbalanced-tuple-unpacking

        class_scores = conv_block(cropped, 'f', units=10, name='output_conv',
                                  **kwargs)
        loss = self.rcn_loss(class_scores, roi_labels)

        # Turn per-class scores into class ids and regroup them by `indices`.
        class_ids = tf.argmax(class_scores, axis=-1)
        class_ids = self._unstack_tuple(class_ids, indices)
        class_ids = tf.tuple(class_ids, name='clsf')
        for class_tensor in class_ids:
            tf.add_to_collection('rcn_output', class_tensor)

        loss = tf.identity(loss, 'loss')

    return class_ids, loss
def lstm_def(self, rnn_input, seq_len):
    """Build the stacked LSTM and the ops that persist its state across batches.

    Args:
        rnn_input: input tensor handed to `tf.nn.dynamic_rnn`.
        seq_len: sequence lengths handed to `tf.nn.dynamic_rnn`.

    Returns:
        A tuple `(rnn_outputs, rnn_keep_state_op, rnn_state_zero_op)`:
        the RNN outputs (transposed with perm [1, 0, 2] when
        `self.time_major` is false), an op that copies the final RNN state
        into the state variables, and an op that resets those variables to
        zeros.  Both ops are `tf.tuple`s whose values are not meant to be
        used.
    """
    # Automatically reset state in each batch
    # Define cells of acoustic model
    with tf.variable_scope('LSTM'):
        def lstm_cell():
            cell_kwargs = dict(
                use_peepholes=self.use_peepholes,
                forget_bias=0.0,
                state_is_tuple=self.state_is_tuple,
                reuse=tf.get_variable_scope().reuse)
            # Only add a projection layer when it changes the output size.
            if self.proj_dim != self.hidden_size:
                cell_kwargs['num_proj'] = self.proj_dim
            return tf.contrib.rnn.LSTMCell(self.hidden_size, **cell_kwargs)

        layers_list = []
        for _ in range(self.num_layers):
            cell = lstm_cell()
            # Dropout is only applied when training with keep_prob < 1.
            # Fixed: the class is `DropoutWrapper` (was misspelled
            # `DropoutWarpper`, which raised AttributeError on this path).
            if not self.forward_only and self.keep_prob < 1.0:
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.keep_prob)
            layers_list.append(cell)

        # Store the layers in a multi-layer RNN
        cell = tf.contrib.rnn.MultiRNNCell(
            layers_list, state_is_tuple=self.state_is_tuple)

    # Define some variables to store the RNN state
    # Note : tensorflow keep the state inside a batch but it's necessary to
    # do this in order to keep the state between batches, especially when
    # doing live transcript.
    # Another way would have been to get the state as an output of the
    # session and feed it every time but this way is much more efficient
    with tf.variable_scope('Hidden_state'):
        state_variables = []
        for state_c, state_h in cell.zero_state(self.batch_size, tf.float32):
            state_variables.append(tf.contrib.rnn.LSTMStateTuple(
                tf.Variable(state_c, trainable=False),
                tf.Variable(state_h, trainable=False)))
    # Return as a tuple, so that it can be fed to dynamic_rnn as an initial
    # state
    rnn_tuple_state = tuple(state_variables)

    # Build the RNN
    with tf.name_scope("LSTM"):
        rnn_outputs, new_states = tf.nn.dynamic_rnn(
            cell=cell, inputs=rnn_input, sequence_length=seq_len,
            initial_state=rnn_tuple_state, dtype=tf.float32,
            time_major=self.time_major)

    # Define an op to keep the hidden state between batches
    update_ops = []
    for state_variable, new_state in zip(rnn_tuple_state, new_states):
        # Assign the new state to the state variables on this layer
        update_ops.extend([state_variable[0].assign(new_state[0]),
                           state_variable[1].assign(new_state[1])])
    # Return a tuple in order to combine all update_ops into a single
    # operation.  The tuple's actual value should not be used.
    rnn_keep_state_op = tf.tuple(update_ops)

    # Define an op to reset the hidden state to zeros
    update_ops = []
    for state_variable in rnn_tuple_state:
        # Assign zeros to the state variables on this layer
        update_ops.extend([
            state_variable[0].assign(tf.zeros_like(state_variable[0])),
            state_variable[1].assign(tf.zeros_like(state_variable[1]))])
    # Return a tuple in order to combine all update_ops into a single
    # operation.  The tuple's actual value should not be used.
    rnn_state_zero_op = tf.tuple(update_ops)

    if not self.time_major:
        # [time, batch_size, cell_outdim]
        rnn_outputs = tf.transpose(rnn_outputs, [1, 0, 2])
    return rnn_outputs, rnn_keep_state_op, rnn_state_zero_op

    # NOTE(review): everything below follows the return above and is never
    # executed; kept verbatim until the owner decides whether the logits
    # projection should be restored or deleted.
    batch_size = self.batch_size
    print(batch_size, self.proj_dim, self.output_size, seq_len.shape)
    rnn_outputs = tf.reshape(rnn_outputs, [-1, self.proj_dim])
    logits = tf.matmul(rnn_outputs, self.W) + self.bias
    logits = tf.reshape(logits, [-1, batch_size, self.output_size])
    return logits, rnn_keep_state_op, rnn_state_zero_op
def buildModel(self, inputShape):
    """Build the FISTA sparse-coding graph and its summary ops.

    Creates, as attributes on `self`: the input node, the dictionary
    `V1_W` with its normalisation op, the activation state (`V1_A`,
    `V1_Y`, `T` and their `old*` snapshots), the reconstruction, the loss,
    the activation/dictionary update ops, statistics and summaries.

    Args:
        inputShape: indexable with three entries; entries 0 and 1 must be
            divisible by `self.VStrideY` and `self.VStrideX` respectively
            (presumably (rows, cols, channels) -- confirm against callers).

    Raises:
        AssertionError: if a divisibility check or a stride >= 1 check fails.
    """
    assert(inputShape[0] % self.VStrideY == 0)
    assert(inputShape[1] % self.VStrideX == 0)
    V_Y = int(inputShape[0]/self.VStrideY)
    V_X = int(inputShape[1]/self.VStrideX)
    self.imageShape = (self.batchSize, inputShape[0], inputShape[1], inputShape[2])
    # NOTE(review): the third entry is hard-coded to 3 while the input
    # scaling below uses inputShape[2] -- confirm they are meant to agree.
    self.WShape = (self.patchSizeY, self.patchSizeX, 3, self.numV)
    self.VShape = (self.batchSize, V_Y, V_X, self.numV)

    #Running on GPU
    with tf.device(self.device):
        with tf.name_scope("inputOps"):
            #Get convolution variables as placeholders
            self.inputImage = node_variable(self.imageShape, "inputImage")
            #Scale inputImage
            self.scaled_inputImage = self.inputImage/np.sqrt(self.patchSizeX*self.patchSizeY*inputShape[2])

        with tf.name_scope("Dictionary"):
            self.V1_W = sparse_weight_variable(self.WShape, "V1_W")

        with tf.name_scope("weightNorm"):
            self.normVals = tf.sqrt(tf.reduce_sum(tf.square(self.V1_W), reduction_indices=[0, 1, 2], keep_dims=True))
            self.normalize_W = self.V1_W.assign(self.V1_W/(self.normVals + 1e-8))

        with tf.name_scope("FISTA"):
            #Soft threshold
            self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)
            self.V1_Y = weight_variable(self.VShape, "V1_Y", 1e-3)
            #The second positional parameter of tf.Variable is `trainable`,
            #not `name`; pass the name by keyword so the variable is
            #actually called "T" (resp. "oldT"). This changes the variable
            #names stored in checkpoints.
            self.T = tf.Variable(1.0, name="T")
            self.oldA = weight_variable(self.VShape, "oldA", 1e-3)
            self.oldY = weight_variable(self.VShape, "oldY", 1e-3)
            self.oldT = tf.Variable(1.0, name="oldT")
            self.randV1 = tf.truncated_normal(self.VShape, mean=0, stddev=1e-3)
            #Reassign nodes
            self.resetV1 = self.V1_A.assign(self.randV1)
            self.resetT = self.T.assign(1.0)
            self.resetY = self.V1_Y.assign(self.V1_A)

        with tf.name_scope("Recon"):
            assert(self.VStrideY >= 1)
            assert(self.VStrideX >= 1)
            #We build index tensor in numpy to gather
            self.recon = conv2d_oneToMany(self.V1_A, self.V1_W, self.imageShape, "recon", self.VStrideY, self.VStrideX)

        with tf.name_scope("Error"):
            self.error = self.scaled_inputImage - self.recon

        with tf.name_scope("Loss"):
            self.reconError = tf.reduce_mean(tf.reduce_sum(tf.square(self.error), reduction_indices=[1, 2, 3]))
            self.l1Sparsity = tf.reduce_mean(tf.reduce_sum(tf.abs(self.V1_A), reduction_indices=[1, 2, 3]))
            #Define loss
            self.loss = self.reconError/2 + self.thresh * self.l1Sparsity

        with tf.name_scope("Opt"):
            self.reconGrad = self.learningRateA * tf.gradients(self.reconError, [self.V1_A])[0]

            #Store old values in tensors
            #This is to avoid updating a variable too early to affect new values
            self.optimizerA0 = tf.tuple([
                self.oldA.assign(self.V1_A),
                self.oldT.assign(self.T),
                self.oldY.assign(self.V1_Y),
            ])

            #NOTE(review): textbook soft-thresholding takes the sign of the
            #thresholded argument (oldY - reconGrad), not of oldA -- confirm
            #this is intentional.
            self.newA = tf.nn.relu(tf.abs(self.oldY - self.reconGrad) - self.thresh*self.learningRateA) * tf.sign(self.oldA)
            #NOTE(review): this evaluates to oldT + 0.5 for positive oldT;
            #textbook FISTA uses (1 + sqrt(1 + 4*t^2))/2 -- confirm.
            self.newT = (1+tf.sqrt(4*tf.square(self.oldT)))/2
            self.newY = self.newA + ((self.oldT-1)/(self.newT+1e-8))*(self.newA-self.oldA)

            #We update actual variables
            self.optimizerA1 = self.V1_Y.assign(self.newY)
            self.optimizerA2 = self.T.assign(self.newT)
            self.optimizerA3 = self.V1_A.assign(self.newA)
            self.optimizerA = tf.tuple([self.optimizerA1, self.optimizerA2, self.optimizerA3])

            self.optimizerW = tf.train.AdadeltaOptimizer(self.learningRateW, epsilon=1e-6).minimize(self.loss,
                    var_list=[
                        self.V1_W
                    ])

        with tf.name_scope("stats"):
            self.nnz = tf.reduce_mean(tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))
            self.errorStd = tf.sqrt(tf.reduce_mean(tf.square(self.error-tf.reduce_mean(self.error))))*np.sqrt(self.patchSizeY*self.patchSizeX*inputShape[2])
            self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))
            self.weightImages = tf.transpose(self.V1_W, [3, 0, 1, 2])
            #For log of activities
            self.log_V1_A = tf.log(tf.abs(self.V1_A)+1e-15)

    #Summaries
    self.s_loss = tf.scalar_summary('loss', self.loss, name="lossSum")
    self.s_recon = tf.scalar_summary('recon error', self.reconError, name="reconError")
    self.s_errorStd= tf.scalar_summary('errorStd', self.errorStd, name="errorStd")
    self.s_l1= tf.scalar_summary('l1 sparsity', self.l1Sparsity, name="l1Sparsity")
    self.s_l1_mean = tf.scalar_summary('l1 mean', self.l1_mean, name="l1Mean")
    self.s_s_nnz = tf.scalar_summary('nnz', self.nnz, name="nnz")

    self.h_input = tf.histogram_summary('input', self.inputImage, name="input")
    self.h_recon = tf.histogram_summary('recon', self.recon, name="recon")
    self.h_v1_w = tf.histogram_summary('V1_W', self.V1_W, name="V1_W")
    self.h_v1_a = tf.histogram_summary('V1_A', self.V1_A, name="V1_A")
    self.h_log_v1_a = tf.histogram_summary('Log_V1_A', self.log_V1_A, name="Log_V1_A")
    self.h_normVals = tf.histogram_summary('normVals', self.normVals, name="normVals")
def build_input_graph(self):
    """Create the placeholders and cropping ops that feed the model.

    Reads image/box geometry from `self.config["dataset"]`, creates the
    image, box, indicator and per-target placeholders, wires them through
    `crop_graph` and stores the results in `self.input_variables` and
    `self.train_variables`.

    `labeled_crops` is `tf.tuple([crops, target_0, target_1, ...])`; the
    targets are listed in index order so that `labeled_crops[1]` is always
    the `target_0` placeholder, independent of dict iteration order.
    """
    images_cfg = self.config["dataset"]["images"]
    locations_cfg = self.config["dataset"]["locations"]

    # Identify number of channels: masking objects adds one image channel.
    mask_objects = locations_cfg["mask_objects"]
    crop_channels = len(images_cfg["channels"])
    img_channels = (crop_channels + 1) if mask_objects else crop_channels

    # Identify image and box sizes
    box_size = locations_cfg["box_size"]
    img_width = images_cfg["width"]
    img_height = images_cfg["height"]

    # Data shapes
    num_targets = len(self.dset.targets)
    crop_shape = [(box_size, box_size, crop_channels)] + [()] * num_targets
    imgs_shape = [None, img_height, img_width, img_channels]
    batch_shape = (-1, img_height, img_width, img_channels)

    # Inputs to cropping graph
    image_ph = tf.placeholder(tf.float32, shape=imgs_shape,
                              name="raw_images")
    boxes_ph = tf.placeholder(tf.float32, shape=[None, 4], name="cell_boxes")
    box_ind_ph = tf.placeholder(tf.int32, shape=[None],
                                name="box_indicators")
    mask_ind_ph = tf.placeholder(tf.int32, shape=[None],
                                 name="mask_indicators")

    # One int32 placeholder per target, created and kept in index order.
    target_names = ["target_" + str(i) for i in range(num_targets)]
    targets_phs = {}
    for tname in target_names:
        targets_phs[tname] = tf.placeholder(tf.int32, shape=[None],
                                            name=tname)

    # Outputs and cache of the cropping graph
    crop_op = crop_graph(image_ph, boxes_ph, box_ind_ph, mask_ind_ph,
                         box_size, mask_objects)
    labeled_crops = tf.tuple(
        [crop_op] + [targets_phs[tname] for tname in target_names])

    self.input_variables = {
        "image_ph": image_ph,
        "boxes_ph": boxes_ph,
        "box_ind_ph": box_ind_ph,
        "targets_phs": targets_phs,
        "mask_ind_ph": mask_ind_ph,
        "labeled_crops": labeled_crops,
        "shapes": {
            "crops": crop_shape,
            "images": imgs_shape,
            "batch": batch_shape
        },
    }

    # Training variables
    self.train_variables = {
        "image_batch": self.input_variables["labeled_crops"][0],
        "target_0": tf.one_hot(self.input_variables["labeled_crops"][1],
                               self.dset.targets[0].shape[1])
    }
def natural_to_standard(eta1, eta2, name='gauss_to_stndrd'):
    """Convert natural parameters (eta1, eta2) to standard (mu, sigma).

    Computes `sigma = inverse(-2 * eta2)` and `mu = sigma @ eta1`, with
    `eta1` expanded at axis 2 for the matrix product and `mu` reshaped back
    to the shape of `eta1`.

    Args:
        eta1: first natural parameter (assumed rank 2, e.g. (batch, dim),
            given the `axis=2` expansion -- confirm against callers).
        eta2: second natural parameter; batched square matrices.
        name: name scope for the created ops.

    Returns:
        `tf.tuple((mu, sigma))`.
    """
    with tf.name_scope(name):
        sigma = tf.matrix_inverse(-2 * eta2)
        mu = tf.matmul(sigma, tf.expand_dims(eta1, axis=2))
        # Reshape with the runtime shape so that inputs with unknown static
        # dimensions (e.g. a None batch size) work; `get_shape()` cannot be
        # converted to a tensor when it is only partially known.
        mu = tf.reshape(mu, tf.shape(eta1))
        # Re-attach whatever static shape information eta1 carries.
        mu.set_shape(eta1.get_shape())

        return tf.tuple((mu, sigma), name='stndrd_params')
def create_model_multigpu(self):
    """Replicate the model on every GPU and build the shared optimization ops.

    One copy of the model is created per GPU (variables are reused after
    the first copy); per-GPU losses and gradients are collected, the loss
    is averaged and, when training, the averaged gradients are clipped by
    global norm and applied.  Depending on `model_config.npad_mode` only
    the variables under a specific NPAD scope are optimized.

    Sets `self.objs`, `self.global_step`, `self.loss`, `self.perplexity`,
    `self.saver`, `self.ops` and, when training, `self.train_op` and
    `self.increment_global_step`.
    """
    def npad_scope():
        """Return the variable scope to optimize, or None for all variables."""
        mode = self.model_config.npad_mode
        if mode == 'v1':
            return 'model/transformer_decoder/decoder/layer_5/npad/'
        if mode == 'static_seq':
            return 'model/transformer_decoder/npad/'
        return None

    tower_losses = []
    tower_grads = []
    ops = [tf.constant(0)]
    self.objs = []
    self.global_step = tf.train.get_or_create_global_step()
    optim = self.get_optim()

    fetch_data = None
    if self.model_config.fetch_mode == 'tf_example_dataset':
        fetch_data = self.data.get_data_sample()

    with tf.variable_scope(tf.get_variable_scope()) as scope:
        for gpu_id in range(self.model_config.num_gpus):
            with tf.device('/device:GPU:%d' % gpu_id):
                with tf.name_scope('%s_%d' % ('gpu_scope', gpu_id)):
                    loss, obj = self.create_model(fetch_data=fetch_data)

                    # Restrict the gradient computation to the NPAD
                    # variables when an NPAD mode selects a scope.
                    grad_kwargs = {'colocate_gradients_with_ops': True}
                    restrict_to = npad_scope()
                    if restrict_to is not None:
                        grad_kwargs['var_list'] = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope=restrict_to)
                    grad = optim.compute_gradients(loss, **grad_kwargs)

                    tf.get_variable_scope().reuse_variables()
                    tower_losses.append(loss)
                    tower_grads.append(grad)
                    if 'rule' in self.model_config.memory and self.is_train:
                        for mem_key in ('mem_contexts', 'mem_outputs',
                                        'mem_counter'):
                            ops.append(obj[mem_key])
                    self.objs.append(obj)

    with tf.variable_scope('optimization'):
        self.loss = tf.divide(tf.add_n(tower_losses),
                              self.model_config.num_gpus)
        self.perplexity = tf.exp(tf.reduce_mean(self.loss))

        if self.is_train:
            avg_grad = self.average_gradients(tower_grads)
            mean_grads = [g for (g, v) in avg_grad]
            clipped_grads, _ = tf.clip_by_global_norm(
                mean_grads, self.model_config.max_grad_norm)
            # scope=None selects every trainable variable.
            train_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=npad_scope())
            self.train_op = optim.apply_gradients(
                zip(clipped_grads, train_vars),
                global_step=self.global_step)
            self.increment_global_step = tf.assign_add(self.global_step, 1)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
        self.ops = tf.tuple(ops)
def __call__(self, dataset, moving_params=None):
    """Assemble the parser graph for `dataset` and collect its outputs.

    Word (trainable plus pretrained) and tag embeddings are concatenated,
    run through `self.n_recur` recurrent layers and split by
    `self.double_MLP` into a parse and a relation representation.  The
    relation classifier is conditioned on the gold heads when
    `moving_params` is None and on the predicted heads otherwise.

    Args:
      dataset: object exposing `vocabs`, `inputs` and `targets`.
      moving_params: forwarded to the embedding lookups and stored on
        `self`; when it is not None all variable scopes are opened with
        `reuse=True`.

    Returns:
      A dict of tensors keyed by 'probabilities', 'predictions',
      'correct', 'tokens', 'n_correct', 'n_tokens', 'accuracy', 'loss',
      'embed', 'recur', 'parse_mlp', 'rel_mlp', 'parse_logits' and
      'rel_logits'.
    """
    vocabs = dataset.vocabs
    inputs = dataset.inputs
    targets = dataset.targets
    share_vars = moving_params is not None

    # 1.0 wherever the word id is greater than the ROOT id, 0.0 elsewhere.
    keep_mask = tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT))
    self.tokens_to_keep3D = tf.expand_dims(keep_mask, 2)
    kept_per_row = tf.reduce_sum(self.tokens_to_keep3D, [1, 2])
    self.sequence_lengths = tf.reshape(kept_per_row, [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    # Embedding lookups; the word vocab returns two embeddings that are summed.
    word_inputs, pret_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    tag_inputs = vocabs[1].embedding_lookup(
        inputs[:, :, 2], moving_params=self.moving_params)

    # Recurrent stack.
    hidden = self.embed_concat(word_inputs + pret_inputs, tag_inputs)
    for layer in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % layer, reuse=share_vars):
            hidden, _ = self.RNN(hidden)

    with tf.variable_scope('MLP0', reuse=share_vars):
        parse_mlp, rel_mlp = self.double_MLP(hidden, n_splits=2)

    with tf.variable_scope('Parses', reuse=share_vars):
        parse_logits = tf.squeeze(self.linear_classifier(parse_mlp, 1))
        parse_output = self.output(parse_logits, targets[:, :, 1])
        # Condition relations on gold heads unless moving params are given.
        if moving_params is not None:
            predictions = parse_output['predictions']
        else:
            predictions = targets[:, :, 1]

    with tf.variable_scope('Rels', reuse=share_vars):
        rel_logits, rel_logits_cond = self.conditional_linear_classifier(
            rel_mlp, len(vocabs[2]), predictions)
        rel_output = self.output(rel_logits, targets[:, :, 2])
        rel_output['probabilities'] = self.conditional_probabilities(
            rel_logits_cond, transpose=False)

    # Combined outputs, built in the same order as the dict keys below.
    probabilities = tf.tuple(
        [parse_output['probabilities'], rel_output['probabilities']])
    stacked_predictions = tf.pack(
        [parse_output['predictions'], rel_output['predictions']])
    correct = parse_output['correct'] * rel_output['correct']
    n_correct = tf.reduce_sum(correct)
    accuracy = n_correct / self.n_tokens
    loss = parse_output['loss'] + rel_output['loss']
    embed = tf.pack([word_inputs, tag_inputs])

    return {
        'probabilities': probabilities,
        'predictions': stacked_predictions,
        'correct': correct,
        'tokens': parse_output['tokens'],
        'n_correct': n_correct,
        'n_tokens': self.n_tokens,
        'accuracy': accuracy,
        'loss': loss,
        'embed': embed,
        'recur': hidden,
        'parse_mlp': parse_mlp,
        'rel_mlp': rel_mlp,
        'parse_logits': parse_logits,
        'rel_logits': rel_logits,
    }
def train_loop_body(step):
    """Run one optimizer step and return `[step + 1]` gated on that step.

    In eager mode the optimizer receives the loss callable itself; in graph
    mode it receives the tensor produced by calling it.
    """
    if tf.executing_eagerly():
        loss = build_loss_fn
    else:
        loss = build_loss_fn()
    minimize_op = optimizer.minimize(loss)
    next_step = tf.add(step, 1)
    # The control input forces the training op to run before the counter
    # value is produced.
    return tf.tuple([next_step], control_inputs=[minimize_op])
def data_output(self):
    """Return `tf.tuple` of the generated image and the reproduced sound."""
    outputs = [self.generated_img, self.reproduced_sound]
    return tf.tuple(tensors=outputs)
def training_graph(self, input_data, input_labels, random_seed):
    """Constructs a TF graph for training a random tree.

    The graph accumulates per-class statistics for the inputs, samples new
    candidate splits, determines which leaves are finished, picks their
    best splits, grows the tree and finally re-allocates accumulator
    slots.  `tf.control_dependencies` blocks order the stages so that each
    one sees the variable updates it relies on.

    Args:
      input_data: A tensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.

    Returns:
      The last op in the random tree training graph: a `tf.group` of all
      variable update ops collected in `updates`.
    """
    # Count extremely random stats.
    (pcw_node_delta, pcw_splits_indices, pcw_splits_delta, pcw_totals_indices,
     pcw_totals_delta, input_leaves) = (
         self.training_ops.count_extremely_random_stats(
             input_data, input_labels, self.variables.tree,
             self.variables.tree_thresholds,
             self.variables.node_to_accumulator_map,
             self.variables.candidate_split_features,
             self.variables.candidate_split_thresholds,
             num_classes=self.params.num_classes))
    # Fold the deltas into the per-node, per-candidate-split and
    # per-accumulator-total class weight variables.
    node_update_op = tf.assign_add(self.variables.node_per_class_weights,
                                   pcw_node_delta)
    candidate_update_op = self.training_ops.scatter_add_ndim(
        self.variables.candidate_split_per_class_weights,
        pcw_splits_indices, pcw_splits_delta)

    totals_update_op = self.training_ops.scatter_add_ndim(
        self.variables.total_split_per_class_weights, pcw_totals_indices,
        pcw_totals_delta)

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        self.training_ops.sample_inputs(
            input_data, self.variables.node_to_accumulator_map,
            input_leaves, self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = tf.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = tf.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    # Runs after the totals have been updated for this batch.
    with tf.control_dependencies([totals_update_op]):
        # Column 0 of the tree variable is compared against LEAF_NODE to
        # find the leaves.
        children = tf.squeeze(tf.slice(self.variables.tree, [0, 0], [-1, 1]),
                              squeeze_dims=[1])
        is_leaf = tf.equal(LEAF_NODE, children)
        leaves = tf.to_int32(tf.squeeze(tf.where(is_leaf), squeeze_dims=[1]))
        finished = self.training_ops.finished_nodes(
            leaves, self.variables.node_to_accumulator_map,
            self.variables.total_split_per_class_weights,
            num_split_after_samples=self.params.split_after_samples)

    # Update leaf scores.
    # TODO(gilberth): Optimize this. It currently calculates counts for
    # every non-fertile leaf.
    with tf.control_dependencies([node_update_op]):
        def f1():
            # Nothing to score: return the scores unchanged.
            return self.variables.non_fertile_leaf_scores

        def f2():
            # Re-score every non-fertile leaf from its per-class counts.
            counts = tf.gather(self.variables.node_per_class_weights,
                               self.variables.non_fertile_leaves)
            new_scores = self._weighted_gini(counts)
            return tf.assign(self.variables.non_fertile_leaf_scores, new_scores)

        # Because we can't have tf.self.variables of size 0, we have to put in a
        # garbage value of -1 in there.  Here we check for that so we don't
        # try to index into node_per_class_weights in a tf.gather with a negative
        # number.
        update_nonfertile_leaves_scores_op = tf.cond(tf.less(
            self.variables.non_fertile_leaves[0], 0), f1, f2)

    # Calculate best splits.
    with tf.control_dependencies([candidate_update_op, totals_update_op]):
        split_indices = self.training_ops.best_splits(
            finished, self.variables.node_to_accumulator_map,
            self.variables.candidate_split_per_class_weights,
            self.variables.total_split_per_class_weights)

    # Grow tree.
    # Runs after the candidate features/thresholds sampled above are stored.
    with tf.control_dependencies([update_features_op, update_thresholds_op]):
        (tree_update_indices, tree_children_updates,
         tree_threshold_updates, tree_depth_updates, new_eot) = (
             self.training_ops.grow_tree(
                 self.variables.end_of_tree, self.variables.tree_depths,
                 self.variables.node_to_accumulator_map, finished, split_indices,
                 self.variables.candidate_split_features,
                 self.variables.candidate_split_thresholds))
        tree_update_op = tf.scatter_update(
            self.variables.tree, tree_update_indices, tree_children_updates)
        threhsolds_update_op = tf.scatter_update(
            self.variables.tree_thresholds, tree_update_indices,
            tree_threshold_updates)
        depth_update_op = tf.scatter_update(
            self.variables.tree_depths, tree_update_indices, tree_depth_updates)

    # Update fertile slots.
    with tf.control_dependencies([update_nonfertile_leaves_scores_op,
                                  depth_update_op]):
        (node_map_updates, accumulators_cleared, accumulators_allocated,
         new_nonfertile_leaves, new_nonfertile_leaves_scores) = (
             self.training_ops.update_fertile_slots(
                 finished, self.variables.non_fertile_leaves,
                 self.variables.non_fertile_leaf_scores,
                 self.variables.end_of_tree, self.variables.tree_depths,
                 self.variables.candidate_split_per_class_weights,
                 self.variables.total_split_per_class_weights,
                 self.variables.node_to_accumulator_map,
                 max_depth=self.params.max_depth))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = tf.tuple([new_eot], control_inputs=[new_nonfertile_leaves])
    eot_update_op = tf.assign(self.variables.end_of_tree, gated_new_eot)

    # Collect every variable update; they are grouped into the returned op.
    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(threhsolds_update_op)
    # validate_shape=False: the new leaf lists are assigned without a
    # static shape check against the variables.
    updates.append(tf.assign(
        self.variables.non_fertile_leaves, new_nonfertile_leaves,
        validate_shape=False))
    updates.append(tf.assign(
        self.variables.non_fertile_leaf_scores,
        new_nonfertile_leaves_scores, validate_shape=False))

    # Row 0 of node_map_updates supplies the indices and row 1 the values
    # written into node_to_accumulator_map.
    updates.append(tf.scatter_update(
        self.variables.node_to_accumulator_map,
        tf.squeeze(tf.slice(node_map_updates, [0, 0], [1, -1]),
                   squeeze_dims=[0]),
        tf.squeeze(tf.slice(node_map_updates, [1, 0], [1, -1]),
                   squeeze_dims=[0])))

    cleared_and_allocated_accumulators = tf.concat(
        0, [accumulators_cleared, accumulators_allocated])
    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    candidate_pcw_values = tf.tile(
        tf.expand_dims(tf.expand_dims(
            tf.zeros_like(cleared_and_allocated_accumulators, dtype=tf.float32),
            1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_classes])
    updates.append(tf.scatter_update(
        self.variables.candidate_split_per_class_weights,
        cleared_and_allocated_accumulators, candidate_pcw_values))

    # Calculate values to put into scatter update for total counts.
    # Cleared accumulators get -1 in every class slot, newly allocated ones 0.
    total_cleared = tf.tile(
        tf.expand_dims(
            tf.neg(tf.ones_like(accumulators_cleared, dtype=tf.float32)), 1),
        [1, self.params.num_classes])
    total_reset = tf.tile(
        tf.expand_dims(
            tf.zeros_like(accumulators_allocated, dtype=tf.float32), 1),
        [1, self.params.num_classes])
    total_pcw_updates = tf.concat(0, [total_cleared, total_reset])
    updates.append(tf.scatter_update(
        self.variables.total_split_per_class_weights,
        cleared_and_allocated_accumulators, total_pcw_updates))

    # Calculate values to put into scatter update for candidate splits.
    # Every candidate feature slot of the affected accumulators is set to -1.
    split_features_updates = tf.tile(
        tf.expand_dims(
            tf.neg(tf.ones_like(cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(tf.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    return tf.group(*updates)
def _build_base_rnn(self, inputs, input_seq_lengths, forward_only=True):
    """
    Assemble the acoustic model graph: input projection, stacked LSTM with persistent state,
    output projection and CTC beam search decoding

    Parameters
    ----------
    :param inputs: network input; it is split along axis 0 into self.max_input_seq_length slices,
                   each slice being multiplied by a [self.input_dim, self.hidden_size] matrix
                   (time-major layout, consistent with time_major=True passed to dynamic_rnn)
    :param input_seq_lengths: vector containing the length of each input from 'inputs'
    :param forward_only: True builds the graph without dropout; False (training) wraps every
                         LSTM layer in a DropoutWrapper driven by two placeholders

    Returns
    ----------
    :returns global_step: non-trainable variable initialised to 0, used to track the learning step
    :returns logits: output layer activations for each timestep of the input, for each item of the batch
    :returns prediction: first path returned by the CTC beam search decoder, cast to int32
    :returns rnn_keep_state_op: op copying the states produced by the RNN into the state variables,
                                so they are reused as initial state by the next batch
    :returns rnn_state_zero_op: op resetting the state variables to zeros
    :returns input_keep_prob_ph: placeholder for the dropout input_keep_prob
                                 (None if forward_only is True)
    :returns output_keep_prob_ph: placeholder for the dropout output_keep_prob
                                  (None if forward_only is True)
    :returns rnn_tuple_state: tuple holding one LSTMStateTuple of non-trainable variables per layer
    """
    training = not forward_only

    def _project_each_step(sequence, weights, bias):
        # Apply the same affine map to every slice taken along axis 0, then stack the results back
        steps = tf.split(value=sequence, num_or_size_splits=self.max_input_seq_length, axis=0)
        return tf.stack([tf.matmul(tf.squeeze(step, axis=[0]), weights) + bias for step in steps])

    # Counter for the learning process step
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Dropout keep probabilities only exist in the training graph; they are placeholders so
    # that they can be overridden when running on the test set
    input_keep_prob_ph = None
    output_keep_prob_ph = None
    if training:
        with tf.name_scope('dropout'):
            input_keep_prob_ph = tf.placeholder(tf.float32)
            output_keep_prob_ph = tf.placeholder(tf.float32)

    # Stack self.num_layers LSTM layers into a single multi-layer cell
    with tf.variable_scope('LSTM'):
        lstm_layers = []
        for _ in range(self.num_layers):
            layer = tf.contrib.rnn.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
            if training:
                with tf.name_scope('dropout'):
                    layer = tf.contrib.rnn.DropoutWrapper(layer,
                                                          input_keep_prob=input_keep_prob_ph,
                                                          output_keep_prob=output_keep_prob_ph)
            lstm_layers.append(layer)
        stacked_cell = tf.contrib.rnn.MultiRNNCell(lstm_layers, state_is_tuple=True)

    # Parameters of the layer sitting between the raw input and the RNN
    with tf.variable_scope('Input_Layer'):
        w_i = tf.get_variable("input_w", shape=[self.input_dim, self.hidden_size], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        b_i = tf.get_variable("input_b", shape=[self.hidden_size], dtype=tf.float32,
                              initializer=tf.constant_initializer(0.0))

    # Project the network input to obtain the input of the recurrent part
    rnn_inputs = _project_each_step(inputs, w_i, b_i)

    # Optional normalization: moments are taken along axis 1 (the batch axis of the time-major
    # tensor) and no learned offset / scale is applied
    if self.normalization:
        with tf.name_scope('Normalization'):
            mean, variance = tf.nn.moments(rnn_inputs, [1], shift=None, name="moments", keep_dims=True)
            rnn_inputs = tf.nn.batch_normalization(rnn_inputs, mean, variance,
                                                   offset=None, scale=None, variance_epsilon=1e-3,
                                                   name="batch_norm")

    # Keep the RNN state in graph variables so that it survives between batches (needed for
    # live transcript) without fetching it from the session and feeding it back every time
    with tf.variable_scope('Hidden_state'):
        rnn_tuple_state = tuple(
            tf.nn.rnn_cell.LSTMStateTuple(tf.Variable(zero_c, trainable=False),
                                          tf.Variable(zero_h, trainable=False))
            for zero_c, zero_h in stacked_cell.zero_state(self.batch_size, tf.float32))

    # Unroll the RNN, starting from the stored state
    with tf.name_scope('LSTM'):
        rnn_output, new_states = tf.nn.dynamic_rnn(stacked_cell, rnn_inputs,
                                                   sequence_length=input_seq_lengths,
                                                   initial_state=rnn_tuple_state,
                                                   time_major=True)

    # Op storing the freshly computed states, layer by layer, into the state variables.
    # tf.tuple is only used to group the assignments; its value should not be used.
    keep_assignments = [stored[part].assign(fresh[part])
                        for stored, fresh in zip(rnn_tuple_state, new_states)
                        for part in (0, 1)]
    rnn_keep_state_op = tf.tuple(keep_assignments)

    # Op putting every state variable back to zeros, grouped the same way
    zero_assignments = [stored[part].assign(tf.zeros_like(stored[part]))
                        for stored in rnn_tuple_state
                        for part in (0, 1)]
    rnn_state_zero_op = tf.tuple(zero_assignments)

    # Parameters of the layer sitting between the RNN and the char_map
    with tf.variable_scope('Output_layer'):
        w_o = tf.get_variable("output_w", shape=[self.hidden_size, self.num_labels], dtype=tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        b_o = tf.get_variable("output_b", shape=[self.num_labels], dtype=tf.float32,
                              initializer=tf.constant_initializer(0.0))

    # Logits for each timestep of the input, for each item of the batch
    logits = _project_each_step(rnn_output, w_o, b_o)

    # Decode with CTC beam search and keep the first returned path as the prediction
    decoded_paths, _ = tf.nn.ctc_beam_search_decoder(logits, input_seq_lengths)
    prediction = tf.to_int32(decoded_paths[0])

    return (global_step, logits, prediction, rnn_keep_state_op, rnn_state_zero_op,
            input_keep_prob_ph, output_keep_prob_ph, rnn_tuple_state)