def log_prob(self, xs, zs):
    """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])].

    Each row of ``zs`` is split by ``self.unpack_params`` into mixing
    proportions ``pi``, flattened component means ``mus`` and flattened
    component scales ``sigmas``; component ``k`` owns the entries
    ``[k*D, (k+1)*D)`` of ``mus`` and ``sigmas``.

    The prior places a Dirichlet(``self.alpha``) on ``pi``, a
    Normal(0, sqrt(``self.c``)) on every mean coordinate and an
    InverseGamma(``self.a``, ``self.b``) on every scale coordinate.
    All ``self.D`` coordinates of each component are covered, not only
    the first two.
    """
    prior_scale = np.sqrt(self.c)
    log_prob = []
    # Loop over each mini-batch zs[b,:]
    for z in tf.unpack(zs):
        pi, mus, sigmas = self.unpack_params(z)

        log_prior = dirichlet.logpdf(pi, self.alpha)
        for k in xrange(self.K):
            start = k * self.D
            # Means first, then scales, so that for D == 2 the terms are
            # accumulated in the same order as before.
            for d in xrange(self.D):
                log_prior += norm.logpdf(mus[start + d], 0, prior_scale)
            for d in xrange(self.D):
                log_prior += invgamma.logpdf(sigmas[start + d],
                                             self.a, self.b)

        # NOTE(review): every data point adds log pi_k + log N(x | mu_k,
        # sigma_k) for *all* k, i.e. a plain sum over components rather
        # than a log-sum-exp -- confirm this is the intended likelihood.
        log_lik = tf.constant(0.0, dtype=tf.float32)
        for x in tf.unpack(xs):
            for k in xrange(self.K):
                start, stop = k * self.D, (k + 1) * self.D
                log_lik += tf.log(pi[k])
                log_lik += multivariate_normal.logpdf(
                    x, mus[start:stop], sigmas[start:stop])

        log_prob += [log_prior + log_lik]

    return tf.pack(log_prob)
def log_prob(self, xs, zs):
    """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])].

    Each row of ``zs`` holds unconstrained parameters. After
    ``self.unpack_params`` they are mapped to the constrained space:
    the mixing proportions go through a sigmoid and the last one is
    replaced by one minus the sum of the first ``K - 1``; the scales go
    through a softplus.

    The prior places a Dirichlet(``self.alpha``) on ``pi``, a
    Normal(0, sqrt(``self.c``)) on every mean coordinate and an
    InverseGamma(``self.a``, ``self.b``) on every scale coordinate.
    All ``self.D`` coordinates of each component are covered, not only
    the first two.
    """
    prior_scale = np.sqrt(self.c)
    log_prob = []
    # Loop over each mini-batch zs[b,:]
    for z in tf.unpack(zs):
        # Do the unconstrained to constrained transformation for MAP here.
        pi, mus, sigmas = self.unpack_params(z)
        pi = tf.sigmoid(pi)
        leading = pi[0:(self.K - 1)]
        remainder = tf.expand_dims(1.0 - tf.reduce_sum(leading), 0)
        pi = tf.concat(0, [leading, remainder])
        sigmas = tf.nn.softplus(sigmas)

        log_prior = dirichlet.logpdf(pi, self.alpha)
        for k in xrange(self.K):
            start = k * self.D
            # Means first, then scales, so that for D == 2 the terms are
            # accumulated in the same order as before.
            for d in xrange(self.D):
                log_prior += norm.logpdf(mus[start + d], 0, prior_scale)
            for d in xrange(self.D):
                log_prior += invgamma.logpdf(sigmas[start + d],
                                             self.a, self.b)

        # NOTE(review): every data point adds log pi_k + log N(x | mu_k,
        # sigma_k) for *all* k, i.e. a plain sum over components rather
        # than a log-sum-exp -- confirm this is the intended likelihood.
        log_lik = tf.constant(0.0, dtype=tf.float32)
        for x in tf.unpack(xs):
            for k in xrange(self.K):
                start, stop = k * self.D, (k + 1) * self.D
                log_lik += tf.log(pi[k])
                log_lik += multivariate_normal.logpdf(
                    x, mus[start:stop], sigmas[start:stop])

        log_prob += [log_prior + log_lik]

    return tf.pack(log_prob)
def language_model(X, y):
    """Build a GRU sequence classifier over one-hot encoded inputs.

    ``X`` is one-hot encoded with depth 256 and split along axis 1 into a
    list of per-step inputs; ``y`` is split the same way into per-step
    targets. A GRU cell of size ``hidden_size`` (module-level name),
    wrapped so that its output is projected to 256 units, is unrolled
    over the inputs.

    Returns:
        Whatever ``learn.ops.sequence_classifier`` returns for the
        per-step outputs and targets.
    """
    one_hot = learn.ops.one_hot_matrix(X, 256)
    step_inputs = tf.unpack(one_hot, axis=1)
    step_targets = tf.unpack(y, axis=1)

    gru = tf.nn.rnn_cell.GRUCell(hidden_size)
    projected_cell = tf.nn.rnn_cell.OutputProjectionWrapper(gru, 256)
    step_outputs, _ = tf.nn.rnn(projected_cell, step_inputs,
                                dtype=tf.float32)

    return learn.ops.sequence_classifier(step_outputs, step_targets)
def inference(input_var, state_size, vocab_size, num_steps, batch_size,
              noise_var, decoder_inputs, scope):
    """Build a training decoder and a sampling decoder sharing weights.

    Both decoders are ``tf.nn.seq2seq.embedding_rnn_decoder`` graphs over
    the same ``VarRNN`` cell. ``scope.reuse_variables()`` is called
    between the two so the second one reuses the variables of the first.
    The sampling decoder runs with ``feed_previous=True`` from a
    batch-size-1 zero state.

    ``num_steps`` is accepted but not used here.

    Returns:
        Tuple ``(logits, state, init_state, samples)``: projected outputs
        of the training decoder, its final state, its initial zero state,
        and the unpacked argmax symbols of the sampling decoder.
    """
    embedding_size = 32

    cell = VarRNN(state_size, noise_var)
    step_inputs = tf.unpack(input_var, axis=1)
    init_state = cell.zero_state(batch_size, tf.float32)

    softmax_w = tf.get_variable('softmax_w', [state_size, vocab_size])
    softmax_b = tf.get_variable('softmax_b', [vocab_size])
    projection = (softmax_w, softmax_b)

    def project(step_outputs):
        # Flatten the per-step outputs and map them onto the vocabulary.
        flat = tf.reshape(tf.concat(1, step_outputs), [-1, state_size])
        return tf.matmul(flat, softmax_w) + softmax_b

    train_outputs, state = tf.nn.seq2seq.embedding_rnn_decoder(
        step_inputs, init_state, cell, vocab_size, embedding_size,
        output_projection=projection, scope=scope)
    logits = project(train_outputs)

    sample_init = cell.zero_state(1, tf.float32)
    print('got model')

    scope.reuse_variables()
    sample_outputs, _ = tf.nn.seq2seq.embedding_rnn_decoder(
        decoder_inputs, sample_init, cell, vocab_size, embedding_size,
        output_projection=projection, feed_previous=True, scope=scope)
    sample_ids = tf.argmax(project(sample_outputs), 1)
    samples = tf.unpack(tf.squeeze(sample_ids))
    print('got sampling model')

    return logits, state, init_state, samples
def dynamic_vae_single(T=50, d_z=1, d_hidden=2, d_x=10):
    """Build a linear-Gaussian latent chain with a Bernoulli VAE decoder.

    The generative model uses an identity transition matrix, zero bias and
    identity covariance of size ``d_z``. One observation is sampled from
    the model and fed back as the observed value. The inference model
    encodes that observation into per-step Gaussian factors which,
    together with the transition noise, define a chain CRF attached to
    ``z`` as its approximate posterior.

    Returns:
        Tuple ``(x, z, x_sampled)``.
    """
    # MODEL
    identity = np.eye(d_z, dtype=np.float32)
    zero_bias = np.zeros((d_z,), dtype=np.float32)
    transition_mat = identity
    transition_bias = zero_bias
    transition_cov = np.eye(d_z, dtype=np.float32)

    step_noise = MVGaussianMeanCov(transition_bias, transition_cov)
    w1, w2, b1, b2 = decoder_params(d_z, d_hidden, d_x)

    z = LinearGaussian(T,
                       transition_bias, transition_cov,
                       transition_mat, transition_bias, transition_cov,
                       name="z")
    x = VAEDecoderBernoulli(z, w1, w2, b1, b2, name="x")

    # SYNTHETIC OBSERVATION
    x_sampled = x.sample(0)
    q_x = x.observe(x_sampled)

    # INFERENCE MODEL
    upwards_messages = VAEEncoder(q_x.sample, d_hidden, d_z)
    upwards_means = tf.unpack(upwards_messages.mean)
    upwards_vars = tf.unpack(upwards_messages.variance)

    unary_factors = []
    for step_mean, step_var in zip(upwards_means, upwards_vars):
        unary_factors.append(MVGaussianMeanCov(step_mean, tf.diag(step_var)))

    tmat = tf.constant(transition_mat)
    q_z = LinearGaussianChainCRF((T, d_z), tmat, step_noise, unary_factors)
    z.attach_q(q_z)

    return x, z, x_sampled
def _calc_rewards(self, action_list, name="rewards"):
    """Score predicted orderings step by step.

    The hardened actions are treated as index sequences into the input
    tokens. After dereferencing, step ``t`` (for ``t >= 1``) earns reward
    1.0 when the token at ``t`` is strictly greater than the token at
    ``t - 1`` and 0.0 otherwise; step 0 always gets 0.

    Returns:
        Tuple ``(rewards, rewards_unpacked)``: the transposed reward
        matrix and the same data unpacked into ``self.seq_length``
        tensors.
    """
    batch_size = FLAGS.batch_size

    hardened = tf.transpose(self.harden_actions(action_list))
    actions_per_example = tf.unpack(hardened, batch_size)

    # batch_size * seq_length
    tokens = tf.transpose(tf.pack(self.input_tokens))
    tokens_per_example = tf.unpack(tokens, batch_size)

    # "Dereference" the predicted sorts, which are index sequences.
    dereferenced = []
    for i in range(batch_size):
        dereferenced.append(
            tf.gather(tokens_per_example[i], actions_per_example[i]))
    predicted = tf.concat(0, [tf.expand_dims(row, 0) for row in dereferenced])

    # Compute per-timestep rewards by evaluating constraint violations.
    current = tf.slice(predicted, [0, 1], [-1, -1])
    previous = tf.slice(predicted, [0, 0], [-1, self.seq_length - 1])
    rewards = tf.cast(current > previous, tf.float32)

    # Add reward for t = 0, fixed as 0
    first_step = tf.zeros((batch_size, 1))
    rewards = tf.transpose(tf.concat(1, [first_step, rewards]))

    rewards_unpacked = tf.unpack(rewards, self.seq_length, name=name)
    return rewards, rewards_unpacked
def testCannotInferNumFromUnknownShape(self):
    """Both splitters must reject a placeholder of unknown shape."""
    x = tf.placeholder(np.float32)
    expected_message = r'Cannot infer num from shape <unknown>'
    for split in (tf.unpack, tf.unstack):
        with self.assertRaisesRegexp(ValueError, expected_message):
            split(x)
def testCannotInferNumFromNoneShape(self):
    """Both splitters must reject a placeholder whose first dim is None."""
    x = tf.placeholder(np.float32, shape=(None,))
    expected_message = r'Cannot infer num from shape \(\?,\)'
    for split in (tf.unpack, tf.unstack):
        with self.assertRaisesRegexp(ValueError, expected_message):
            split(x)
def __init__(self, input_size, output_size):
    """Build the trend-fitting graph and its bookkeeping containers.

    Args:
        input_size: number of hyper-parameters fed to the network.
        output_size: length of the predicted trend vector.
    """
    self.graph = tf.Graph()
    self.hyper_cnt = input_size
    self.save_path = "fit_trend.ckpt"

    # Collected history.
    self.collect_counter = 0
    self.fit_loss_collect = []
    self.stable_loss_predict_collect = []
    self.hp_collect = [[] for _ in range(self.hyper_cnt)]
    self.gradient_collect = [[] for _ in range(self.hyper_cnt)]
    self.stable_loss_label_collect = []
    self.hp_norms = []
    self.has_init = False

    with self.graph.as_default():
        # Receive the input.
        self.ph_hypers = tf.placeholder(
            tf.float32, shape=[self.hyper_cnt], name='ph_hypers')
        self.tf_hypers, self.reset_vars = assign_diffable_vars2tensor(
            self.ph_hypers, self.hyper_cnt)
        rnn_step = 5
        repeated_hypers = [self.tf_hypers for _ in range(rnn_step)]
        trend_input = tf.concat(0, repeated_hypers)

        # Pass it through an RNN.
        trend_outputs = rnn(trend_input, n_hidden=128)
        print('rnn output')
        print(tf.concat(0, trend_outputs))

        # The RNN is followed by a DNN.
        trend_output = dnn(tf.concat(0, trend_outputs), [1, output_size])
        print('dnn output')
        print(trend_output)
        self.predict = trend_output

        # The actual trend.
        self.train_label = tf.placeholder(
            tf.float32, shape=[output_size], name='train_label')

        # Prediction accuracy: geometric distance between the prediction
        # and the trend, divided by the output size.
        squared_error = tf.square(tf.sub(trend_output, self.train_label))
        predict_accuracy = tf.sqrt(tf.reduce_sum(squared_error)) / output_size

        # Loss at stability: the last predicted loss value.
        first_row = tf.unpack(trend_output)[0]
        stable_loss = tf.unpack(first_row)[-1]
        print(stable_loss)

        self.is_fit = tf.placeholder(tf.bool, name='is_fit')
        self.loss = tf.cond(self.is_fit,
                            lambda: predict_accuracy,
                            lambda: stable_loss)

        # Optimizers. The first hyper_cnt trainable variables are taken
        # to be the hyper-parameter variables; the rest belong to the
        # fitting network.
        self.var_s = tf.trainable_variables()
        self.v_hp_s = self.var_s[0: self.hyper_cnt]
        self.v_fit_s = [v for v in self.var_s if v not in self.v_hp_s]
        self.grads = var_gradient(self.v_hp_s, self.loss,
                                  start_rate=0.1, lrd=False)

        def optimize_fit():
            return var_optimizer(self.v_fit_s, self.loss)

        def optimize_hp():
            return var_optimizer(self.v_hp_s, self.loss,
                                 start_rate=0.1, lrd=False)

        self.optimizer = tf.cond(self.is_fit, optimize_fit, optimize_hp)
        self.saver = tf.train.Saver()
def log_prob(self, xs, zs):
    """Return log-likelihood plus log-prior for every row of ``zs``.

    The prior term is ``norm.logpdf(z, mu, Sigma)`` and the likelihood
    term is the summed ``norm.logpdf`` of ``xs`` centred at ``z`` with
    scale ``Sigma`` broadcast to the shape of ``xs``. ``mu`` and
    ``Sigma`` are names taken from the enclosing scope.
    """
    prior_terms = []
    for z in tf.unpack(zs):
        prior_terms.append(norm.logpdf(z, mu, Sigma))
    log_prior = tf.pack(prior_terms)

    lik_terms = []
    for z in tf.unpack(zs):
        # 0*xs + Sigma gives Sigma the same shape as xs.
        pointwise = norm.logpdf(xs, z, 0*xs+Sigma)
        lik_terms.append(tf.reduce_sum(pointwise))
    log_lik = tf.pack(lik_terms)

    return log_lik + log_prior
def get_placeholders(batch_size, sequence_length, num_features):
    """Create input and target placeholders, unpacked along axis 0.

    Both placeholders are float32 with shape
    ``[sequence_length, batch_size, num_features]``.

    Returns:
        Tuple ``(inputs, targets)`` of lists with one tensor per
        sequence step.
    """
    def make(placeholder_name):
        return tf.placeholder(
            tf.float32, name=placeholder_name,
            shape=[sequence_length, batch_size, num_features])

    all_inputs = make('all_inputs')
    all_targets = make('all_targets')
    return tf.unpack(all_inputs), tf.unpack(all_targets)
def testSimple(self):
    """Check tf.unpack and tf.unstack split a constant along axis 0.

    For several shapes, a random array is turned into a constant and
    split with an explicit ``num``. Each result must be a list of
    ``shape[0]`` tensors that evaluate back to the original data.
    """
    np.random.seed(7)
    with self.test_session(use_gpu=True):
        for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
            data = np.random.randn(*shape)
            # Convert data to a single tensorflow tensor
            x = tf.constant(data)
            # Unpack into a list of tensors
            cs_unpacked = tf.unpack(x, num=shape[0])
            # Exercise tf.unstack as well; calling tf.unpack a second time
            # left it untested.
            cs_unstacked = tf.unstack(x, num=shape[0])
            for cs in (cs_unpacked, cs_unstacked):
                self.assertEqual(type(cs), list)
                self.assertEqual(len(cs), shape[0])
                cs = [c.eval() for c in cs]
                self.assertAllEqual(cs, data)
def sequence_loss(self, y_pred, y_true):
    '''
    Loss function for the seq2seq RNN.

    Both tensors are split along axis 1 into per-step lists, every target
    step gets an all-ones float32 weight, and the three lists are handed
    to seq2seq.sequence_loss, whose result is returned.
    '''
    # One logits tensor per step.
    step_logits = tf.unpack(y_pred, axis=1)
    # y_true has shape [-1, self.out_seq_len]; one target tensor per step.
    step_targets = tf.unpack(y_true, axis=1)

    # Dummy weights: every position counts equally.
    step_weights = []
    for target in step_targets:
        step_weights.append(tf.ones_like(target, dtype=tf.float32))

    return seq2seq.sequence_loss(step_logits, step_targets, step_weights)
def ndlstm_base_unrolled(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM, either forward or backward.

    This is a 1D LSTM implementation using unrolling and the TensorFlow
    LSTM op.

    Args:
      inputs: input sequence (length, batch_size, ninput)
      noutput: depth of output
      scope: optional scope name
      reverse: run LSTM in reverse

    Returns:
      Output sequence (length, batch_size, noutput)
    """
    with tf.variable_scope(scope, "SeqLstmUnrolled", [inputs]):
        length, batch_size, _ = _shape(inputs)
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
        state = tf.zeros([batch_size, lstm_cell.state_size])

        steps = tf.unpack(inputs)
        if reverse:
            steps = steps[::-1]

        step_outputs = []
        for i in xrange(length):
            with tf.variable_scope(scope, "SeqLstmStep", [steps[i]]):
                output, state = lstm_cell(steps[i], state)
                step_outputs.append(output)

        # Restore the original time order for a reversed run.
        if reverse:
            step_outputs = step_outputs[::-1]
        outputs = tf.pack(step_outputs)
        return outputs
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
    """Run a softmax layer over all the time steps of an input sequence.

    The same weights and biases are applied at every step.

    Args:
      inputs: (length, batch_size, depth) tensor
      noutput: output depth
      scope: optional scope name
      name: optional name for output tensor
      linear_name: name for linear (pre-softmax) output

    Returns:
      A tensor of size (length, batch_size, noutput).
    """
    length, _, ninputs = _shape(inputs)
    steps = tf.unpack(inputs)
    step_outputs = []
    with tf.variable_scope(scope, "SequenceSoftmax", [inputs]):
        weights_init = tf.truncated_normal([0 + ninputs, noutput], stddev=0.1)
        biases_init = tf.constant(0.1, shape=[noutput])
        w = tf.contrib.framework.model_variable("weights",
                                                initializer=weights_init)
        b = tf.contrib.framework.model_variable("biases",
                                                initializer=biases_init)
        for i in xrange(length):
            step = steps[i]
            with tf.variable_scope(scope, "SequenceSoftmaxStep", [step]):
                # TODO(tmb) consider using slim.fully_connected(...,
                # activation_fn=tf.nn.softmax)
                linear = tf.nn.xw_plus_b(step, w, b, name=linear_name)
                step_outputs.append(tf.nn.softmax(linear))
        outputs = tf.pack(step_outputs, name=name)
    return outputs
def sequence_to_final(inputs, noutput, scope=None, name=None, reverse=False):
    """Run an LSTM across all steps and return only the last step's output.

    Args:
      inputs: (length, batch_size, depth) tensor
      noutput: size of output vector
      scope: optional scope name
      name: optional name for output tensor
      reverse: run in reverse

    Returns:
      Batch of size (batch_size, noutput).
    """
    with tf.variable_scope(scope, "SequenceToFinal", [inputs]):
        length, batch_size, _ = _shape(inputs)
        lstm = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
        state = tf.zeros([batch_size, lstm.state_size])

        steps = tf.unpack(inputs)
        if reverse:
            steps = steps[::-1]

        for i in xrange(length):
            step = steps[i]
            with tf.variable_scope(scope, "SequenceToFinalStep", [step]):
                output, state = lstm(step, state)

        # Only the output of the final processed step is kept.
        outputs = tf.reshape(output, [batch_size, noutput], name=name)
        return outputs
def _sample_forward(self, back_filtered, eps):
    """Draw a sample of the chain, one step at a time, front to back.

    Step 0 samples directly from ``back_filtered[0]``. Each later step
    pushes the previous sample through the transition, adds the
    transition noise to form an incoming Gaussian, multiplies it into
    ``back_filtered[t]`` and samples from the result. The per-step
    distributions and entropies are stored on ``self.sampling_dists``
    and ``self.entropies``.

    Returns:
        Tuple ``(sample, entropy)``: the stacked samples reshaped to
        ``self.output_shape`` and the sum of the per-step entropies.
    """
    step_noise = tf.unpack(eps)

    current_dist = back_filtered[0]
    z_prev = current_dist.sample(step_noise[0])
    samples = [z_prev]
    sampling_dists = [current_dist]
    entropies = [current_dist.entropy()]

    for t in np.arange(1, self.T):
        pred_mean = tf.matmul(self._transition_mat(t-1), z_prev)
        noise = self._gaussian_noise(t-1)
        incoming = MVGaussianMeanCov(noise.mean() + pred_mean, noise.cov())

        current_dist = back_filtered[t].multiply_density(incoming)
        sampling_dists.append(current_dist)
        z_prev = current_dist.sample(step_noise[t])
        entropies.append(current_dist.entropy())
        samples.append(z_prev)

    self.sampling_dists = sampling_dists
    self.entropies = entropies

    entropy = tf.reduce_sum(tf.pack(entropies))
    stacked = tf.squeeze(tf.pack(samples))
    sample = tf.reshape(stacked, self.output_shape)
    return sample, entropy
def simple_rnn(incoming, n_units, activation='sigmoid', bias=True,
               weights_init='truncated_normal', return_seq=False,
               trainable=True, restore=True, name="SimpleRNN"):
    """ Simple RNN.

    Simple Recurrent Layer.

    Input:
        3-D Tensor [samples, timesteps, input dim].

    Output:
        if `return_seq`: 3-D Tensor [samples, timesteps, output dim].
        else: 2-D Tensor [samples, output dim].

    Arguments:
        incoming: `Tensor`. Incoming 3-D Tensor. A Python `list` is taken
            to already be a per-timestep list and is used as is.
        n_units: `int`, number of units for this layer.
        activation: `str` (name) or `Tensor`. Activation applied to this layer.
            (See tflearn.activations). Default: 'sigmoid'.
        bias: `bool`. If True, a bias is used.
        weights_init: `str` (name) or `Tensor`. Weights initialization.
            (See tflearn.initializations) Default: 'truncated_normal'.
        return_seq: `bool`. If True, returns the full sequence instead of
            last sequence output only.
        trainable: `bool`. Forwarded to the `BasicRNNCell` constructor.
        restore: `bool`. If False, the cell weights (and bias, if any)
            are added to the `EXCL_RESTORE_VARS` collection.
        name: `str`. A name for this layer (optional).

    """
    input_shape = utils.get_incoming_shape(incoming)
    W_init = initializations.get(weights_init)()

    with tf.name_scope(name) as scope:
        cell = BasicRNNCell(n_units, activation, bias, W_init, trainable)

        inference = incoming
        # If a tensor given, convert it to a per timestep list.
        # The old test also compared the type against `np.array`, which
        # is a function and never matched, so only lists were ever
        # skipped. That behaviour is kept and made explicit.
        if not isinstance(inference, list):
            ndim = len(input_shape)
            assert ndim >= 3, "Input dim should be at least 3."
            # Swap the sample and time axes so unpacking yields one
            # tensor per timestep.
            axes = [1, 0] + list(range(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unpack(inference)

        # Track per layer variables
        layer_collection = tf.GraphKeys.LAYER_VARIABLES + '/' + scope
        tf.add_to_collection(layer_collection, cell.W)
        if not restore:
            tf.add_to_collection(tf.GraphKeys.EXCL_RESTORE_VARS, cell.W)
        if bias:
            tf.add_to_collection(layer_collection, cell.b)
            if not restore:
                tf.add_to_collection(tf.GraphKeys.EXCL_RESTORE_VARS, cell.b)

        # scope ends with '/', which is stripped before reuse as a name.
        outputs, states = _rnn(cell, inference, dtype=tf.float32,
                               scope=scope[:-1])

        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    return outputs if return_seq else outputs[-1]
def log_prob(self, xs, zs):
    """Return log-prior plus log-likelihood for every row of ``zs``.

    The prior is a multivariate normal over ``zs`` whose covariance is
    ``self.kernel(xs)``. The likelihood term for a row ``z`` is the
    summed Bernoulli log-pmf of the first column of ``xs`` under
    ``self.inverse_link(xs[:, 0] * z)``.
    """
    K = self.kernel(xs)
    log_prior = multivariate_normal.logpdf(zs[:, :], cov=K)

    lik_terms = []
    for z in tf.unpack(zs):
        probs = self.inverse_link(tf.mul(xs[:,0], z))
        lik_terms.append(tf.reduce_sum(bernoulli.logpmf(xs[:,0], probs)))
    log_lik = tf.pack(lik_terms)

    return log_prior + log_lik
def rnn_model(x, y):
    """Recurrent neural network model to predict from sequence of words
    to a class.

    Returns:
        Tuple ``(predictions, loss, train_op)`` where ``predictions`` is
        a dict with keys 'class' (argmax over classes) and 'prob'.
    """
    # Embed the word indexes: an embeddings matrix of
    # [n_words, EMBEDDING_SIZE] maps the sequence into
    # [batch_size, sequence_length, EMBEDDING_SIZE].
    embedded = learn.ops.categorical_variable(
        x, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')

    # One [batch_size, EMBEDDING_SIZE] tensor per word position.
    per_word = tf.unpack(embedded, axis=1)

    # Unroll a GRU of hidden size EMBEDDING_SIZE over the words and keep
    # only its final state.
    gru = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, final_state = tf.nn.rnn(gru, per_word, dtype=tf.float32)

    # Logistic regression over the 15 output classes on the final state.
    one_hot_target = tf.one_hot(y, 15, 1, 0)
    prediction, loss = learn.models.logistic_regression(final_state,
                                                        one_hot_target)

    # Create a training op.
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)

    predictions = {'class': tf.argmax(prediction, 1), 'prob': prediction}
    return predictions, loss, train_op
def inference (images, train=True):
    """Build the convolutional scoring network and its upscaling layer.

    Four ``cp_layer`` stages feed an optional dropout (training only) and
    a final ``cp_layer`` without ReLU that produces
    ``FLAGS.out_channels`` score maps. The scores are then upscaled by a
    transposed convolution with stride 16 to the dynamic shape of
    ``images``, with the last dimension replaced by
    ``FLAGS.out_channels``.

    Returns:
        Tuple ``(logits, score, params)``.
    """
    params = []
    # (name, four positional cp_layer settings, channels in, channels out)
    # NOTE(review): "layer2" appears twice -- confirm whether the later
    # entries were meant to be "layer3"/"layer4".
    layer_specs = [
        ("layer1", 5, 2, 2, 2, FLAGS.channels, 100),
        ("layer2", 5, 2, 2, 2, 100, 200),
        ("layer2", 3, 1, None, None, 200, 300),
        ("layer3", 3, 1, None, None, 300, 300),
    ]
    out = images
    for layer_name, a, b, c, d, channels_in, channels_out in layer_specs:
        out = cp_layer(out, layer_name, params, a, b, c, d,
                       channels_in, channels_out)

    if train:
        out = tf.nn.dropout(out, 0.1, name='dropout')
    out = cp_layer(out, "score", params, 1, 1, None, None,
                   300, FLAGS.out_channels, relu=False)
    score = out

    with tf.name_scope('upscale'):
        image_dims = tf.unpack(tf.shape(images))
        print(image_dims.__class__)
        # Same dims as the input, but with the output channel count last.
        shape = image_dims[:-1]
        shape.append(tf.constant(FLAGS.out_channels, dtype=tf.int32))
        print(len(shape))
        initial_filters = tf.truncated_normal(
            [31, 31, FLAGS.out_channels, FLAGS.out_channels],
            dtype=tf.float32, stddev=0.01)
        filters = tf.Variable(initial_filters, name='filters')
        logits = tf.nn.conv2d_transpose(out, filters, tf.pack(shape),
                                        [1,16,16,1], padding='SAME',
                                        name='upscale')
        # do we want to add bias?
    return logits, score, params
def get_batch_tensor(batch_size, sequence_length, num_epochs,
                     filename='names.txt', preprocessor=_clean):
    """Read a file, preprocess it and return batches from an input queue.

    The preprocessed data is truncated so that
    ``sequence_length * batch_size`` divides its length, reshaped to rows
    of ``sequence_length`` and served through a shuffling slice producer
    plus a batching queue. Queue runners are added, so be sure to start
    them.

    Returns:
        The transposed batch, unpacked along its first axis.
    """
    with tf.name_scope('input'):
        # The data is tiny, so load it all and keep it in a constant.
        with open(filename) as f:
            raw_text = f.read()
        all_data = preprocessor(raw_text)
        print(all_data)

        # Drop the tail that does not fill a whole batch of sequences.
        chunk = sequence_length * batch_size
        num_batches = all_data.shape[0] // chunk
        all_data = all_data[:num_batches * chunk]
        all_data = np.reshape(all_data, (-1, sequence_length))

        # and make the queue
        data = tf.train.slice_input_producer(
            [tf.constant(all_data)],
            num_epochs=num_epochs,
            shuffle=True,
            capacity=batch_size*sequence_length)

        # very much unconvinced this is all the right way round
        batch = tf.train.batch([data], batch_size=batch_size,
                               enqueue_many=True, num_threads=2)
        return tf.unpack(tf.transpose(batch))
def cumprod(xs):
    """Cumulative product of a tensor along first dimension.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        vector, matrix, or n-Tensor

    Returns
    -------
    tf.Tensor
        A Tensor with `cumprod` applied along its first dimension.
    """
    slices = tf.unpack(xs)
    # Start from ones so the first entry equals the first slice.
    running = tf.ones_like(slices[0])
    partial_products = []
    for piece in slices:
        running = running * piece
        partial_products.append(running)

    return tf.pack(partial_products)
def Loop(cell, w, i):
    """Apply ``cell`` over the unpacked steps of ``i``; return the last m.

    ``i`` is unpacked into ``self.NUM_UNROLL`` steps. ``m`` and ``c``
    start as zeros shaped like the first step and are updated by
    ``cell(step, m, c, w)`` at every step.
    """
    steps = tf.unpack(i, self.NUM_UNROLL)
    m = tf.zeros_like(steps[0])
    c = tf.zeros_like(steps[0])
    for t in range(self.NUM_UNROLL):
        m, c = cell(steps[t], m, c, w)
    return m
def testInferNum(self):
    """tf.unpack infers the number of outputs from a static shape."""
    shapes = [(2,), (3,), (2, 3), (3, 2), (4, 3, 2)]
    with self.test_session():
        for shape in shapes:
            placeholder = tf.placeholder(np.float32, shape=shape)
            pieces = tf.unpack(placeholder)
            self.assertEqual(type(pieces), list)
            self.assertEqual(len(pieces), shape[0])
def cumprod(xs):
    """Cumulative product of a tensor along its outer dimension.

    The input is checked for Inf/NaN and cast to float32 before the
    products are taken.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        A 1-D or higher tensor.

    Returns
    -------
    tf.Tensor
        A tensor with `cumprod` applied along its outer dimension.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = tf.verify_tensor_all_finite(xs, msg='')
    xs = control_flow_ops.with_dependencies([finite_check], xs)
    xs = tf.cast(xs, dtype=tf.float32)

    slices = tf.unpack(xs)
    # Start from ones so the first entry equals the first slice.
    running = tf.ones_like(slices[0])
    partial_products = []
    for piece in slices:
        running = running * piece
        partial_products.append(running)

    return tf.pack(partial_products)
def print_progress(self, t, losses, sess):
    """Every ``self.n_print`` iterations, report the loss and redraw.

    Prints the mean loss and the variational parameters, draws 10
    parameter samples from the variational mean/std with a fixed seed,
    evaluates ``self.model.mapping`` for each on 400 inputs in [-3, 3],
    and plots the results over the data on the module-level ``ax``.
    """
    if t % self.n_print != 0:
        return

    print("iter %d loss %.2f " % (t, np.mean(losses)))
    self.variational.print_params(sess)

    # Sample functions from variational model
    mean, std = sess.run([self.variational.m, self.variational.s])
    rs = np.random.RandomState(0)
    draws = rs.randn(10, self.variational.num_vars) * std + mean
    zs = tf.constant(draws, dtype=tf.float32)

    inputs = np.linspace(-3, 3, num=400, dtype=np.float32)
    grid = tf.expand_dims(tf.constant(inputs), 1)
    curves = []
    for z in tf.unpack(zs):
        curves.append(self.model.mapping(grid, z))
    outputs = sess.run(tf.pack(curves))

    # Get data
    y, x = sess.run([self.data.data[:, 0], self.data.data[:, 1]])

    # Plot data and functions
    plt.cla()
    ax.plot(x, y, 'bx')
    ax.plot(inputs, outputs.T)
    ax.set_xlim([-3, 3])
    ax.set_ylim([-0.5, 1.5])
    plt.draw()
def _tile_along_beam(cls, beam_size, state):
    """Repeat every entry of ``state`` ``beam_size`` times along dim 0.

    Nested sequences are handled recursively. For a tensor, a new axis 1
    is inserted, tiled ``beam_size`` times and folded back into the first
    dimension, so the copies of each original entry are adjacent.

    Args:
        beam_size: number of copies of each entry.
        state: a ``tf.Tensor`` of rank >= 1 or a nested sequence of them.

    Returns:
        A structure like ``state`` whose tensors have their first
        dimension multiplied by ``beam_size``.

    Raises:
        ValueError: if ``state`` is neither a sequence nor a tensor.
    """
    if nest.is_sequence(state):
        return nest_map(
            lambda val: cls._tile_along_beam(beam_size, val),
            state
        )

    if not isinstance(state, tf.Tensor):
        raise ValueError("State should be a sequence or tensor")

    tensor = state
    tensor_shape = tensor.get_shape().with_rank_at_least(1)

    # Best effort static size of the new first dimension. Any ordinary
    # failure falls back to "unknown", but KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        new_first_dim = tensor_shape[0] * beam_size
    except Exception:
        new_first_dim = None

    dynamic_tensor_shape = tf.unpack(tf.shape(tensor))
    res = tf.expand_dims(tensor, 1)
    res = tf.tile(res, [1, beam_size] + [1] * (tensor_shape.ndims-1))
    res = tf.reshape(res, [-1] + list(dynamic_tensor_shape[1:]))
    # The reshape loses static shape information; restore what is known.
    res.set_shape([new_first_dim] + list(tensor_shape[1:]))
    return res
def unit(x, hidden_memory_tm1):
    """One LSTM step.

    ``hidden_memory_tm1`` packs the previous hidden state and memory
    cell; the return value packs the new hidden state and memory cell in
    the same order.
    """
    h_prev, c_prev = tf.unpack(hidden_memory_tm1)

    def affine(W, U, b):
        # Shared pre-activation form of all four gates.
        return tf.matmul(x, W) + tf.matmul(h_prev, U) + b

    input_gate = tf.sigmoid(affine(self.Wi, self.Ui, self.bi))
    forget_gate = tf.sigmoid(affine(self.Wf, self.Uf, self.bf))
    output_gate = tf.sigmoid(affine(self.Wog, self.Uog, self.bog))
    candidate = tf.nn.tanh(affine(self.Wc, self.Uc, self.bc))

    # Blend the old memory with the new candidate.
    c = forget_gate * c_prev + input_gate * candidate
    h = output_gate * tf.nn.tanh(c)

    return tf.pack([h, c])
def rnn_model(features, target):
    """RNN model to predict from sequence of words to a class.

    Returns:
        Tuple ``(predictions, loss, train_op)`` where ``predictions`` is
        a dict with keys 'class' (argmax of the logits) and 'prob'
        (softmax of the logits).
    """
    # Embed the word indexes: an embeddings matrix of
    # [n_words, EMBEDDING_SIZE] maps the sequence into
    # [batch_size, sequence_length, EMBEDDING_SIZE].
    embedded = tf.contrib.layers.embed_sequence(
        features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE, scope='words')

    # One [batch_size, EMBEDDING_SIZE] tensor per word position.
    per_word = tf.unpack(embedded, axis=1)

    # Unroll a GRU of hidden size EMBEDDING_SIZE over the words and keep
    # only its final state.
    gru = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, final_state = tf.nn.rnn(gru, per_word, dtype=tf.float32)

    # Linear layer over the 15 output classes with softmax cross entropy.
    one_hot_target = tf.one_hot(target, 15, 1, 0)
    logits = tf.contrib.layers.fully_connected(final_state, 15,
                                               activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, one_hot_target)

    # Create a training op.
    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss, global_step, optimizer='Adam', learning_rate=0.01)

    predictions = {
        'class': tf.argmax(logits, 1),
        'prob': tf.nn.softmax(logits),
    }
    return (predictions, loss, train_op)
def rnn(step_function, inputs, initial_states,
        go_backwards=False, mask=None, constants=None,
        unroll=False, input_length=None):
    '''Iterates over the time dimension of a tensor.

    # Arguments
        inputs: tensor of temporal data of shape (samples, time, ...)
            (at least 3D).
        step_function:
            Parameters:
                input: tensor with shape (samples, ...) (no time dimension),
                    representing input for the batch of samples at a certain
                    time step.
                states: list of tensors.
            Returns:
                output: tensor with shape (samples, ...) (no time dimension),
                new_states: list of tensors, same length and shapes
                    as 'states'.
        initial_states: tensor with shape (samples, ...) (no time dimension),
            containing the initial values for the states used in
            the step function.
        go_backwards: boolean. If True, do the iteration over
            the time dimension in reverse order.
        mask: binary tensor with shape (samples, time, 1),
            with a zero for every element that is masked.
        constants: a list of constant values passed at each step.
        unroll: with TensorFlow the RNN is always unrolled, but with Theano
            you can use this boolean flag to unroll the RNN.
        input_length: not relevant in the TensorFlow implementation.
            Must be specified if using unrolling with Theano.

    # Returns
        A tuple (last_output, outputs, new_states).

        last_output: the latest output of the rnn, of shape (samples, ...)
        outputs: tensor with shape (samples, time, ...) where each
            entry outputs[s, t] is the output of the step function
            at time t for sample s.
        new_states: list of tensors, latest states returned by
            the step function, of shape (samples, ...).
    '''
    ndim = len(inputs.get_shape())
    assert ndim >= 3, "Input should be at least 3D."

    # Go time-major so that unpacking yields one tensor per time step.
    axes = [1, 0] + list(range(2, ndim))
    inputs = tf.transpose(inputs, (axes))
    input_list = tf.unpack(inputs)
    if constants is None:
        constants = []

    states = initial_states
    successive_states = []
    successive_outputs = []
    if go_backwards:
        input_list.reverse()

    if mask is None:
        for step_input in input_list:
            output, states = step_function(step_input, states + constants)
            successive_outputs.append(output)
            successive_states.append(states)
    else:
        # Transpose not supported by bool tensor types, hence round-trip
        # to uint8.
        mask = tf.cast(mask, tf.uint8)
        if len(mask.get_shape()) == ndim - 1:
            mask = expand_dims(mask)
        mask = tf.cast(tf.transpose(mask, axes), tf.bool)
        mask_list = tf.unpack(mask)

        if go_backwards:
            mask_list.reverse()

        for step_input, mask_t in zip(input_list, mask_list):
            output, new_states = step_function(step_input,
                                               states + constants)

            # tf.select needs a condition of the same shape as its two
            # branches, but the mask is (nsamples, 1) while the branches
            # are (nsamples, ndimensions). Tiling repeats the mask along
            # its second dimension ndimensions times.
            tiled_mask_t = tf.tile(mask_t, tf.pack([1, tf.shape(output)[1]])) 

            # A masked step repeats the previous output (zeros at the
            # very first step).
            if successive_outputs:
                prev_output = successive_outputs[-1]
            else:
                prev_output = zeros_like(output)
            output = tf.select(tiled_mask_t, output, prev_output)

            # A masked step also keeps the previous states.
            return_states = []
            for state, new_state in zip(states, new_states):
                tiled_mask_t = tf.tile(
                    mask_t, tf.pack([1, tf.shape(new_state)[1]]))
                return_states.append(
                    tf.select(tiled_mask_t, new_state, state))

            states = return_states
            successive_outputs.append(output)
            successive_states.append(states)

    last_output = successive_outputs[-1]
    outputs = tf.pack(successive_outputs)
    new_states = successive_states[-1]

    # Back to batch-major.
    axes = [1, 0] + list(range(2, len(outputs.get_shape())))
    outputs = tf.transpose(outputs, axes)
    return last_output, outputs, new_states
# Run mean-field variational inference and, every n_print iterations,
# plot functions sampled from the current variational approximation.
inference = ed.MFVI(model, variational, data)
sess = inference.initialize(n_print=10)
for t in range(1000):
    loss = inference.update(sess)
    if t % inference.n_print != 0:
        continue

    print("iter {:d} loss {:.2f}".format(t, loss))

    # Sample functions from variational model
    first_layer = variational.layers[0]
    mean, std = sess.run([first_layer.m, first_layer.s])
    rs = np.random.RandomState(0)
    zs = rs.randn(10, variational.num_vars) * std + mean
    zs = tf.constant(zs, dtype=tf.float32)
    inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
    x = tf.expand_dims(tf.constant(inputs), 1)
    mus = tf.pack([model.mapping(x, z) for z in tf.unpack(zs)])
    outputs = sess.run(mus)

    # Get data
    y, x = sess.run([data.data[:, 0], data.data[:, 1]])

    # Plot data and functions
    plt.cla()
    ax.plot(x, y, 'bx')
    ax.plot(inputs, outputs.T)
    ax.set_xlim([-8, 8])
    ax.set_ylim([-2, 3])
    plt.draw()
    plt.pause(1.0 / 60.0)
def create_q_network(self):
    """Build the bidirectional-LSTM critic network.

    State and action inputs are flattened, passed through a shared
    fully-connected ReLU layer, reshaped to
    ``[-1, user_num, fc_layer_size]`` and fed to a bidirectional dynamic
    RNN. The forward and backward outputs are combined by a linear layer
    followed by tanh to give one Q value per user.

    Returns:
        Tuple ``(lstm_layer_input, action_input, q_value_output,
        variables, output_state, initial_lstm_state_forward,
        initial_lstm_state_backward, step_size)`` where ``variables`` is
        ``[W1_s, W1_a, b1, W2_fw, W2_bw, b2, W_lstm, b_lstm]``.
    """
    def uniform_init(fan):
        # Uniform initializer in [-1/sqrt(fan), 1/sqrt(fan)]. An
        # initializer object is used because tf.get_variable does not
        # accept a Tensor initializer together with an explicit shape.
        bound = 1 / math.sqrt(fan)
        return tf.random_uniform_initializer(-bound, bound)

    with tf.variable_scope("critic_net") as scope_pi:
        lstm_layer_input = tf.placeholder(
            "float", [None, self.user_num, self.state_dim])
        # Last dimension is action_dim, matching the reshape below.
        action_input = tf.placeholder(
            "float", [None, self.user_num, self.action_dim])
        step_size = tf.placeholder("float", [1])

        initial_lstm_state_forward = tf.placeholder(
            "float", [2, None, self.fc_layer_size])
        initial_lstm_state_forward_list = tf.unpack(
            initial_lstm_state_forward, axis=0)
        initial_lstm_state_forward_input = tf.nn.rnn_cell.LSTMStateTuple(
            initial_lstm_state_forward_list[0],
            initial_lstm_state_forward_list[1])

        initial_lstm_state_backward = tf.placeholder(
            "float", [2, None, self.fc_layer_size])
        # Unpack the *backward* placeholder; unpacking the forward one
        # here left the backward placeholder disconnected from the graph.
        initial_lstm_state_backward_list = tf.unpack(
            initial_lstm_state_backward, axis=0)
        initial_lstm_state_backward_input = tf.nn.rnn_cell.LSTMStateTuple(
            initial_lstm_state_backward_list[0],
            initial_lstm_state_backward_list[1])

        input_s = tf.reshape(lstm_layer_input, [-1, self.state_dim])
        input_a = tf.reshape(action_input, [-1, self.action_dim])

        # encoder layer parameters
        W1_s = tf.get_variable(
            "W1_s", [self.state_dim, self.fc_layer_size],
            initializer=uniform_init(self.state_dim))
        W1_a = tf.get_variable(
            "W1_a", [self.action_dim, self.fc_layer_size],
            initializer=uniform_init(self.action_dim))
        b1 = tf.get_variable(
            "b1", [self.fc_layer_size],
            initializer=uniform_init(self.state_dim))

        # output layer parameters
        W2_fw = tf.get_variable(
            "W2_fw", [self.fc_layer_size, 1],
            initializer=uniform_init(self.fc_layer_size))
        W2_bw = tf.get_variable(
            "W2_bw", [self.fc_layer_size, 1],
            initializer=uniform_init(self.fc_layer_size))
        # Created as "b2"; reusing the name "b1" collides with the
        # encoder bias above.
        b2 = tf.get_variable(
            "b2", [1],
            initializer=uniform_init(self.fc_layer_size))

        h_fc = tf.nn.relu(tf.matmul(input_s, W1_s)
                          + tf.matmul(input_a, W1_a) + b1)
        h_fc1 = tf.reshape(h_fc, [-1, self.user_num, self.fc_layer_size])

        # NOTE(review): the cells are built with state_is_tuple=False but
        # receive LSTMStateTuple initial states -- confirm this
        # combination is accepted by the TensorFlow version in use.
        with tf.variable_scope('forward'):
            lstm_forward_cell = tf.nn.rnn_cell.BasicLSTMCell(
                self.fc_layer_size, state_is_tuple=False)
        with tf.variable_scope('backward'):
            lstm_backward_cell = tf.nn.rnn_cell.BasicLSTMCell(
                self.fc_layer_size, state_is_tuple=False)

        # "outputs" is a tuple (outputs_forward, outputs_backward).
        # time_major=False, so inputs are [batch_size, user_num,
        # fc_layer_size].
        # NOTE(review): sequence_length is fed a float placeholder of
        # shape [1] -- confirm against the bidirectional_dynamic_rnn
        # contract.
        (outputs, output_state) = tf.nn.bidirectional_dynamic_rnn(
            lstm_forward_cell,
            lstm_backward_cell,
            h_fc1,
            initial_state_fw=initial_lstm_state_forward_input,
            initial_state_bw=initial_lstm_state_backward_input,
            sequence_length=step_size,
            time_major=False,
            scope=scope_pi)
        output_fw = tf.reshape(outputs[0], [-1, self.fc_layer_size])
        output_bw = tf.reshape(outputs[1], [-1, self.fc_layer_size])

        # output layer
        q_value_output = tf.reshape(
            tf.tanh(tf.matmul(output_fw, W2_fw)
                    + tf.matmul(output_bw, W2_bw) + b2),
            [-1, self.user_num, 1])

        # NOTE(review): presumably these names must match the variables
        # created by the bidirectional RNN under this scope -- verify.
        scope_pi.reuse_variables()
        W_lstm = tf.get_variable("BasicLSTMCell/Linear/Matrix")
        b_lstm = tf.get_variable("BasicLSTMCell/Linear/Bias")

        return (lstm_layer_input, action_input, q_value_output,
                [W1_s, W1_a, b1, W2_fw, W2_bw, b2, W_lstm, b_lstm],
                output_state,
                initial_lstm_state_forward, initial_lstm_state_backward,
                step_size)
def ppc(model, variational=None, data=Data(), T=None, size=100, sess=tf.Session()):
    r"""
    Posterior predictive check.
    (Rubin, 1984; Meng, 1994; Gelman, Meng, and Stern, 1996)
    If variational is not specified, it defaults to a prior predictive
    check (Box, 1980).

    PPC's form an empirical distribution for the predictive discrepancy,
    p(T) = \int p(T(yrep) | z) p(z | y) dz
    by drawing replicated data sets yrep and calculating T(yrep) for each
    data set. Then it compares it to T(y).

    Parameters
    ----------
    model : Model
        class object with a 'sample_likelihood' method
    variational : Variational, optional
        latent variable distribution q(z) to sample from. It is an
        approximation to the posterior, e.g., a variational
        approximation or an empirical distribution from MCMC samples.
        If not specified, samples will be obtained from model
        with a 'sample_prior' method.
    data : Data, optional
        Observed data to compare to. If not specified, will return
        only the reference distribution with an assumed replicated
        data set size of 1.
    T : function, optional
        Discrepancy function written in TensorFlow. Default is
        identity. It is a function taking in a data set
        y and optionally a set of latent variables z as input.
    size : int, optional
        number of replicated data sets
    sess : tf.Session, optional
        session used during inference

    Returns
    -------
    np.ndarray or list
        If ``data.data`` is None, only the reference distribution
        (T(yrep^{1}, z^{1}), ..., T(yrep^{size}, z^{size})) is returned.
        Otherwise a list of two elements is returned: the reference
        distribution and the realized discrepancy
        (T(y, z^{1}), ..., T(y, z^{size})).

    Notes
    -----
    The defaults for ``data`` and ``sess`` are evaluated once, when the
    function is defined, and are therefore shared between calls.
    """
    y = data.data
    # Identity checks: `== None` on array data compares element-wise.
    N = 1 if y is None else data.N

    if T is None:
        T = lambda y, z=None: y

    # 1. Sample from posterior (or prior).
    # We must fetch zs out of the session because sample_likelihood()
    # may require a SciPy-based sampler.
    if variational is not None:
        zs, samples = variational.sample(y, size=size)
        feed_dict = variational.np_sample(samples, size, sess=sess)
        zs = sess.run(zs, feed_dict)
    else:
        # Nothing to feed for a prior predictive check; the name must still
        # exist for the sess.run calls at the end.
        feed_dict = None
        zs = model.sample_prior(size=size)
        zs = sess.run(zs)

    # 2. Sample from likelihood.
    yreps = model.sample_likelihood(zs, size=N)

    # 3. Calculate discrepancy.
    Tyreps = []
    Tys = []
    for yrep, z in zip(yreps, tf.unpack(zs)):
        Tyreps.append(T(yrep, z))
        if y is not None:
            Tys.append(T(y, z))

    if y is None:
        return sess.run(tf.pack(Tyreps), feed_dict)
    return sess.run([tf.pack(Tyreps), tf.pack(Tys)], feed_dict)
def __init__(self, is_training, vocab_size, labels_idx):
    """Assemble the question-over-context attention classifier graph.

    The question is encoded with a BiLSTM; the concatenated forward and
    backward hidden states are mapped through a bilinear matrix and used to
    attend over the embedded context of each example. The attended context
    is classified with a softmax layer, and the cost, accuracy and Adam
    training op are stored on the instance.

    Args:
        is_training: forwarded to the BiLSTM encoder.
        vocab_size: number of rows of both embedding matrices.
        labels_idx: label collection; only its length is used here.
    """
    n_labels = len(labels_idx)
    self.labels_idx = labels_idx
    self.vocab_size = vocab_size
    self.context_steps = FLAGS.context_steps
    self.question_steps = FLAGS.question_steps

    # Question-side inputs and the two encodings of the target label.
    self.questions = tf.placeholder(tf.int32, [None, self.question_steps])
    self.enc_y = tf.placeholder(tf.int32, [None])
    self.bin_y = tf.placeholder(tf.int32, [None, n_labels])
    self.ques_lengths = tf.placeholder(tf.int32, [None])

    ques_embedding = tf.get_variable(
        "ques_embedding", [self.vocab_size, FLAGS.qs_size],
        dtype=data_type())

    # Bidirectional LSTM over the question tokens.
    encoder = BiLSTM(self.questions, self.ques_lengths, is_training,
                     ques_embedding, name='ques')
    ques_outputs = encoder.outputs[-1]  # not used further below
    self._initial_state = encoder._initial_state

    # The prediction uses the hidden parts of both directional states.
    fw_state, bw_state = encoder.state_fw, encoder.state_bw
    hidden_state = tf.concat(1, (fw_state.h, bw_state.h))
    print("Shape of the hidden state %s." % hidden_state.get_shape())

    # Context-side inputs; the batch axis is fixed to FLAGS.batch_size.
    self.contexts = tf.placeholder(
        tf.int32, shape=[FLAGS.batch_size, FLAGS.context_steps])
    self.cont_lengths = tf.placeholder(tf.int32, shape=[None])

    context_embedding = tf.get_variable(
        "context_embedding", [self.vocab_size, FLAGS.ans_size],
        dtype=tf.float32)
    # Result of an embedding lookup on [batch, context_steps] ids.
    context_transformed = tf.nn.embedding_lookup(
        context_embedding, self.contexts)

    bilinear = tf.get_variable(
        "bilinear", [2 * FLAGS.qs_size, FLAGS.ans_size], dtype=tf.float32)
    softmax_W = tf.get_variable("softmax_W", [FLAGS.ans_size, n_labels])
    softmax_b = tf.get_variable("softmax_b", [n_labels])

    # Question vectors projected into the context embedding space.
    ques_transform = tf.matmul(hidden_state, bilinear)

    def example_logits(question_vec, context_mat):
        # Attend over one example's context rows and classify the result.
        scores = tf.matmul(tf.expand_dims(question_vec, dim=0),
                           tf.transpose(context_mat))[0]
        attention = tf.expand_dims(tf.nn.softmax(scores), 0)
        attended = tf.matmul(attention, context_mat)
        return tf.add(tf.matmul(attended, softmax_W), softmax_b)[0]

    self._predictions = []
    per_question = tf.unpack(ques_transform, axis=0)
    per_context = tf.unpack(context_transformed, axis=0)
    self._logits = [example_logits(q_vec, ctx)
                    for q_vec, ctx in zip(per_question, per_context)]

    self._cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(self._logits, self.bin_y))
    self._predictions = tf.argmax(self._logits, 1)
    print(self._predictions.get_shape())

    is_correct = tf.equal(tf.to_int32(self._predictions), self.enc_y)
    self._acc = tf.reduce_mean(tf.cast(is_correct, "float"))

    # Adam on gradients clipped by global norm.
    train_vars = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(
        tf.gradients(self._cost, train_vars), FLAGS.max_grad_norm)
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    self._train_op = adam.apply_gradients(zip(clipped, train_vars))
def decoder_rnn(conv_encoder, decoder_inputs, decoder_hidden, weigth_generation, n_steps, bias_generation, batch_size, keep_prob, embedding, sample_rate, lstm_layer=1, is_train=True):
    """Run an LSTM decoder whose every input is concatenated with conv_encoder.

    In training mode each step's input is a per-example random mix of the
    embedding of the previously predicted word and the ground-truth input,
    selected where ``sample_rate`` exceeds a uniform draw. Otherwise the
    previously predicted word is always fed back.

    Returns:
        ``(res, state)``: ``res`` is a list of ``n_steps`` entries holding the
        projected word scores of each decoder output, and ``state`` is the
        final decoder state.
    """
    with tf.name_scope('decoder_rnn') as scope:
        cell = rnn_cell.BasicLSTMCell(
            decoder_hidden, forget_bias=1.0, state_is_tuple=True)
        if lstm_layer > 1:
            cell = rnn_cell.MultiRNNCell([cell] * lstm_layer)
        start_state = cell.zero_state(batch_size, tf.float32)

        # Embed the inputs, unpack along the first axis after the transpose,
        # and append the encoder features to every step.
        embedded = tf.nn.embedding_lookup(embedding, decoder_inputs)
        per_step = tf.unpack(tf.transpose(embedded, [1, 0, 2]))
        step_inputs = [tf.concat(1, [step_in, conv_encoder])
                       for step_in in per_step]

        def project(hidden):
            # Map a decoder output to word scores.
            return tf.nn.bias_add(
                tf.matmul(hidden, weigth_generation), bias_generation)

        def predicted_input(prev):
            # Embedding of the arg-max word joined with the encoder features.
            best_word = tf.argmax(project(prev), 1)
            word_vec = tf.nn.embedding_lookup(embedding, best_word)
            return tf.concat(1, [word_vec, conv_encoder])

        def mixed_feedback(prev, i):
            # Choose per example between the prediction and the ground truth.
            candidate = predicted_input(prev)
            draw = tf.random_uniform(minval=0, maxval=1, shape=(batch_size, ))
            take_pred = tf.cast(tf.greater(sample_rate, draw), tf.float32)
            take_pred = tf.expand_dims(take_pred, 1)
            take_pred = tf.tile(
                take_pred, [1, candidate.get_shape().as_list()[-1]])
            return take_pred * candidate + (1 - take_pred) * step_inputs[i]

        def greedy_feedback(prev, i):
            return predicted_input(prev)

        if is_train:
            loop_fn = mixed_feedback
        else:
            loop_fn = greedy_feedback

        outputs, state = seq2seq.rnn_decoder(
            decoder_inputs=step_inputs,
            initial_state=start_state,
            cell=cell,
            loop_function=loop_fn,
            scope='rnn_decoder')

        dropped = tf.unpack(tf.nn.dropout(outputs, keep_prob))

        # Slots beyond the number of decoder outputs keep their 0 placeholder.
        res = [0] * n_steps
        for idx, step_out in enumerate(dropped):
            res[idx] = project(step_out)
        return res, state
# 5*15 batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length]) batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length]) # 5*4 init_state = tf.placeholder(tf.float32, [batch_size, state_size]) # 5*4 W = tf.Variable(np.random.rand(state_size + 1, state_size), dtype=tf.float32) b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32) #1*4 #4*2 W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32) b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32) #1*2 # Unpack columns inputs_series = tf.unpack(batchX_placeholder, axis=1) labels_series = tf.unpack(batchY_placeholder, axis=1) # Forward pass current_state = init_state states_series = [] for current_input in inputs_series: current_input = tf.reshape(current_input, [batch_size, 1]) input_and_state_concatenated = tf.concat( 1, [current_input, current_state]) # Increasing number of columns next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b) # Broadcasted addition states_series.append(next_state) current_state = next_state
def resnet_v1_sdc(inputs, blocks, output_cfg, version, dropout_keep_prob=0.8, bayesian=False, is_training=True, global_pool=True, output_stride=None, lock_root=False, reuse=None, scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the resnet_v1_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce ResNets of various depths.

    A rank-5 input is treated as a "siamese" multi-image batch: it is unpacked
    along axis 1 and every image goes through the same ResNet root (variables
    are reused after the first branch). Any other input goes through a single
    root.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels], or a
        rank-5 tensor that is unpacked along axis 1 into several such images.
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a Block object describing the units in the block.
      output_cfg: Passed through unchanged to `_build_output`.
      version: Model variant selector. In the single-image path, version 7
        adds a global-context stage after the root and version 6 appends a
        second globally pooled feature taken from `block_outputs[11]`. The
        value is also forwarded to `_build_output`.
      dropout_keep_prob: Forwarded to `_build_global_context` and
        `_build_output`.
      bayesian: Forwarded to `_build_global_context` and `_build_output`.
      is_training: whether is training or not.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the requested
        ratio of input to output spatial resolution.
      lock_root: Forwarded to `_build_resnet_root`.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      output: Dict of rank-4 tensors of size
        [batch, height_out, width_out, channels_out].
      endpoints: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        endpoints_collection = sc.name + '_end_points'
        # Collect the outputs of these layer types so that they can be
        # returned as the endpoints dictionary.
        arg_scope_ep = slim.arg_scope([slim.conv2d, bottleneck, stack_blocks_dense], outputs_collections=endpoints_collection)
        arg_scope_train = slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training)
        with arg_scope_ep, arg_scope_train:
            # Feature tensors handed to the output stage.
            nets = []
            siamese = True if len(inputs.get_shape()) == 5 else False
            if siamese:
                with tf.variable_scope(sc, values=[inputs], reuse=reuse) as scs:
                    # siamese, multi-image config
                    unpacked_inputs = tf.unpack(inputs, axis=1)
                    for i, x in enumerate(unpacked_inputs):
                        branch_scope = 'Branch_%d' % i
                        with tf.name_scope(branch_scope):
                            net, _ = _build_resnet_root(x, block_cfg=blocks, global_pool=global_pool, output_stride=output_stride, lock_root=lock_root)
                        # Every branch after the first shares the weights
                        # created by the first one.
                        scs.reuse_variables()
                        nets.append(net)
            else:
                # normal config
                global_context = True if version == 7 else False
                #output_stride = output_stride if version != 7 else 16
                net, block_outputs = _build_resnet_root(inputs, block_cfg=blocks, output_stride=output_stride, lock_root=lock_root)
                if global_context:
                    net = _build_global_context(net, is_training=is_training, bayesian=bayesian, dropout_keep_prob=dropout_keep_prob)
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
                    print('Global pool', net.get_shape())
                nets.append(net)
                if version == 6:
                    # version 6 variant takes an additional global pool from
                    # earlier block before the last stride
                    net2 = tf.reduce_mean(block_outputs[11], [1, 2], name='pool5a', keep_dims=True)
                    print('Global pool 2', net2.get_shape())
                    nets.append(net2)
            output = _build_output(nets, output_cfg=output_cfg, version=version, is_training=is_training, bayesian=bayesian, dropout_keep_prob=dropout_keep_prob)
            endpoints = slim.utils.convert_collection_to_dict(endpoints_collection)
            return output, endpoints
def __init__(self, name): with tf.variable_scope('imply') as scope: # set up placeholders self.partial_obs = tf.placeholder(tf.float32, [N_BATCH, L, 2], name="partial_obs") self.full_obs = tf.placeholder(tf.float32, [N_BATCH, L, 2], name="full_obs") # some constants self.n_hidden = 200 # make hidden represnatation W1 = weight_variable([L * 2, self.n_hidden]) b1 = bias_variable([self.n_hidden]) W2 = weight_variable([self.n_hidden, self.n_hidden]) b2 = bias_variable([self.n_hidden]) partial_flat = tf.reshape(self.partial_obs, [N_BATCH, L * 2]) hidden = tf.nn.relu(tf.matmul(partial_flat, W1) + b1) hidden = tf.nn.relu(tf.matmul(hidden, W2) + b2) W_preds = [weight_variable([self.n_hidden, 2]) for _ in range(L)] b_preds = [bias_variable([2]) for _ in range(L)] e2 = tf.constant(1e-10, shape=[N_BATCH, 2]) self.query_preds = [ tf.nn.softmax(tf.matmul(hidden, W_preds[i]) + b_preds[i]) + e2 for i in range(L) ] print "query_preds shape ", show_dim(self.query_preds) # doing some reshape of the input tensor full_obs_trans = tf.transpose(self.full_obs, perm=[1, 0, 2]) print full_obs_trans.get_shape() full_obs_split = tf.reshape(full_obs_trans, [L, N_BATCH, 2]) full_obs_split = tf.unpack(full_obs_split) print show_dim(full_obs_split) self.query_pred_costs = [] for idx in range(L): blah = -tf.reduce_sum( full_obs_split[idx] * tf.log(self.query_preds[idx])) self.query_pred_costs.append(blah) print "costs shapes ", show_dim(self.query_pred_costs) self.cost_query_pred = sum(self.query_pred_costs) # ------------------------------------------------------------------------ training steps # gvs = optimizer.compute_gradients(cost) # capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs] # train_op = optimizer.apply_gradients(capped_gvs) # optimizer = tf.train.RMSPropOptimizer(0.0001) # optimizer = tf.train.RMSPropOptimizer(0.0001) optimizer = tf.train.AdagradOptimizer(0.005) pred_gvs = optimizer.compute_gradients(self.cost_query_pred) capped_pred_gvs = 
[(tf.clip_by_value(grad, -5., 5.), var) for grad, var in pred_gvs] #train_pred = optimizer.minimize(cost_pred, var_list = VAR_pred) self.train_query_pred = optimizer.apply_gradients(capped_pred_gvs) # train_query_pred = optimizer.minimize(cost_query_pred, var_list = VAR_pred) # Before starting, initialize the variables. We will 'run' this first. self.init = tf.initialize_all_variables() self.saver = tf.train.Saver()
# Run inference and, every `inference.n_print` iterations, redraw ten
# functions sampled from the current variational approximation.
init.run()
for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print != 0:
        continue

    # Sample functions from variational model: draw weight vectors from the
    # current mean and scale with a fixed-seed generator.
    mean, std = sess.run([qz.mu, qz.sigma])
    rs = np.random.RandomState(0)
    zs = tf.convert_to_tensor(
        rs.randn(10, model.n_vars) * std + mean, dtype=tf.float32)

    # Evaluate the network on a grid for every sampled weight vector.
    inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
    x = tf.expand_dims(inputs, 1)
    mus = [model.neural_network(x, z) for z in tf.unpack(zs)]
    outputs = tf.pack(mus).eval()

    # Get data
    x, y = data['x'], data['y']

    # Plot data and functions
    plt.cla()
    ax.plot(x, y, 'bx')
    ax.plot(inputs, outputs.T)
    ax.set_xlim([-8, 8])
    ax.set_ylim([-2, 3])
    plt.draw()
    plt.pause(1.0 / 60.0)
def build(self):
    """Build the graph: input/question encoders, episodic memory, answer layer.

    Creates the placeholders, encodes the input and the question with one
    shared GRU, keeps the input states selected by the mask as "facts", runs
    the episodic memory for `params.memory_step` passes, and attaches the
    answer projection, loss, accuracy and Adadelta training op. The
    placeholders and result tensors are stored on `self` at the end.
    """
    params = self.params
    N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
    V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

    # initialize self
    # placeholders
    input = tf.placeholder('int32', shape=[N, L], name='x')  # [num_batch, sentence_len]
    question = tf.placeholder('int32', shape=[N, Q], name='q')  # [num_batch, question_len]
    answer = tf.placeholder('int32', shape=[N], name='y')  # [num_batch] - one word answer
    input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
    is_training = tf.placeholder(tf.bool)

    # Prepare parameters
    gru = rnn_cell.GRUCell(d)

    # Input module
    with tf.variable_scope('input') as scope:
        # One id vector per position (list of length L).
        input_list = tf.unpack(tf.transpose(input))
        input_states, _ = seq2seq.embedding_rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru, A, V)

        # Question module: reuses the variables created by the input module.
        scope.reuse_variables()
        ques_list = tf.unpack(tf.transpose(question))
        questions, _ = seq2seq.embedding_rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru, A, V)
        question_vec = questions[-1]  # use final state

    # Masking: to extract fact vectors at end of sentence. (details in paper)
    input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
    facts = []
    for n in range(N):
        # Keep only the states selected by the mask for this example ...
        filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
        # ... and zero-pad them to exactly F rows.
        padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
        facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

    # Pack to [N, F, D] so the batch and fact axes can be swapped before
    # unpacking into one tensor per fact.
    facked = tf.pack(facts)
    facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

    # Episodic Memory
    with tf.variable_scope('episodic') as scope:
        episode = EpisodeModule(d, question_vec, facts)
        # The memory starts out as a copy of the question vector.
        memory = tf.identity(question_vec)
        for t in range(params.memory_step):
            memory = gru(episode.new(memory), memory)[0]
            # Later passes share the variables created by the first one.
            scope.reuse_variables()

    # Regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # Answer module : feed-forward version (for it is one word answer)
        w_a = weight('w_a', [d, A])
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # Cross-Entropy loss
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
        loss = tf.reduce_mean(cross_entropy)
        # Adds the terms stored in the 'l2' collection, scaled by weight_decay.
        total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        # Accuracy
        predicts = tf.cast(tf.argmax(logits, 1), 'int32')
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # Training
    optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = input
    self.q = question
    self.y = answer
    self.mask = input_mask
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op
def trainFine(conf, jointTrain=False, resume=True):
    """Train the fine network on top of a restored, frozen base network.

    The base model is restored from ``<cachedir>/<outname>_<base iters>.ckpt``
    and wrapped in ``tf.stop_gradient``; patches extracted from its first two
    layer groups feed the fine network, which is trained with Adam on an L2
    loss against fine label images. Progress is checkpointed and pickled
    every ``conf.save_step`` steps and once more when training finishes.

    Args:
        conf: configuration object supplying sizes, learning-rate schedule,
            file names and step counts.
        jointTrain: not used by this function.
        resume: when true and a fine checkpoint exists, restore it together
            with the pickled training curves and continue after the saved
            step; otherwise start from step 0.
    """
    # Parameters
    learning_rate = conf.fine_learning_rate
    batch_size = conf.fine_batch_size
    display_step = conf.display_step
    n_classes = conf.n_classes
    dropout = conf.dropout
    imsz = conf.imsz
    rescale = conf.rescale
    scale = conf.scale
    pool_scale = conf.pool_scale

    x0, x1, x2, y, keep_prob = createPlaceHolders(imsz, rescale, scale,
                                                  pool_scale, n_classes)
    locs_ph = tf.placeholder(tf.float32, [conf.batch_size, n_classes, 2])
    learning_rate_ph = tf.placeholder(tf.float32, shape=[])

    weights = initNetConvWeights(conf)
    pred_gradient, layers = net_multi_conv(x0, x1, x2, weights, keep_prob,
                                           imsz, rescale, pool_scale)

    baseoutname = '%s_%d.ckpt' % (conf.outname, conf.base_training_iters)
    basemodelfile = os.path.join(conf.cachedir, baseoutname)

    sess = tf.Session()
    # Created before any fine-network variable exists, so this saver only
    # covers the base model.
    saver = tf.train.Saver()

    pred = tf.stop_gradient(pred_gradient)
    training_iters = conf.fine_training_iters

    print("Restoring base model from:" + basemodelfile)
    saver.restore(sess, basemodelfile)

    # Construct fine model
    labelT = multiPawTools.createFineLabelTensor(conf)
    layer1_1 = tf.stop_gradient(layers['base_dict_0']['conv1'])
    layer1_2 = tf.stop_gradient(layers['base_dict_0']['conv2'])
    layer2_1 = tf.stop_gradient(layers['base_dict_1']['conv1'])
    layer2_2 = tf.stop_gradient(layers['base_dict_1']['conv2'])
    curfine1_1 = extractPatches(layer1_1, pred, conf, 1, 4)
    curfine1_2 = extractPatches(layer1_2, pred, conf, 2, 2)
    curfine2_1 = extractPatches(layer2_1, pred, conf, 2, 2)
    curfine2_2 = extractPatches(layer2_2, pred, conf, 4, 1)
    # Swap the first two axes and unpack along the new leading axis.
    curfine1_1u = tf.unpack(tf.transpose(curfine1_1, [1, 0, 2, 3, 4]))
    curfine1_2u = tf.unpack(tf.transpose(curfine1_2, [1, 0, 2, 3, 4]))
    curfine2_1u = tf.unpack(tf.transpose(curfine2_1, [1, 0, 2, 3, 4]))
    curfine2_2u = tf.unpack(tf.transpose(curfine2_2, [1, 0, 2, 3, 4]))
    finepred = fineOut(curfine1_1u, curfine1_2u, curfine2_1u, curfine2_2u,
                       conf)
    limgs = multiPawTools.createFineLabelImages(locs_ph, pred, conf, labelT)

    # training data stuff
    lmdbfilename = os.path.join(conf.cachedir, conf.trainfilename)
    vallmdbfilename = os.path.join(conf.cachedir, conf.valfilename)
    env = lmdb.open(lmdbfilename, readonly=True)
    valenv = lmdb.open(vallmdbfilename, readonly=True)

    # Define loss and optimizer
    costFine = tf.reduce_mean(tf.nn.l2_loss(finepred - tf.to_float(limgs)))
    costBase = tf.reduce_mean(tf.nn.l2_loss(pred - y))
    cost = costFine

    # Created before the optimizer, so Adam's own variables are not part of
    # the fine checkpoints.
    saver1 = tf.train.Saver(max_to_keep=conf.maxckpt)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate_ph).minimize(cost)

    outfilename = os.path.join(conf.cachedir, conf.fineoutname)
    traindatafilename = os.path.join(conf.cachedir, conf.datafinename)
    latest_ckpt = tf.train.get_checkpoint_state(
        conf.cachedir, latest_filename=conf.ckptfinename)

    if not latest_ckpt or not resume:
        startat = 0
        trainData = {'train_err': [], 'val_err': [], 'step_no': []}
    else:
        # Saver.restore needs the session as its first argument.
        saver1.restore(sess, latest_ckpt.model_checkpoint_path)
        # Checkpoints are written as "<outfilename>-<step>"; continue after
        # the saved step.
        matchObj = re.match(re.escape(outfilename) + r'-(\d*)',
                            latest_ckpt.model_checkpoint_path)
        startat = int(matchObj.group(1)) + 1
        with open(traindatafilename, 'rb') as tdfile:
            trainData = pickle.load(tdfile)

    # Initialise only what is still uninitialised, so restored weights are
    # kept. This also has to run after a resume, because the optimizer's
    # variables are not stored in the fine checkpoints.
    for var in tf.all_variables():
        try:
            sess.run(tf.assert_variables_initialized([var]))
        except tf.errors.FailedPreconditionError:
            sess.run(tf.initialize_variables([var]))

    with env.begin() as txn, valenv.begin() as valtxn:
        train_cursor = txn.cursor()
        val_cursor = valtxn.cursor()

        # Keep training until reach max iterations
        for step in range(startat, training_iters):
            # Step-wise learning-rate decay based on examples seen so far.
            excount = step * batch_size
            cur_lr = learning_rate * conf.gamma**math.floor(
                old_div(excount, conf.step_size))

            batch_xs, locs = multiPawTools.readLMDB(train_cursor, batch_size,
                                                    imsz, multiResData)
            locs = multiResData.sanitize_locs(locs)
            # NOTE(review): validation below calls multiScaleImages; confirm
            # that iScaleImages is the intended helper here.
            x0_in, x1_in, x2_in = multiPawTools.iScaleImages(
                batch_xs.transpose([0, 2, 3, 1]), rescale, scale)
            labelims = multiPawTools.createLabelImages(
                locs, conf.imsz, conf.pool_scale * conf.rescale,
                conf.label_blur_rad)

            feed_dict = {
                x0: x0_in,
                x1: x1_in,
                x2: x2_in,
                y: labelims,
                keep_prob: dropout,
                locs_ph: np.array(locs),
                learning_rate_ph: cur_lr
            }
            sess.run(optimizer, feed_dict=feed_dict)

            if step % display_step == 0:
                # Training loss on the current batch with dropout disabled.
                feed_dict = {
                    x0: x0_in,
                    x1: x1_in,
                    x2: x2_in,
                    y: labelims,
                    keep_prob: 1.,
                    locs_ph: np.array(locs)
                }
                train_loss = sess.run([cost, costBase], feed_dict=feed_dict)

                # Validation loss averaged over numrep batches.
                numrep = int(old_div(conf.num_test, conf.batch_size)) + 1
                loss = 0
                for rep in range(numrep):
                    val_xs, locs = multiPawTools.readLMDB(
                        val_cursor, batch_size, imsz, multiResData)
                    x0_in, x1_in, x2_in = multiPawTools.multiScaleImages(
                        val_xs.transpose([0, 2, 3, 1]), rescale, scale)
                    labelims = multiPawTools.createLabelImages(
                        locs, conf.imsz, conf.pool_scale * conf.rescale,
                        conf.label_blur_rad)
                    feed_dict = {
                        x0: x0_in,
                        x1: x1_in,
                        x2: x2_in,
                        y: labelims,
                        keep_prob: 1.,
                        locs_ph: np.array(locs)
                    }
                    loss += sess.run(cost, feed_dict=feed_dict)
                loss = old_div((old_div(loss, numrep)), batch_size)

                print("Iter " + str(step) + " Minibatch Loss= " +
                      "{:.3f}".format(loss) + ", Training Loss= " +
                      "{:.3f}".format(old_div(train_loss[0], batch_size)) +
                      ", Base Training Loss= " +
                      "{:.3f}".format(old_div(train_loss[1], batch_size)))
                trainData['train_err'].append(
                    old_div(train_loss[0], batch_size))
                trainData['val_err'].append(loss)
                trainData['step_no'].append(step)

            if step % conf.save_step == 0:
                saver1.save(sess, outfilename, global_step=step,
                            latest_filename=conf.ckptfinename)
                print('Saved state to %s-%d' % (outfilename, step))
                with open(traindatafilename, 'wb') as tdfile:
                    pickle.dump(trainData, tdfile)

        # One past the last executed step; also defined when the loop body
        # never ran because the checkpoint was already at or past the end.
        step = max(startat, training_iters)

        print("Optimization Finished!")
        saver1.save(sess, outfilename, global_step=step,
                    latest_filename=conf.ckptfinename)
        print('Saved state to %s-%d' % (outfilename, step))
        with open(traindatafilename, 'wb') as tdfile:
            pickle.dump(trainData, tdfile)

    sess.close()
def testCannotInferNum(self):
    """tf.unpack must raise when the placeholder has no shape to infer num from."""
    shapeless = tf.placeholder(np.float32)
    expected_message = r'Cannot infer num from shape TensorShape\(None\)'
    with self.assertRaisesRegexp(ValueError, expected_message):
        tf.unpack(shapeless)
def fc_v2(inputs, input_dim, output_dim, name, rng, biases=True, init=None, weightnorm=None, gain=1.):
    """Fully connected layer with selectable initialisation and weight norm.

    init: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`,
        `("uniform", range)`. None behaves like 'glorot'.

    The weights are drawn with `rng`, scaled by `gain`, and stored in the
    variable `name + '_W'`. With weight normalisation (the module default is
    used when `weightnorm` is None) each column is rescaled to a learned
    norm `name + '.g'`. Inputs that are not rank 2 are flattened to
    [-1, input_dim] for the product and reshaped back afterwards. When
    `biases` is true, a zero-initialised bias `name + '.b'` is added.
    """
    weight_shape = (input_dim, output_dim)

    def uniform(stdev, size):
        # Uniform draw on [-stdev*sqrt(3), stdev*sqrt(3)]; the module-level
        # override replaces the requested stdev when it is set.
        if _weights_stdev is not None:
            stdev = _weights_stdev
        half_width = stdev * np.sqrt(3)
        return rng.uniform(
            low=-half_width, high=half_width, size=size).astype('float32')

    # Standard deviation for the uniform-family schemes, None for the rest.
    if init == 'lecun':
        stdev = np.sqrt(1. / input_dim)
    elif init == 'glorot' or init is None:
        stdev = np.sqrt(2. / (input_dim + output_dim))
    elif init == 'he':
        stdev = np.sqrt(2. / input_dim)
    elif init == 'glorot_he':
        stdev = np.sqrt(4. / (input_dim + output_dim))
    else:
        stdev = None

    if stdev is not None:
        weight_values = uniform(stdev, weight_shape)
    elif init == 'orthogonal':
        # From lasagne: orthogonal basis from the SVD of a Gaussian matrix.
        if len(weight_shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (weight_shape[0], np.prod(weight_shape[1:]))
        # TODO: why normal and not uniform?
        gaussian = rng.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(gaussian, full_matrices=False)
        # pick the one with the correct shape
        basis = u if u.shape == flat_shape else v
        weight_values = basis.reshape(weight_shape).astype('float32')
    elif init[0] == 'uniform':
        weight_values = rng.uniform(
            low=-init[1], high=init[1], size=weight_shape).astype('float32')
    else:
        raise Exception('Invalid initialization!')

    weight_values *= gain
    weight = tf.get_variable(name + '_W',
                             initializer=tf.constant(weight_values))

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # Column norms of the initial weights seed the learned target norms.
        initial_norms = np.sqrt(np.sum(np.square(weight_values), axis=0))
        target_norms = tf.get_variable(
            name + '.g', initializer=tf.constant(initial_norms))
        with tf.name_scope('weightnorm') as scope:
            current_norms = tf.sqrt(
                tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
            weight = weight * (target_norms / current_norms)

    if inputs.get_shape().ndims == 2:
        result = tf.matmul(inputs, weight)
    else:
        # Flatten the leading axes, multiply, then restore them with the
        # last axis replaced by output_dim.
        flattened = tf.reshape(inputs, [-1, input_dim])
        leading_dims = tf.unpack(tf.shape(inputs))[:-1]
        result = tf.reshape(tf.matmul(flattened, weight),
                            tf.pack(leading_dims + [output_dim]))

    if biases:
        bias = tf.get_variable(name + '.b',
                               shape=output_dim,
                               initializer=tf.zeros_initializer())
        result = tf.nn.bias_add(result, bias)

    return result
def __init__(self, args, is_training=True):
    """Build the generator/discriminator recurrent graph.

    The generator embeds the input ids, runs a multi-layer RNN and projects
    every output to vocabulary logits. The discriminator multiplies those
    logits with its own embedding matrix, runs a second RNN and emits
    two-way logits per step. The loss, optimiser and summaries are attached
    at the end.

    Args:
        args: hyper-parameter object; this method reads ``model``,
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size`` and ``grad_clip``.
        is_training: when false, the graph is built for a sequence length of
            1 and no gradient or optimiser ops are created.

    Raises:
        Exception: if ``args.model`` is not 'rnn', 'gru' or 'lstm'.
    """
    if not is_training:
        seq_length = 1
    else:
        seq_length = args.seq_length

    if args.model == 'rnn':
        cell_gen = rnn_cell.BasicRNNCell(args.rnn_size)
        cell_dis = rnn_cell.BasicRNNCell(args.rnn_size)
    elif args.model == 'gru':
        cell_gen = rnn_cell.GRUCell(args.rnn_size)
        cell_dis = rnn_cell.GRUCell(args.rnn_size)
    elif args.model == 'lstm':
        cell_gen = rnn_cell.BasicLSTMCell(args.rnn_size)
        cell_dis = rnn_cell.BasicLSTMCell(args.rnn_size)
    else:
        raise Exception('model type not supported: {}'.format(args.model))

    # Pass the generated sequences and targets (1)
    with tf.name_scope('input'):
        with tf.name_scope('data'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, seq_length])
        with tf.name_scope('targets'):
            self.targets = tf.placeholder(
                tf.int32, [args.batch_size, seq_length])

    ############
    # Generator
    ############
    with tf.variable_scope('generator'):
        self.cell_gen = rnn_cell.MultiRNNCell([cell_gen] * args.num_layers)
        self.initial_state_gen = self.cell_gen.zero_state(
            args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable(
                'softmax_w', [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable('softmax_b', [args.vocab_size])

            with tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [args.vocab_size, args.rnn_size])
                # One [batch, rnn_size] tensor per time step.
                inputs_gen = tf.split(
                    1, seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs_gen = [tf.squeeze(i, [1]) for i in inputs_gen]

        outputs_gen, last_state_gen = seq2seq.rnn_decoder(
            inputs_gen, self.initial_state_gen, self.cell_gen,
            loop_function=None)

        self.logits_sequence = []
        for output_gen in outputs_gen:
            logits_gen = tf.nn.xw_plus_b(output_gen, softmax_w, softmax_b)
            self.logits_sequence.append(logits_gen)

        self.final_state_gen = last_state_gen

    ################
    # Discriminator
    ################
    with tf.variable_scope('discriminator'):
        self.cell_dis = rnn_cell.MultiRNNCell([cell_dis] * args.num_layers)
        self.initial_state_dis = self.cell_dis.zero_state(
            args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
            softmax_b = tf.get_variable('softmax_b', [2])

            inputs_dis = []
            embedding = tf.get_variable(
                'embedding', [args.vocab_size, args.rnn_size])
            # The raw generator logits (no softmax) are multiplied with the
            # discriminator's embedding matrix.
            for logit in self.logits_sequence:
                inputs_dis.append(tf.matmul(logit, embedding))
                # inputs_dis.append(tf.matmul(tf.nn.softmax(logit), embedding))

        outputs_dis, last_state_dis = seq2seq.rnn_decoder(
            inputs_dis, self.initial_state_dis, self.cell_dis,
            loop_function=None)

        probs, logits = [], []
        for output_dis in outputs_dis:
            logit = tf.nn.xw_plus_b(output_dis, softmax_w, softmax_b)
            prob = tf.nn.softmax(logit)
            logits.append(logit)
            probs.append(prob)

        with tf.name_scope('summary'):
            probs = tf.pack(probs)
            # Slice with the length the graph was actually built for:
            # args.seq_length exceeds the packed tensor's leading dimension
            # when is_training is false (seq_length == 1).
            probs_real = tf.slice(
                probs, [0, 0, 1], [seq_length, args.batch_size, 1])
            variable_summaries(probs_real, 'probability of real')

        self.final_state_dis = last_state_dis

    #########
    # Train
    #########
    with tf.name_scope('train'):
        # NOTE(review): `logits` here are the discriminator's two-way logits
        # while the targets are the input placeholder's ids - confirm this
        # pairing is intended.
        gen_loss = seq2seq.sequence_loss_by_example(
            logits,
            tf.unpack(tf.transpose(self.targets)),
            tf.unpack(tf.transpose(
                tf.ones_like(self.targets, dtype=tf.float32))))

        self.gen_cost = tf.reduce_sum(gen_loss) / args.batch_size
        tf.scalar_summary('training loss', self.gen_cost)

        # Learning rate is a non-trainable variable so it can be assigned
        # from outside.
        self.lr_gen = tf.Variable(0.0, trainable=False)
        self.tvars = tf.trainable_variables()
        gen_vars = [v for v in self.tvars
                    if not v.name.startswith("discriminator/")]

        if is_training:
            gen_grads = tf.gradients(self.gen_cost, gen_vars)
            self.all_grads = tf.gradients(self.gen_cost, self.tvars)
            gen_grads_clipped, _ = tf.clip_by_global_norm(
                gen_grads, args.grad_clip)
            gen_optimizer = tf.train.AdamOptimizer(self.lr_gen)
            self.gen_train_op = gen_optimizer.apply_gradients(
                zip(gen_grads_clipped, gen_vars))

    with tf.name_scope('summary'):
        with tf.name_scope('weight_summary'):
            for v in self.tvars:
                variable_summaries(v, v.op.name)
        if is_training:
            with tf.name_scope('grad_summary'):
                for var, grad in zip(self.tvars, self.all_grads):
                    variable_summaries(grad, 'grad/' + var.op.name)

    self.merged = tf.merge_all_summaries()
def getGraph(self, num_steps, state_size, learningRate=1e-4): graph = tf.Graph() # create new graph with graph.as_default(): with tf.name_scope('data'): inputs = tf.placeholder(self.dtype, [self.batch_size, num_steps, self.segment_len], name='input_placeholder') targets = tf.placeholder(self.dtype, [self.batch_size, self.num_classes], name='labels_placeholder') init_state = tf.placeholder(self.dtype, [self.batch_size, state_size], name='previous_state_placeholder') with tf.name_scope('params'): training = tf.placeholder(tf.bool, name="training") # list where each item have dim 50 x 25 rnn_inputs = tf.unpack(inputs, axis=1, name='rnn_inputs') with tf.variable_scope('rnn_cell'): _ = self.getRnn_W(state_size=state_size) _ = self.getRnn_b(state_size=state_size) _ = self.get_pop_mean(outputDim=state_size) _ = self.get_pop_var(outputDim=state_size) _ = self.get_beta_offset(outputDim=state_size) _ = self.get_scale_gamma(outputDim=state_size) def rnn_cell(rnn_input, the_state): with tf.variable_scope('rnn_cell', reuse=True): with tf.name_scope('rnn_cell_affine_layer'): W = self.getRnn_W(state_size=state_size) b = self.getRnn_b(state_size=state_size) out_affine = tf.matmul( tf.concat(1, [rnn_input, the_state]), W # concat dimension, inputs, so you see that both the state and the inputs are being treated as one ) + b with tf.name_scope('rnn_cell_batch_norm'): batchNorm = self.batchNormWrapper_byExponentialMovingAvg( out_affine, training, get_pop_mean=self.get_pop_mean, get_pop_var=self.get_pop_var, get_beta_offset=self.get_beta_offset, get_scale_gamma=self.get_scale_gamma) with tf.name_scope('rnn_cell_act_func'): rnn_cell_out = tf.tanh(batchNorm) return rnn_cell_out state = init_state rnn_outputs = [] for rnn_inpt in rnn_inputs: state = rnn_cell(rnn_inpt, state) rnn_outputs.append(state) # as we see here the outputs are the state outputs of each rnn. 
final_state_rnn_outputs = rnn_outputs[-1] # final state with tf.variable_scope('readout'): # readout_weights = tf.Variable( # tf.truncated_normal( # [input_dim, output_dim], stddev=2. / (input_dim + output_dim) ** 0.5 # ), # name='readout_weights' # ) # readout_biases = tf.Variable(tf.zeros([output_dim]), # name='readout_biases') logits = fully_connected_layer_with_batch_norm( "readout", final_state_rnn_outputs, input_dim = state_size, output_dim = self.num_classes, nonlinearity=tf.identity, training=training, ) #logits = tf.matmul(final_state_rnn_outputs, readout_weights) + readout_biases with tf.name_scope('error'): error = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits, targets) ) with tf.name_scope('softmax'): # this is only for kaggle softmax = tf.nn.softmax(logits) with tf.name_scope('accuracy'): accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(targets, 1)), dtype=self.dtype)) with tf.name_scope('train'): train_step = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(error) init = tf.global_variables_initializer() self.init = init self.outputs = final_state_rnn_outputs self.inputs = inputs self.targets = targets self.init_state = init_state self.train_step = train_step self.error = error self.accuracy = accuracy self.logits = logits self.softmax = softmax self.training = training return graph
def _add_seq2seq(self):
    """Add the encoder, attention decoder, output projection and loss to the graph.

    Reads hyper-parameters from ``self._hps`` and the batch tensors
    ``self._articles``, ``self._abstracts``, ``self._targets``,
    ``self._loss_weights`` and ``self._article_lens``. Sets
    ``self._enc_top_states``, ``self._dec_in_state``, ``self._dec_out_state``
    and ``self._loss``; in 'decode' / 'decode_server' mode it also sets
    ``self._outputs``, ``self._topk_log_probs`` and ``self._topk_ids``.

    NOTE(review): this block reached review with its indentation collapsed;
    scope nesting and the position of the debug ``print`` calls are inferred
    -- confirm against the upstream file.
    """
    hps = self._hps
    vsize = self._vocab.NumIds()

    with tf.variable_scope('seq2seq'):
        # Turn each batch-major tensor into a time-major list, one entry per step.
        encoder_inputs = tf.unpack(
            tf.transpose(self._articles,
                         perm=[1, 0, 2]))  # We unpack the inputs into one array
        decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
        targets = tf.unpack(tf.transpose(self._targets))
        loss_weights = tf.unpack(tf.transpose(self._loss_weights))
        article_lens = self._article_lens
        print("Here")  # debug trace

        # Embedding shared by the input and outputs.
        with tf.variable_scope('embedding'), tf.device(
                self._next_device()):
            embedding = tf.get_variable(
                'embedding', [vsize, hps.emb_dim],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4)
            )  ## Create an embedding matrix of size vsize*emb_dimension
            emb_decoder_inputs = [
                tf.nn.embedding_lookup(embedding, x) for x in decoder_inputs
            ]  ## TODO: Change decoder embeddings also
            # The encoder inputs are used as-is (no embedding lookup); the
            # perm=[1, 0, 2] transpose above implies they are already rank 3.
            emb_encoder_inputs = encoder_inputs
            print("Here", len(emb_encoder_inputs))  # debug trace

        # Stack of bidirectional LSTM layers; each layer consumes the
        # previous layer's outputs.
        for layer_i in xrange(hps.enc_layers):
            with tf.variable_scope('encoder%d' % layer_i), tf.device(
                    self._next_device()):
                cell_fw = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123),
                    state_is_tuple=False)
                cell_bw = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                    state_is_tuple=False)
                (emb_encoder_inputs, fw_state, _) = tf.nn.bidirectional_rnn(cell_fw,
                                                                            cell_bw,
                                                                            emb_encoder_inputs,
                                                                            dtype=tf.float32,
                                                                            sequence_length=article_lens)
                print(len(emb_encoder_inputs))  # debug trace
        encoder_outputs = emb_encoder_inputs
        print("Here")  # debug trace

        with tf.variable_scope('output_projection'), tf.device(
                self._next_device()):
            w = tf.get_variable(
                'w', [hps.num_hidden, vsize],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            # Transposed copy used only by the sampled-softmax loss below.
            w_t = tf.transpose(w)
            v = tf.get_variable(
                'v', [vsize],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            print("Here")  # debug trace

        with tf.variable_scope('decoder'), tf.device(self._next_device()):
            # When decoding, use model output from the previous step
            # for the next step.
            # NOTE(review): only 'decode' enables this, while the decode
            # outputs further down are also built for 'decode_server' -- confirm.
            loop_function = None
            if hps.mode == 'decode':
                loop_function = _extract_argmax_and_embed(
                    embedding, (w, v), update_embedding=False)

            cell = tf.nn.rnn_cell.LSTMCell(
                hps.num_hidden,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                state_is_tuple=False)

            # Give every step output a length-1 time axis so the steps can be
            # concatenated along axis 1 into the attention memory.
            encoder_outputs = [
                tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                for x in encoder_outputs
            ]
            self._enc_top_states = tf.concat(1, encoder_outputs)
            self._dec_in_state = fw_state
            # During decoding, follow up _dec_in_state are fed from beam_search.
            # dec_out_state are stored by beam_search for next step feeding.
            initial_state_attention = (hps.mode == 'decode')
            decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
                emb_decoder_inputs,
                self._dec_in_state,
                self._enc_top_states,
                cell,
                num_heads=FLAGS.attn_heads,
                loop_function=loop_function,
                initial_state_attention=initial_state_attention)  ## Note: check the effect of changing num_heads

        with tf.variable_scope('output'), tf.device(self._next_device()):
            # Project every decoder output to vocabulary logits with (w, v).
            model_outputs = []
            for i in xrange(len(decoder_outputs)):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                model_outputs.append(
                    tf.nn.xw_plus_b(decoder_outputs[i], w, v))

        if hps.mode == 'decode' or hps.mode == 'decode_server':
            with tf.variable_scope('decode_output'), tf.device(
                    self._next_device()):
                best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                tf.logging.info('best_outputs%s', best_outputs[0].get_shape())
                self._outputs = tf.concat(1, [
                    tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs
                ])

                # Top (2 * batch_size) log-probabilities of the last step.
                self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                    tf.log(tf.nn.softmax(model_outputs[-1])),
                    hps.batch_size * 2)

        with tf.variable_scope('loss'), tf.device(self._next_device()):
            def sampled_loss_func(inputs, labels):
                """Sampled-softmax loss on unprojected decoder outputs."""
                #if(True):
                with tf.device('/cpu:0'):  # Try gpu.
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(
                        w_t, v, inputs, labels, hps.num_softmax_samples, vsize)

            if hps.num_softmax_samples != 0 and hps.mode == 'train':
                self._loss = seq2seq_lib.sampled_sequence_loss(
                    decoder_outputs, targets, loss_weights, sampled_loss_func)
            else:
                self._loss = tf.nn.seq2seq.sequence_loss(
                    model_outputs, targets, loss_weights)
            # The summary is capped at 12.0; self._loss itself is not clipped.
            tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
def add_seq2seq(self):
    """Add the word/sentence encoders, the joint attention decoder and the losses.

    Reads hyper-parameters from ``self.hps`` and the ``self.*_batch`` /
    ``self.*_lens`` tensors. Sets ``self.cell``, ``self.sent_cell``, the eight
    outputs of ``seq2seq.attention_decoder``, ``self.loss`` and
    ``self.final_log_dists``; when ``hps.mode != 'decode'`` it also sets
    ``self.word_loss``, ``self.sent_loss``, ``self.switch_loss`` and
    ``self.total_loss``.

    NOTE(review): this block reached review with its indentation collapsed.
    The nesting below is inferred; in particular whether the 'sent_decoder'
    scope is nested in 'decoder', and which scope the ``attention_decoder``
    call lives in, cannot be determined from the text and affects variable
    names -- confirm against the upstream file.
    """
    hps = self.hps
    vsize = hps.vocabulary_size
    threshold=0.5  # NOTE(review): not referenced anywhere in this method

    with tf.variable_scope('seq2seq'):
        # Turn each batch-major tensor into a time-major list, one entry per step.
        encoder_inputs = tf.unpack(tf.transpose(self.enc_batch))
        decoder_inputs = tf.unpack(tf.transpose(self.dec_batch))
        sent_encoder_inputs = tf.unpack(tf.transpose(self.sent_enc_batch))
        sent_decoder_inputs = tf.unpack(tf.transpose(self.sent_dec_batch))
        targets = tf.unpack(tf.transpose(self.target_batch))
        extend_targets = tf.unpack(tf.transpose(self.extend_target_batch))
        sent_targets = tf.unpack(tf.transpose(self.sent_target_batch))
        switch = tf.unpack(tf.transpose(self.switch_batch))
        word_weights = tf.unpack(tf.transpose(self.word_weights_batch))
        switch_weights = tf.unpack(tf.transpose(self.switch_weights_batch))
        # NOTE(review): the next four lists are not used later in this method.
        sent_decwords=tf.unpack(tf.transpose(self.sent_decwords_batch,perm=[1,0,2]))
        words_decsent=tf.unpack(tf.transpose(self.words_decsent_batch,perm=[1,0,2]))
        weights_sent_decwords=tf.unpack(tf.transpose(self.weights_sent_decwords_batch,perm=[1,0,2]))
        weights_words_decsent=tf.unpack(tf.transpose(self.weights_words_decsent_batch,perm=[1,0,2]))
        enc_lens = self.enc_input_lens
        sent_enc_lens = self.sent_enc_input_lens

        with tf.variable_scope('embedding'):
            # Word embedding table initialised from self.embed.
            embedding = tf.get_variable(
                'word_embedding',dtype=tf.float32,
                initializer=self.embed)
            emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                                  for x in encoder_inputs]
            emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                                  for x in decoder_inputs]

        with tf.variable_scope('sent_embedding'):
            sent_embedding = tf.get_variable(
                'sent_embedding', [hps.sent_enc_timesteps, hps.emb_dim], dtype=tf.float32)
            sent_emb_decoder_inputs = [tf.nn.embedding_lookup(sent_embedding, x)
                                       for x in sent_decoder_inputs]

        # Word-level encoder: stacked bidirectional LSTMs, each direction
        # hps.num_hidden/2 wide.
        for layer_i in xrange(hps.enc_layers):
            with tf.variable_scope('encoder%d'%layer_i):
                # NOTE(review): dropout (keep_prob 0.5) is applied without
                # any check of hps.mode -- confirm this is intended at decode time.
                emb_encoder_inputs=tf.unpack(tf.nn.dropout(emb_encoder_inputs,0.5))
                cell_fw = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden/2,
                    initializer=tf.contrib.layers.xavier_initializer(uniform=True,seed=123),
                    state_is_tuple=False)
                cell_bw = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden/2,
                    initializer=tf.contrib.layers.xavier_initializer(uniform=True,seed=123),
                    state_is_tuple=False)
                (emb_encoder_inputs, fw_state, bw_state) = tf.nn.bidirectional_rnn(
                    cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32,
                    sequence_length=enc_lens)
        encoder_outputs = emb_encoder_inputs

        # Build the sentence-encoder inputs by gathering, per batch element,
        # the word-encoder outputs at the positions in sent_encoder_inputs.
        sent_i=tf.transpose(encoder_outputs,perm=[1,0,2])
        index=tf.transpose(sent_encoder_inputs,perm=[1,0])
        sent_ip=tf.pack([tf.gather(sent_i[l],index[l]) for l in xrange(hps.batch_size)])
        sent_input=tf.unpack(tf.transpose(sent_ip,perm=[1,0,2]))

        # Sentence-level encoder: stacked unidirectional LSTMs.
        for layer_i in xrange(hps.enc_layers):
            with tf.variable_scope('sent_encoder%d'%layer_i):
                sent_input=tf.unpack(tf.nn.dropout(sent_input,0.5))
                cell_sent = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123),
                    state_is_tuple=False)
                (sent_input, sent_fw_state) = tf.nn.rnn(
                    cell_sent, sent_input, dtype=tf.float32,
                    sequence_length=sent_enc_lens)
        sent_encoder_outputs = sent_input

        with tf.variable_scope('decoder'):
            loop_function = None
            sent_loop_function = None
            self.cell = tf.nn.rnn_cell.LSTMCell(
                hps.num_hidden,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                state_is_tuple=False)
            # Add a length-1 time axis to each step, then join along axis 1.
            encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, hps.num_hidden])
                               for x in encoder_outputs]
            enc_top_states = tf.concat(1, encoder_outputs)
            # The decoder starts from both final encoder states, concatenated.
            dec_in_state=tf.concat(1,[fw_state,bw_state])

        with tf.variable_scope('sent_decoder'):
            self.sent_cell = tf.nn.rnn_cell.LSTMCell(
                hps.num_hidden,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
                state_is_tuple=False)
            sent_encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, hps.num_hidden])
                                    for x in sent_encoder_outputs]
            sent_enc_top_states = tf.concat(1, sent_encoder_outputs)

        # Boolean training flag forwarded to the decoder as mode_train.
        if hps.mode== 'train':
            mode=True
        else:
            mode=False
        sent_dec_in_state = sent_fw_state
        sent_initial_state_attention = True
        # `seq2seq` here is a project module: this call signature differs
        # from tf.nn.seq2seq.attention_decoder.
        self.decoder_outputs, self.dec_out_state,self.sent_decoder_outputs, self.sent_dec_out_state,self.switch_output,self.switch_prob,self.decoder_outputs_dists,self.sent_decoder_outputs_dists = seq2seq.attention_decoder(
            emb_decoder_inputs,encoder_inputs, dec_in_state, enc_top_states,self.cell,
            sent_emb_decoder_inputs, sent_input,sent_dec_in_state, sent_enc_top_states, self.sent_cell,hps.dec_timesteps,switch=switch,word_weights=word_weights,
            mode_train=mode,num_heads=1, loop_function=loop_function,sent_loop_function=sent_loop_function,
            initial_state_attention=sent_initial_state_attention)

        # Binary switch target: 1 where the switch value is >= 1, else 0.
        switch_target=[tf.to_int32(tf.greater_equal(x,1)) for x in switch]

        final_dists = self._calc_final_dist(self.decoder_outputs_dists, self.sent_decoder_outputs_dists)
        # 1e-12 keeps the log finite when a probability is exactly zero.
        log_dists = [tf.log(dist+1e-12) for dist in final_dists]

        with tf.variable_scope('loss'):
            loss_per_step = []
            batch_nums = tf.range(0, limit=hps.batch_size)
            sent_lens=1
            word_lens=1
            for dec_step, log_dist in enumerate(log_dists):
                target = extend_targets[dec_step]
                # (batch index, target id) pairs select one log-prob per example.
                indices = tf.stack( (batch_nums, target), axis=1)
                losses = tf.gather_nd(-log_dist, indices)
                w=(word_weights[dec_step]/word_lens)+(switch[dec_step]/sent_lens)
                loss_per_step.append(losses*w)
            self.loss =tf.reduce_mean(sum(loss_per_step))
        # NOTE(review): despite the attribute name, this stores final_dists
        # (probabilities), not log_dists -- confirm what consumers expect.
        self.final_log_dists=final_dists

        if hps.mode!='decode':
            with tf.variable_scope('word_loss'):
                self.word_loss=self.get_loss(
                    self.decoder_outputs, targets,self.word_weights_batch)
            with tf.variable_scope('sent_loss'):
                self.sent_loss=self.get_loss(
                    self.sent_decoder_outputs_dists, sent_targets,self.switch_batch)
            with tf.variable_scope('switch_loss'):
                self.switch_loss=seq2seq.sequence_loss(
                    self.switch_output,switch_target, switch_weights,
                    softmax_loss_function=None)
            # switch_loss is computed but not included in the total.
            self.total_loss=self.loss+self.word_loss+self.sent_loss
        # The summary is capped at 12.0; self.loss itself is not clipped.
        tf.scalar_summary('loss',tf.minimum(12.0, self.loss))
def build_model(words_size, embedding_size, oseq_len, source_len,
                simplified_len, defendant_nfilters, defendant_width,
                decoder_hidden, lstm_layer, batch_size, source_nfilters,
                source_width, is_train):
    """Wire the convolutional encoder and RNN decoder into one model.

    The graph inputs and parameters come from ``construct_data``; the encoder
    output feeds ``decoder_rnn``; the cost is the mean of the per-example
    sequence loss over the decoder's per-step logits.

    Returns:
        A dict with the decoder outputs ('outputs'), the scalar 'cost', the
        arg-max 'words_prediction', and the graph inputs needed to feed the
        model ('embedding', 'sample_rate', 'source', 'defendant',
        'defendant_length', 'label', 'decoder_inputs', 'loss_weights',
        'keep_prob').
    """
    graph_parts = construct_data(
        words_size=words_size,
        embedding_size=embedding_size,
        source_len=source_len,
        simplified_len=simplified_len,
        oseq_len=oseq_len,
        decoder_hidden=decoder_hidden,
        source_nfilters=source_nfilters,
        source_width=source_width,
        defendant_nfilters=defendant_nfilters,
        defendant_width=defendant_width)

    # Pull out every entry up front, in a fixed order, so a missing key
    # fails before any further graph construction happens.
    wanted_keys = (
        'embedding', 'conv_args', 'weigth_generation', 'bias_generation',
        'source', 'defendant', 'defendant_length', 'label',
        'decoder_inputs', 'loss_weights', 'keep_prob', 'sample_rate',
    )
    (embedding, conv_args, weigth_generation, bias_generation,
     source, defendant, defendant_length, label,
     decoder_inputs, loss_weights, keep_prob,
     sample_rate) = [graph_parts[key] for key in wanted_keys]

    encoded = encoder_conv(
        source=source,
        defendant=defendant,
        conv_args=conv_args,
        keep_prob=keep_prob,
        embedding=embedding,
        is_train=is_train)

    # The decoder's final state is not needed here.
    step_logits, _ = decoder_rnn(
        conv_encoder=encoded,
        decoder_inputs=decoder_inputs,
        decoder_hidden=decoder_hidden,
        weigth_generation=weigth_generation,
        bias_generation=bias_generation,
        n_steps=oseq_len,
        batch_size=batch_size,
        lstm_layer=lstm_layer,
        keep_prob=keep_prob,
        embedding=embedding,
        sample_rate=sample_rate,
        is_train=is_train)

    # Targets and weights are transposed and split into one tensor per step
    # to line up with the per-step logits.
    step_targets = tf.unpack(tf.transpose(label, [1, 0]))
    weight_tensor = tf.convert_to_tensor(loss_weights, dtype=tf.float32)
    step_weights = tf.unpack(tf.transpose(weight_tensor, [1, 0]))
    per_example_loss = seq2seq.sequence_loss_by_example(
        logits=step_logits,
        targets=step_targets,
        weights=step_weights)
    cost = tf.reduce_mean(per_example_loss)

    # Stack the steps, swap the first two axes, then arg-max over the last.
    stacked_logits = tf.pack(step_logits)
    words_prediction = tf.argmax(
        tf.transpose(stacked_logits, [1, 0, 2]), 2)

    print('build model ')

    model = {}
    model['outputs'] = step_logits
    model['embedding'] = embedding
    model['cost'] = cost
    model['sample_rate'] = sample_rate
    model['words_prediction'] = words_prediction
    model['source'] = source
    model['defendant'] = defendant
    model['defendant_length'] = defendant_length
    model['label'] = label
    model['decoder_inputs'] = decoder_inputs
    model['loss_weights'] = loss_weights
    model['keep_prob'] = keep_prob
    return model
def bidirectional_rnn(incoming, rnncell_fw, rnncell_bw, return_seq=False,
                      return_states=False, initial_state_fw=None,
                      initial_state_bw=None, dynamic=False, scope=None,
                      name="BiRNN"):
    """ Bidirectional RNN.

    Build a bidirectional recurrent neural network. It requires 2 RNN Cells
    to process the sequence in forward and backward order. Any RNN Cell can
    be used, i.e. SimpleRNN, LSTM, GRU... each with its own parameters, but
    the number of units of the two cells must match.

    NOTE(review): this block reached review with its indentation collapsed;
    the extent of the variable scope below is inferred -- confirm against the
    upstream file.

    Input:
        3-D Tensor Layer [samples, timesteps, input dim].

    Output:
        if `return_seq`: 3-D Tensor [samples, timesteps, output dim].
        else: 2-D Tensor Layer [samples, output dim].

    Arguments:
        incoming: `Tensor`. The incoming Tensor.
        rnncell_fw: `RNNCell`. The RNN Cell to use for forward computation.
        rnncell_bw: `RNNCell`. The RNN Cell to use for backward computation.
        return_seq: `bool`. If True, returns the full sequence instead of
            last sequence output only.
        return_states: `bool`. If True, returns a tuple with output and
            states: (output, states_fw, states_bw).
        initial_state_fw: `Tensor`. An initial state for the forward RNN.
            This must be a tensor of appropriate type and shape
            [batch_size x cell.state_size].
        initial_state_bw: `Tensor`. An initial state for the backward RNN.
            This must be a tensor of appropriate type and shape
            [batch_size x cell.state_size].
        dynamic: `bool`. If True, dynamic computation is performed. It will
            not compute RNN steps above the sequence length. Note that
            because TF requires to feed sequences of same length, 0 is used
            as a mask. So a sequence padded with 0 at the end must be
            provided. When computation is performed, it will stop when it
            meets a step with a value of 0.
        scope: `str`. Define this layer scope (optional). A scope can be
            used to share variables between layers. Note that scope will
            override name.
        name: `str`. A name for this layer (optional).

    """
    assert (rnncell_fw._num_units == rnncell_bw._num_units), \
        "RNN Cells number of units must match!"

    # Sequence lengths are only derived (from the input itself) in dynamic mode.
    sequence_length = None
    if dynamic:
        sequence_length = retrieve_seq_length_op(
            incoming if isinstance(incoming, tf.Tensor) else tf.pack(incoming))

    input_shape = utils.get_incoming_shape(incoming)

    with tf.variable_scope(scope, name, values=[incoming]) as scope:
        name = scope.name

        # TODO: DropoutWrapper

        inference = incoming
        # If a tensor given, convert it to a per timestep list
        # NOTE(review): `np.array` is a function, not a type, so this test
        # effectively only recognises `list`; any other input (including a
        # NumPy array) takes the transpose/unpack path -- confirm intent.
        if type(inference) not in [list, np.array]:
            ndim = len(input_shape)
            assert ndim >= 3, "Input dim should be at least 3."
            # Swap the first two axes so that unpack yields one tensor per step.
            axes = [1, 0] + list(range(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unpack(inference)

        outputs, states_fw, states_bw = _brnn(
            rnncell_fw, rnncell_bw, inference,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)

        # Register the cells' weights and biases in this layer's variable
        # collection; an attribute may be a single variable or a sequence.
        c = tf.GraphKeys.LAYER_VARIABLES + '/' + scope.name
        for v in [rnncell_fw.W, rnncell_fw.b, rnncell_bw.W, rnncell_bw.b]:
            if hasattr(v, "__len__"):
                for var in v:
                    tf.add_to_collection(c, var)
            else:
                tf.add_to_collection(c, v)

        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if dynamic:
        if return_seq:
            o = outputs
        else:
            # Stack the steps back into a batch-major tensor and let
            # advanced_indexing_op pick one output per sample using
            # sequence_length.
            outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])
            o = advanced_indexing_op(outputs, sequence_length)
    else:
        o = outputs if return_seq else outputs[-1]

    sfw = states_fw
    sbw = states_bw

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, o)

    return (o, sfw, sbw) if return_states else o
def Linear(
        name,
        input_dim,
        output_dim,
        inputs,
        biases=True,
        initialization=None,
        weightnorm=None,
        gain=1.0,
):
    """Apply a fully connected layer ``inputs . W (+ b)`` over the last axis.

    Parameters are created through ``lib.param`` under the names
    ``name + ".W"``, ``name + ".g"`` (weight-norm gains) and ``name + ".b"``.

    Args:
        name: Name scope and parameter-name prefix of the layer.
        input_dim: Size of the last axis of ``inputs``.
        output_dim: Size of the last axis of the result.
        inputs: Input tensor. Inputs that are not rank 2 are flattened to
            ``[-1, input_dim]`` for the matmul and reshaped back afterwards.
        biases: Whether to add a zero-initialised bias.
        initialization: One of None, 'lecun', 'glorot', 'he', 'glorot_he',
            'orthogonal' or ``("uniform", range)``. None behaves exactly like
            'glorot'.
        weightnorm: Whether to apply weight normalisation; None falls back to
            the module-level ``_default_weightnorm``.
        gain: Multiplier applied to the initial weight values.

    Returns:
        The output tensor.

    Raises:
        Exception: If ``initialization`` is not a recognised value.
    """
    with tf.name_scope(name):

        def uniform(stdev, size):
            """Sample uniformly with the given standard deviation.

            The module-level ``_weights_stdev``, when set, overrides ``stdev``.
            """
            if _weights_stdev is not None:
                stdev = _weights_stdev
            # A uniform distribution on [-a, a] has stdev a / sqrt(3).
            return np.random.uniform(low=-stdev * np.sqrt(3),
                                     high=stdev * np.sqrt(3),
                                     size=size).astype("float32")

        if initialization == "lecun":  # and input_dim != output_dim):
            # disabling orth. init for now because it's too slow
            weight_values = uniform(np.sqrt(1.0 / input_dim),
                                    (input_dim, output_dim))

        elif initialization == "glorot" or initialization is None:
            weight_values = uniform(np.sqrt(2.0 / (input_dim + output_dim)),
                                    (input_dim, output_dim))

        elif initialization == "he":
            weight_values = uniform(np.sqrt(2.0 / input_dim),
                                    (input_dim, output_dim))

        elif initialization == "glorot_he":
            weight_values = uniform(np.sqrt(4.0 / (input_dim + output_dim)),
                                    (input_dim, output_dim))

        # The original condition also tested
        # `initialization == None and input_dim == output_dim`; that clause
        # could never fire because None is already handled by the 'glorot'
        # branch above, so it has been removed.
        elif initialization == "orthogonal":

            # From lasagne
            def sample(shape):
                """Return a float32 matrix with orthonormal rows or columns."""
                if len(shape) < 2:
                    raise RuntimeError("Only shapes of length 2 or more are "
                                       "supported.")
                flat_shape = (shape[0], np.prod(shape[1:]))
                # TODO: why normal and not uniform?
                a = np.random.normal(0.0, 1.0, flat_shape)
                u, _, v = np.linalg.svd(a, full_matrices=False)
                # pick the one with the correct shape
                q = u if u.shape == flat_shape else v
                q = q.reshape(shape)
                return q.astype("float32")

            weight_values = sample((input_dim, output_dim))

        elif initialization[0] == "uniform":
            weight_values = np.random.uniform(
                low=-initialization[1],
                high=initialization[1],
                size=(input_dim, output_dim),
            ).astype("float32")

        else:
            raise Exception("Invalid initialization!")

        weight_values *= gain

        weight = lib.param(name + ".W", weight_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Per-output-column norms of the initial weights seed the gains,
            # so the rescaled weight starts out equal to the raw weight.
            norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))
            # norm_values = np.linalg.norm(weight_values, axis=0)

            target_norms = lib.param(name + ".g", norm_values)

            with tf.name_scope("weightnorm"):
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
                weight = weight * (target_norms / norms)

        # if 'Discriminator' in name:
        #     print "WARNING weight constraint on {}".format(name)
        #     weight = tf.nn.softsign(10.*weight)*.1

        if inputs.get_shape().ndims == 2:
            result = tf.matmul(inputs, weight)
        else:
            # Flatten the leading axes, multiply, then restore them with the
            # last axis replaced by output_dim.
            reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
            result = tf.matmul(reshaped_inputs, weight)
            result = tf.reshape(
                result,
                tf.pack(tf.unpack(tf.shape(inputs))[:-1] + [output_dim]))

        if biases:
            result = tf.nn.bias_add(
                result,
                lib.param(name + ".b", np.zeros((output_dim, ),
                                                dtype="float32")))

        return result
def transform(cls, x, return_log_jac=False):
    """Pick entry ``idx`` of ``x`` unpacked along ``axis``.

    ``axis`` and ``idx`` are not parameters; they are resolved from the
    enclosing scope. When ``return_log_jac`` is true the result is the pair
    ``(selected, 0.0)``, otherwise just the selected tensor.
    """
    selected = tf.unpack(x, axis=axis)[idx]
    return (selected, 0.0) if return_log_jac else selected
def __init__(self, vocab_size, embedding_size, state_size, num_layers,
             num_samples, max_seq_length, max_gradient_norm, cell_type,
             optimizer, learning_rate):
    """Build the training graph of an RNN encoder-decoder (seq2seq) model.

    Args:
        vocab_size: Number of rows of the embedding table and width of the
            output projection.
        embedding_size: Width of each embedding vector.
        state_size: Number of units of each RNN cell.
        num_layers: Number of stacked cells; values > 1 wrap the cell in a
            ``MultiRNNCell``.
        num_samples: Number of sampled classes for the sampled-softmax
            helper (see the NOTE in the body).
        max_seq_length: Number of encoder time steps; the decoder
            placeholders have ``max_seq_length + 2`` steps.
        max_gradient_norm: Global-norm threshold used for gradient clipping.
        cell_type: ``'gru'`` selects ``GRUCell``; anything else keeps
            ``BasicLSTMCell``.
        optimizer: ``'adadelta'`` selects Adadelta; anything else keeps Adam.
        learning_rate: Learning rate handed to the optimizer.
    """
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.state_size = state_size
    self.num_layers = num_layers
    self.max_seq_length = max_seq_length
    self.max_gradient_norm = max_gradient_norm
    self.cell_type = cell_type
    self.num_samples = num_samples
    self.optimizer = optimizer
    self.learning_rate = learning_rate
    self.is_train = True  # false for test
    self.global_step = tf.Variable(0, trainable=False)

    # --- Create the encoder and decoder input placeholders ---
    # [max_seq_length x batch_size] tensor representing input sequences;
    # None allows a variable batch size.
    self.encoder_inputs = tf.placeholder(
        tf.int32, [self.max_seq_length, None])
    # [batch_size] tensor recording each sequence's length, used by the rnn
    # to decide when to stop computing.
    self.encoder_lengths = tf.placeholder(tf.int32, [None])
    # The decoder inputs carry the extra 'GO' and 'EOS' symbols, hence two
    # more time steps.
    self.decoder_inputs = tf.placeholder(
        tf.int32, [self.max_seq_length + 2, None])
    # Padded positions get weight 0.0 so they do not contribute to the loss.
    self.decoder_weights = tf.placeholder(
        tf.float32, [self.max_seq_length + 2, None])

    # --- Create the output projection variables ---
    # The decoder output at step t (o_t) is [state_size] wide; o_t*w+b is
    # [vocab_size] wide, so words are generated as argmax(o_t*w+b).
    w = tf.get_variable("proj_w", [self.state_size, self.vocab_size])
    w_t = tf.transpose(w)
    b = tf.get_variable("proj_b", [self.vocab_size])
    output_projection = (w, b)

    # Sampled softmax: an approximation that only considers [num_samples]
    # classes to make the loss cheaper to compute.
    # NOTE(review): `softmax_loss_function` is assigned here but never passed
    # to `sequence_loss` below, which receives already-projected logits --
    # the sampled loss is currently not used. Confirm whether that is
    # intended before wiring it in.
    softmax_loss_function = None
    if self.num_samples > 0 and self.num_samples < self.vocab_size:

        def sampled_loss(inputs, labels):
            """Sampled-softmax loss on unprojected decoder outputs."""
            labels = tf.reshape(labels, [-1, 1])
            return tf.nn.sampled_softmax_loss(weights=w_t,
                                              biases=b,
                                              inputs=inputs,
                                              labels=labels,
                                              num_sampled=self.num_samples,
                                              num_classes=self.vocab_size)

        softmax_loss_function = sampled_loss

    # --- Create the embedding table and the embedded inputs ---
    with tf.device("/cpu:0"):  # embedding lookup only works with cpu
        embedding = tf.get_variable("embedding",
                                    [self.vocab_size, self.embedding_size])
        # embedding_lookup maps each id to its embedded representation;
        # unpack splits the result into one tensor per time step.
        embedded_encoder_inputs = tf.unpack(
            tf.nn.embedding_lookup(embedding, self.encoder_inputs))
        embedded_decoder_inputs = tf.unpack(
            tf.nn.embedding_lookup(embedding, self.decoder_inputs))

    # --- Create the rnn cell ---
    cell = tf.nn.rnn_cell.BasicLSTMCell(self.state_size, state_is_tuple=True)
    if cell_type == 'gru':
        cell = tf.nn.rnn_cell.GRUCell(self.state_size)
    if self.num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([cell] * self.num_layers)

    # --- Create the encoder result ---
    # The encoder state records the state of every layer (the original
    # author describes it as [num_layers*state_size]-dimensional).
    encoder_outputs, self.encoder_states = rnn.rnn(
        cell,
        embedded_encoder_inputs,
        sequence_length=self.encoder_lengths,
        dtype=dtypes.float32)

    # --- Create the decoder result ---
    # In training the decoder is fed the ground-truth symbol at every step;
    # when predicting, a loop_function feeds the previous prediction back
    # in as the next input.
    def loop_function(prev, _):
        """Embed the arg-max word of the previous decoder output."""
        # project the previous output to vocabulary scores
        prev = tf.matmul(prev, output_projection[0]) + output_projection[1]
        # most likely word; `tf.argmax` is the public entry point for the
        # op the original reached through `tf.math_ops`
        prev_symbol = tf.argmax(prev, 1)
        # embed the word as the next step's input
        emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
        return emb_prev

    # Initialise the decoder with the encoder states and run it over the
    # whole sequence; decoder_outputs holds each step's output.
    self.decoder_outputs, decoder_states = tf.nn.seq2seq.rnn_decoder(
        embedded_decoder_inputs,
        self.encoder_states,
        cell,
        loop_function=None if self.is_train else loop_function)
    self.decoder_outputs = [
        tf.matmul(one, output_projection[0]) + output_projection[1]
        for one in self.decoder_outputs
    ]

    # --- Create the loss ---
    # For a sequence [GO, w1, w2, w3, EOS] the decoder receives 'GO' at
    # step 0 and must predict w1, and so on: the targets are the decoder
    # inputs shifted by one step. Unpack once instead of once per step.
    decoder_steps = tf.unpack(self.decoder_inputs)
    decoder_truth = decoder_steps[1:self.max_seq_length + 2]
    # sequence_loss averages over the batch.
    self.loss = tf.nn.seq2seq.sequence_loss(
        self.decoder_outputs[:-1], decoder_truth,
        tf.unpack(self.decoder_weights)[:-1])

    # --- Create the gradients ---
    params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, params)
    # gradient clipping is frequently used with rnns
    clipped_gradients, norm = tf.clip_by_global_norm(
        gradients, self.max_gradient_norm)

    # --- Create the optimizer ---
    opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    if self.optimizer == 'adadelta':
        opt = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate)
    self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                      global_step=self.global_step)

    # --- Create the saver ---
    self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=10)
def get_outputs(self, inputs, input_seq_length, classifier):
    '''compute the outputs of the decoder with a beam search

    NOTE(review): this block reached review with its indentation collapsed;
    the scope nesting below is inferred -- confirm against the upstream file.

    Args:
        inputs: The inputs to the network as a
            [batch_size x max_input_length x input_dim] tensor.
            NOTE(review): not read by the body, which uses self.inputs.
        input_seq_length: The sequence length of the inputs as a
            [batch_size] vector.
            NOTE(review): not read by the body, which uses
            self.input_seq_length.
        classifier: The classifier object that will be used in decoding

    Returns:
        A list with batch_size elements, each a list with one entry per beam
        element holding the tuple
        (score, output sequence, attention, high level features)
    '''

    #encode the inputs [batch_size x output_length x output_dim]
    hlfeat = classifier.encoder(self.inputs, self.input_seq_length, False)

    #repeat the high level features for all beam elements
    hlfeat = tf.reshape(
        tf.tile(tf.expand_dims(hlfeat, 1),
                [1, int(self.conf['beam_width']), 1, 1]),
        [
            int(self.conf['beam_width']) * self.batch_size,
            int(hlfeat.get_shape()[1]),
            int(hlfeat.get_shape()[2])
        ])

    def body(step, beam, first_step=False, check_finished=True):
        '''the body of the decoding while loop

        Args:
            step: the decoding step
            beam: a Beam object containing the current beam
            first_step: whether or not this is the first step in decoding
            check_finished: finish a beam element if a sentence border
                token is observed

        returns:
            the loop vars'''

        with tf.variable_scope('body'):

            #put the last output in the correct format
            # [batch_size x beam_width]
            prev_output = beam.sequences[:, :, step]

            #put the prev_output and state in the correct shape so all
            #beam elements from all batches are processed in parallel
            #[batch_size*beam_width x 1]
            prev_output = tf.expand_dims(tf.reshape(prev_output, [-1]), 1)

            states = [
                tf.reshape(s, [-1, int(s.get_shape()[2])])
                for s in nest.flatten(beam.states)
            ]
            states = nest.pack_sequence_as(beam.states, states)

            #compute the next state and logits
            logits, states = classifier.decoder(hlfeat=hlfeat,
                                                encoder_inputs=prev_output,
                                                initial_state=states,
                                                first_step=first_step,
                                                is_training=False)

            #get the attention tensor
            #it is fetched from the graph by name, built from the current
            #name stack and the decoder's class name; the name differs
            #between the first and the later calls
            if first_step:
                attention_name = (
                    tf.get_default_graph()._name_stack + '/' +
                    type(classifier.decoder).__name__ +
                    '/attention_decoder/Attention_0/Softmax:0')
            else:
                attention_name = (tf.get_default_graph()._name_stack + '/' +
                                  type(classifier.decoder).__name__ +
                                  '/attention_decoder/attention_decoder/' +
                                  'Attention_0/Softmax:0')

            attention = tf.get_default_graph().get_tensor_by_name(
                attention_name)

            #put the states and logits in the format for the beam
            states = [
                tf.reshape(s, [
                    self.batch_size,
                    int(self.conf['beam_width']),
                    int(s.get_shape()[1])
                ]) for s in nest.flatten(states)
            ]
            states = nest.pack_sequence_as(beam.states, states)
            logits = tf.reshape(logits, [
                self.batch_size,
                int(self.conf['beam_width']),
                int(logits.get_shape()[2])
            ])

            attention = tf.reshape(attention, [
                self.batch_size,
                int(self.conf['beam_width']),
                int(attention.get_shape()[1])
            ])

            #update the beam
            beam = beam.update(logits, states, attention, step,
                               check_finished)

            step = step + 1

        return step, beam

    def cb_cond(step, beam):
        '''the condition of the decoding while loop

        Args:
            step: the decoding step
            beam: a Beam object containing the current beam

        returns:
            a boolean that evaluates to True if the loop should
            continue'''

        with tf.variable_scope('cond'):

            #check if all beam elements have terminated
            #the loop also stops once max_steps is reached
            cont = tf.logical_and(
                tf.logical_not(
                    beam.all_terminated(step, classifier.output_dim - 1)),
                tf.less(step, int(self.conf['max_steps'])))

        return cont

    #initialise the loop variables
    #only the first beam element starts with score 0, all the others start
    #at the most negative float32
    negmax = tf.tile([[-tf.float32.max]],
                     [self.batch_size, int(self.conf['beam_width']) - 1])
    scores = tf.concat([tf.zeros([self.batch_size, 1]), negmax], 1)
    lengths = tf.ones(
        [self.batch_size, int(self.conf['beam_width'])], dtype=tf.int32)
    #every position starts out as label output_dim - 1, the same label
    #cb_cond hands to all_terminated
    sequences = tf.constant(classifier.output_dim - 1,
                            shape=[
                                self.batch_size,
                                int(self.conf['beam_width']),
                                int(self.conf['max_steps'])
                            ],
                            dtype=tf.int32)
    states = classifier.decoder.zero_state(
        int(self.conf['beam_width']) * self.batch_size)
    flat_states = [
        tf.reshape(s, [
            self.batch_size,
            int(self.conf['beam_width']),
            int(s.get_shape()[1])
        ]) for s in nest.flatten(states)
    ]
    states = nest.pack_sequence_as(states, flat_states)
    attention = tf.zeros([
        self.batch_size,
        int(self.conf['beam_width']),
        int(hlfeat.get_shape()[1]),
        int(self.conf['max_steps'])
    ])

    beam = Beam(sequences, lengths, states, scores, attention)
    step = tf.constant(0)

    #do the first step because the initial state should not be used
    #to compute a context
    step, beam = body(step, beam, True, False)

    #run the rest of the decoding loop
    _, beam = tf.while_loop(cond=cb_cond,
                            body=body,
                            loop_vars=[step, beam],
                            parallel_iterations=1,
                            back_prop=False)

    with tf.variable_scope('cut_sequences'):
        #get the beam scores
        scores = [tf.unpack(s) for s in tf.unpack(beam.scores)]

        #cut the beam sequences to the correct length and take off
        #the sequence border tokens
        sequences = [tf.unpack(s) for s in tf.unpack(beam.sequences)]
        lengths = [tf.unpack(l) for l in tf.unpack(beam.lengths)]
        attention = [tf.unpack(a) for a in tf.unpack(beam.attention)]
        # NOTE(review): hlfeat was tiled to batch_size*beam_width rows above,
        # so hlfeat[i] below indexes the tiled tensor with a batch index --
        # confirm this selects the intended utterance when beam_width > 1.
        hlfeat = tf.unpack(hlfeat)
        sequences = [[
            sequences[i][j][1:lengths[i][j] - 1]
            for j in range(len(lengths[i]))
        ] for i in range(len(lengths))]
        attention = [[
            attention[i][j][:, 1:lengths[i][j]]
            for j in range(len(lengths[i]))
        ] for i in range(len(lengths))]

        outputs = [[(scores[i][j], sequences[i][j], attention[i][j],
                     hlfeat[i]) for j in range(len(sequences[i]))]
                   for i in range(len(sequences))]

    return outputs
def _testDynamicEquivalentToStaticRNN(self, use_gpu):
    """Check that tf.nn.dynamic_rnn reproduces tf.nn.rnn exactly.

    The same peephole LSTM cell (same sizes, same seeded initializer, same
    variable scope name) is built twice, each time in a fresh graph: once
    unrolled with tf.nn.rnn and once with tf.nn.dynamic_rnn in time-major
    mode. Both graphs are fed the same random inputs and sequence lengths.
    The per-step outputs, the final state, and several sets of gradients
    are then compared with assertAllEqual.

    Args:
        use_gpu: forwarded to self.test_session for both graphs.
    """
    time_steps = 8
    num_units = 3
    num_proj = 4
    input_size = 5
    batch_size = 2

    input_values = np.random.randn(time_steps, batch_size, input_size)

    # One length per batch entry, drawn from [0, time_steps); the same array
    # is handed to both the static and the dynamic RNN.
    sequence_length = np.random.randint(0, time_steps, size=batch_size)

    ########### Step 1: Run static graph and generate readouts
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        concat_inputs = tf.placeholder(tf.float32,
                                       shape=(time_steps, batch_size, input_size))
        # The static RNN consumes a Python list with one tensor per time step.
        inputs = tf.unpack(concat_inputs)
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)

        cell = tf.nn.rnn_cell.LSTMCell(
            num_units, input_size, use_peepholes=True,
            initializer=initializer, num_proj=num_proj)

        # Same scope name as in step 2, although this is the static graph.
        with tf.variable_scope("dynamic_scope"):
            outputs_static, state_static = tf.nn.rnn(
                cell, inputs, sequence_length=sequence_length, dtype=tf.float32)

        feeds = {concat_inputs: input_values}

        # Initialize
        tf.initialize_all_variables().run(feed_dict=feeds)

        # Generate gradients of sum of outputs w.r.t. inputs
        static_gradients = tf.gradients(
            outputs_static + [state_static], [concat_inputs])

        # Generate gradients of individual outputs w.r.t. inputs
        # (first output, last output, final state).
        # _flatten is defined elsewhere; presumably it flattens the nested
        # list returned by the comprehension -- confirm against its definition.
        static_individual_gradients = _flatten([
            tf.gradients(y, [concat_inputs])
            for y in [outputs_static[0],
                      outputs_static[-1],
                      state_static]])

        # Generate gradients of individual outputs w.r.t. the trainable
        # variables
        trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        assert len(trainable_variables) > 1, (
            "Count of trainable variables: %d" % len(trainable_variables))
        # pylint: disable=bad-builtin
        static_individual_variable_gradients = _flatten([
            tf.gradients(y, trainable_variables)
            for y in [outputs_static[0],
                      outputs_static[-1],
                      state_static]])

        # Test forward pass
        values_static = sess.run(outputs_static, feed_dict=feeds)
        (state_value_static,) = sess.run((state_static,), feed_dict=feeds)

        # Test gradients to inputs and variables w.r.t. outputs & final state
        static_grad_values = sess.run(static_gradients, feed_dict=feeds)

        static_individual_grad_values = sess.run(
            static_individual_gradients, feed_dict=feeds)

        static_individual_var_grad_values = sess.run(
            static_individual_variable_gradients, feed_dict=feeds)

    ########## Step 2: Run dynamic graph and generate readouts
    with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
        concat_inputs = tf.placeholder(tf.float32,
                                       shape=(time_steps, batch_size, input_size))
        # Not consumed below: dynamic_rnn is given concat_inputs directly.
        inputs = tf.unpack(concat_inputs)
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)

        cell = tf.nn.rnn_cell.LSTMCell(
            num_units, input_size, use_peepholes=True,
            initializer=initializer, num_proj=num_proj)

        with tf.variable_scope("dynamic_scope"):
            outputs_dynamic, state_dynamic = tf.nn.dynamic_rnn(
                cell, inputs=concat_inputs, sequence_length=sequence_length,
                time_major=True, dtype=tf.float32)
            # Split the single output tensor into time_steps pieces so it can
            # be compared step by step with the static outputs list.
            split_outputs_dynamic = tf.unpack(outputs_dynamic, time_steps)

        feeds = {concat_inputs: input_values}

        # Initialize
        tf.initialize_all_variables().run(feed_dict=feeds)

        # Generate gradients of sum of outputs w.r.t. inputs
        dynamic_gradients = tf.gradients(
            split_outputs_dynamic + [state_dynamic], [concat_inputs])

        # Generate gradients of several individual outputs w.r.t. inputs
        dynamic_individual_gradients = _flatten([
            tf.gradients(y, [concat_inputs])
            for y in [split_outputs_dynamic[0],
                      split_outputs_dynamic[-1],
                      state_dynamic]])

        # Generate gradients of individual outputs w.r.t. the trainable
        # variables
        trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        assert len(trainable_variables) > 1, (
            "Count of trainable variables: %d" % len(trainable_variables))
        dynamic_individual_variable_gradients = _flatten([
            tf.gradients(y, trainable_variables)
            for y in [split_outputs_dynamic[0],
                      split_outputs_dynamic[-1],
                      state_dynamic]])

        # Test forward pass
        values_dynamic = sess.run(split_outputs_dynamic, feed_dict=feeds)
        (state_value_dynamic,) = sess.run(
            (state_dynamic,), feed_dict=feeds)

        # Test gradients to inputs and variables w.r.t. outputs & final state
        dynamic_grad_values = sess.run(dynamic_gradients, feed_dict=feeds)

        dynamic_individual_grad_values = sess.run(
            dynamic_individual_gradients, feed_dict=feeds)

        dynamic_individual_var_grad_values = sess.run(
            dynamic_individual_variable_gradients, feed_dict=feeds)

    ######### Step 3: Comparisons
    # Every comparison below uses assertAllEqual, i.e. exact equality rather
    # than a tolerance.
    self.assertEqual(len(values_static), len(values_dynamic))
    for (value_static, value_dynamic) in zip(values_static, values_dynamic):
        self.assertAllEqual(value_static, value_dynamic)
    self.assertAllEqual(state_value_static, state_value_dynamic)

    self.assertAllEqual(static_grad_values, dynamic_grad_values)

    self.assertEqual(len(static_individual_grad_values),
                     len(dynamic_individual_grad_values))
    self.assertEqual(len(static_individual_var_grad_values),
                     len(dynamic_individual_var_grad_values))

    for i, (a, b) in enumerate(zip(static_individual_grad_values,
                                   dynamic_individual_grad_values)):
        tf.logging.info("Comparing individual gradients iteration %d" % i)
        self.assertAllEqual(a, b)

    for i, (a, b) in enumerate(zip(static_individual_var_grad_values,
                                   dynamic_individual_var_grad_values)):
        tf.logging.info(
            "Comparing individual variable gradients iteraiton %d" % i)
        self.assertAllEqual(a, b)
def inference(documents, doc_mask, query, query_mask):
    """Build the document/query attention graph and score vocabulary ids.

    Documents and queries share one embedding table and are each encoded by
    their own bidirectional GRU (variable scopes 'document' and 'query').
    A pairwise score matrix between document and query positions is
    normalised along both axes by the module's `softmax` helper, the two
    attentions are combined into one weight per document position, and the
    weights are summed per token id.

    Args:
        documents: token ids of the documents; also used as the segment ids
            for the final per-vocabulary sum.
        doc_mask: mask whose row sums are used as document lengths.
        query: token ids of the queries.
        query_mask: mask whose row sums are used as query lengths.

    Returns:
        A pair (y_hat, regularizer): the packed per-example vocabulary
        scores, and the L2 loss of the embedding table.
    """
    embedding = tf.get_variable(
        'embedding', [FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))
    regularizer = tf.nn.l2_loss(embedding)

    def _embed(token_ids):
        # Look up embeddings, apply dropout, and pin the static shape.
        embedded = tf.nn.dropout(
            tf.nn.embedding_lookup(embedding, token_ids),
            FLAGS.dropout_keep_prob)
        embedded.set_shape([None, None, FLAGS.embedding_size])
        return embedded

    def _encode(scope_name, embedded, mask):
        # Run a bidirectional GRU in its own scope; return the concatenated
        # forward/backward outputs together with the sequence lengths.
        with tf.variable_scope(scope_name,
                               initializer=orthogonal_initializer()):
            cell_fw = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
            cell_bw = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)

            lengths = tf.reduce_sum(mask, reduction_indices=1)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, embedded,
                sequence_length=tf.to_int64(lengths), dtype=tf.float32)
            return tf.concat(2, outputs), lengths

    doc_emb = _embed(documents)
    query_emb = _embed(query)

    h_doc, doc_len = _encode('document', doc_emb, doc_mask)
    h_query, _ = _encode('query', query_emb, query_mask)

    # Pairwise scores between every document and query position, plus the
    # matching mask built from the outer product of the two masks.
    pair_scores = tf.batch_matmul(h_doc, h_query, adj_y=True)
    doc_column = tf.expand_dims(doc_mask, -1)
    query_row = tf.expand_dims(query_mask, 1)
    pair_mask = tf.to_float(tf.batch_matmul(doc_column, query_row))

    alpha = softmax(pair_scores, 1, pair_mask)
    beta = softmax(pair_scores, 2, pair_mask)

    # Average beta over the document axis using the true document length
    # rather than the padded length.
    beta_total = tf.reduce_sum(beta, 1)
    doc_len_column = tf.to_float(tf.expand_dims(doc_len, -1))
    query_importance = tf.expand_dims(beta_total / doc_len_column, -1)

    s = tf.squeeze(tf.batch_matmul(alpha, query_importance), [2])

    # Sum the attention mass per token id, one example at a time.
    per_example = zip(tf.unpack(s, FLAGS.batch_size),
                      tf.unpack(documents, FLAGS.batch_size))
    vocab_scores = []
    for attentions, token_ids in per_example:
        vocab_scores.append(
            tf.unsorted_segment_sum(attentions, token_ids, FLAGS.vocab_size))
    y_hat = tf.pack(vocab_scores)

    return y_hat, regularizer
def build_generator(self):
    """Build the answer-scoring graph and return it with its placeholders.

    Questions and answers are stacked along the batch axis and encoded in a
    single bidirectional RNN pass. The resulting state is split back into a
    question half and an answer half, fused with the image features through
    element-wise products of tanh embeddings, and projected to scores.

    Each placeholder holds `self.batch_size // 2` rows, so the stacked
    question/answer batch has `self.batch_size` rows.

    Returns:
        A tuple (generated_ANS, image, question, answer): the transposed
        score tensor followed by the three input placeholders.
    """
    # Integer division keeps the placeholder dimensions integral. The split
    # point below is derived from it instead of the previous hard-coded 250,
    # which was only correct for batch_size == 500.
    half_batch = self.batch_size // 2

    image = tf.placeholder(tf.float32, [half_batch, self.dim_image])
    question = tf.placeholder(tf.int32, [half_batch, self.max_words_q])
    answer = tf.placeholder(tf.int32, [half_batch, self.max_words_q])

    # Rows [0, half_batch) are questions, rows [half_batch, batch_size) are
    # answers.
    question_ans = tf.concat(0, [question, answer])

    inputs = tf.nn.embedding_lookup(self.embed_ques_W, question_ans)
    # Time-major list of per-step tensors, as bidirectional_rnn expects.
    inputs = tf.unpack(tf.transpose(inputs, [1, 0, 2]))

    tf.get_variable_scope().reuse_variables()

    output, state_fw, state_bw = tf.nn.bidirectional_rnn(
        self.forward_dropout, self.backward_dropout, inputs,
        dtype=tf.float32)
    # Forward and backward final states are combined multiplicatively.
    state = tf.mul(state_fw, state_bw)
    state_que = state[0:half_batch, :]
    state_ans = state[half_batch:, :]

    # multimodal (fusing question & image)
    Q_drop = tf.nn.dropout(state_que, 1 - self.drop_out_rate)
    Q_linear = tf.nn.xw_plus_b(Q_drop, self.embed_Q_W, self.embed_Q_b)
    Q_emb = tf.tanh(Q_linear)

    image_drop = tf.nn.dropout(image, 1 - self.drop_out_rate)
    image_linear = tf.nn.xw_plus_b(image_drop, self.embed_image_W,
                                   self.embed_image_b)
    image_emb = tf.tanh(image_linear)

    A_drop = tf.nn.dropout(state_ans, 1 - self.drop_out_rate)
    A_linear = tf.nn.xw_plus_b(A_drop, self.embed_A_W, self.embed_A_b)
    A_emb = tf.tanh(A_linear)

    QI = tf.mul(Q_emb, image_emb)

    QI_drop = tf.nn.dropout(QI, 1 - self.drop_out_rate)
    QI_linear = tf.nn.xw_plus_b(QI_drop, self.embed_QI_W, self.embed_QI_b)
    QI_emb = tf.tanh(QI_linear)

    QIA = tf.mul(QI_emb, A_emb)
    scores_emb = tf.nn.xw_plus_b(QIA, self.embed_scor_W, self.embed_scor_b)

    generated_ANS = tf.transpose(scores_emb)

    return generated_ANS, image, question, answer
print(y)

# Inputs, labels and initial RNN state for one truncated-backprop window.
# The trailing numbers are the original author's notes on the sizes.
batchX_placeholder = tf.placeholder(
    tf.float32, [batch_size, truncated_backprop_length])  # 5, 15
batchY_placeholder = tf.placeholder(
    tf.int32, [batch_size, truncated_backprop_length])  # 5, 15

init_state = tf.placeholder(tf.float32, [batch_size, state_size])  # 5, 4

# Output projection from RNN state to class logits.
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# Unpack columns.
# Inputs are split (not unpacked) so every step keeps a trailing dimension
# of 1 and stays 2-D for the RNN cell.
inputs_series = tf.split(1, truncated_backprop_length, batchX_placeholder)
# The second positional argument of tf.unpack is `num`, not `axis`, so the
# axis has to be passed by keyword to get one label vector per time step.
labels_series = tf.unpack(batchY_placeholder, axis=1)

# Forward passes
cell = tf.nn.rnn_cell.BasicRNNCell(state_size)
states_series, current_state = tf.nn.rnn(cell, inputs_series, init_state)

# b2 has a leading dimension of 1 and is broadcast over the batch.
logits_series = [tf.matmul(state, W2) + b2 for state in states_series]
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

# Keyword arguments keep the call independent of the positional order of
# `logits` and `labels`.
losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    for logits, labels in zip(logits_series, labels_series)
]
total_loss = tf.reduce_mean(losses)
def _rnn_template(incoming, cell, dropout=None, return_seq=False,
                  return_state=False, initial_state=None, dynamic=False,
                  scope=None, name="LSTM"):
    """ RNN Layer Template.

    Wraps `cell` (optionally with dropout), runs it over `incoming` through
    `_rnn`, registers the cell variables, the last activation and the
    output tensor in graph collections, and returns the layer output.

    Arguments:
        incoming: a tensor of rank >= 3, or a list with one tensor per time
            step. A tensor has its first two axes swapped and is unpacked
            into a per-step list before being fed to the RNN.
        cell: the RNN cell; must expose `W` and `b` attributes.
        dropout: a float used as both input and output keep probability, or
            a tuple/list of (input keep prob, output keep prob). Falsy
            values disable dropout.
        return_seq: if True return every step's output, otherwise one
            output per sequence.
        return_state: if True return `(output, state)` instead of `output`.
        initial_state: forwarded to `_rnn`.
        dynamic: if True, sequence lengths are retrieved from `incoming`
            and passed to `_rnn`; when `return_seq` is False the output is
            selected per sequence with `advanced_indexing_op`.
        scope: optional variable scope.
        name: default scope name.

    Returns:
        The layer output, or `(output, state)` when `return_state` is True.

    Raises:
        Exception: if `dropout` is truthy but neither a float nor a
            tuple/list.
    """
    sequence_length = None
    if dynamic:
        sequence_length = retrieve_seq_length_op(
            incoming if isinstance(incoming, tf.Tensor) else tf.pack(incoming))

    input_shape = utils.get_incoming_shape(incoming)

    # Variable Scope fix for older TF
    try:
        vscope = tf.variable_scope(scope, default_name=name, values=[incoming])
    except Exception:
        vscope = tf.variable_op_scope([incoming], scope, name)

    with vscope as scope:
        name = scope.name

        # Keep the unwrapped cell: its W/b attributes are read below, after
        # `cell` may have been replaced by a DropoutWrapper.
        _cell = cell
        # Apply dropout
        if dropout:
            if isinstance(dropout, (tuple, list)):
                in_keep_prob = dropout[0]
                out_keep_prob = dropout[1]
            elif isinstance(dropout, float):
                in_keep_prob, out_keep_prob = dropout, dropout
            else:
                raise Exception("Invalid dropout type (must be a 2-D tuple of "
                                "float)")
            cell = DropoutWrapper(cell, in_keep_prob, out_keep_prob)

        inference = incoming
        # If a tensor given, convert it to a per timestep list.
        # Only real lists are passed through untouched. The previous check
        # also listed `np.array`, which is a function rather than a type and
        # therefore never matched, so array inputs keep taking this
        # conversion path exactly as before.
        if not isinstance(inference, list):
            ndim = len(input_shape)
            assert ndim >= 3, "Input dim should be at least 3."
            axes = [1, 0] + list(range(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unpack(inference)

        outputs, state = _rnn(cell, inference, dtype=tf.float32,
                              initial_state=initial_state, scope=name,
                              sequence_length=sequence_length)

        # Retrieve RNN Variables
        c = tf.GraphKeys.LAYER_VARIABLES + '/' + scope.name
        for v in [_cell.W, _cell.b]:
            if hasattr(v, "__len__"):
                for var in v:
                    tf.add_to_collection(c, var)
            else:
                tf.add_to_collection(c, v)

        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if dynamic:
        if return_seq:
            o = outputs
        else:
            # Swap the first two axes of the stacked per-step outputs before
            # indexing them by sequence length.
            outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])
            o = advanced_indexing_op(outputs, sequence_length)
    else:
        o = outputs if return_seq else outputs[-1]

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, o)

    return (o, state) if return_state else o
def main(args):
    """Train a softmax classifier and return its model directory.

    Creates timestamped log and model directories, loads (and optionally
    filters) the training set, builds a queue-based input pipeline and the
    training graph, optionally restores a pretrained checkpoint, then
    alternates training epochs with checkpointing and, when `args.lfw_dir`
    is set, evaluation on LFW.

    Args:
        args: parsed command-line options. The attributes read here include
            the model definition module, base directories, seed, data and
            filter settings, augmentation flags, optimizer and learning
            rate settings, and the LFW settings.

    Returns:
        The path of the directory the model is saved to.
    """
    network = importlib.import_module(args.model_def, 'inference')

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, args.filter_filename,
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        # Expand '~' before the directory is inspected, so the lookup and
        # the joined checkpoint path refer to the same location.
        pretrained_dir = os.path.expanduser(args.pretrained_model)
        _, ckpt_file = facenet.get_model_filenames(pretrained_dir)
        pretrained_model = os.path.join(pretrained_dir, ckpt_file)
        print('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        image_list, label_list = facenet.shuffle_examples(image_list, label_list)

        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1,), (1,)],
                                              shared_name=None, name=None)
        # enqueue_op feeds image paths and labels into the queue; the
        # preprocessing branches below dequeue them.
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            # Each iteration adds one independent dequeue-and-preprocess
            # branch to the graph.
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unpack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_png(file_contents)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        # Several preprocessing branches feed one batch, so
        # tf.train.batch_join is used instead of tf.train.batch or
        # tf.train.shuffle_batch.
        image_batch, label_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        label_batch = tf.identity(label_batch, 'label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))

        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         weight_decay=args.weight_decay)
        logits = slim.fully_connected(
            prelogits, len(train_set), activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits', reuse=False)

        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch.
        # Keyword arguments keep the call independent of the positional
        # order of `labels` and `logits`.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch, logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        # Start the queue runners that fill the batching queue, so that
        # dequeuing batches does not block forever.
        tf.train.start_queue_runners(sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                # The epoch counter is derived from the global step.
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list, enqueue_op,
                      image_paths_placeholder, labels_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      batch_size_placeholder, global_step, total_loss,
                      train_op, summary_op, summary_writer,
                      regularization_losses, args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, embeddings, label_batch,
                             lfw_paths, actual_issame, args.lfw_batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer)
    return model_dir