Esempio n. 1
0
    def log_prob(self, xs, zs):
        """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])].

        Every row of `zs` is split by `self.unpack_params` into `pi`, `mus`
        and `sigmas`. For each row the log prior (Dirichlet on `pi`, normal
        on every entry of `mus`, inverse-gamma on every entry of `sigmas`)
        is added to the log-likelihood term accumulated over all rows of
        `xs` and all `self.K` components.

        Parameters
        ----------
        xs : tensor whose first axis is unpacked into data points.
        zs : tensor whose first axis is unpacked into parameter samples.

        Returns
        -------
        Tensor obtained by packing one scalar per row of `zs`.
        """
        # Standard deviation of the prior on the means; loop-invariant.
        prior_scale = np.sqrt(self.c)
        # Loop over each mini-batch zs[b,:]
        log_prob = []
        for z in tf.unpack(zs):
            pi, mus, sigmas = self.unpack_params(z)
            log_prior = dirichlet.logpdf(pi, self.alpha)
            for k in xrange(self.K):
                # Component k owns the flat slice [k*D, (k+1)*D) of mus and
                # sigmas (the same slice the likelihood uses below), so the
                # prior covers all D entries rather than a hard-coded two.
                for d in xrange(self.D):
                    log_prior += norm.logpdf(mus[k*self.D + d], 0, prior_scale)
                for d in xrange(self.D):
                    log_prior += invgamma.logpdf(sigmas[k*self.D + d],
                                                 self.a, self.b)

            log_lik = tf.constant(0.0, dtype=tf.float32)
            for x in tf.unpack(xs):
                for k in xrange(self.K):
                    log_lik += tf.log(pi[k])
                    log_lik += multivariate_normal.logpdf(x,
                        mus[(k*self.D):((k+1)*self.D)],
                        sigmas[(k*self.D):((k+1)*self.D)])

            log_prob += [log_prior + log_lik]

        return tf.pack(log_prob)
Esempio n. 2
0
    def log_prob(self, xs, zs):
        """Returns a vector [log p(xs, zs[1,:]), ..., log p(xs, zs[S,:])].

        Every row of `zs` is split by `self.unpack_params` into unconstrained
        `pi`, `mus` and `sigmas`. `pi` is squashed with a sigmoid and its
        last entry replaced by one minus the sum of the first `K - 1`
        entries; `sigmas` goes through a softplus. The log prior (Dirichlet
        on `pi`, normal on every entry of `mus`, inverse-gamma on every
        entry of `sigmas`) is then added to the log-likelihood term
        accumulated over all rows of `xs` and all `self.K` components.

        Parameters
        ----------
        xs : tensor whose first axis is unpacked into data points.
        zs : tensor whose first axis is unpacked into parameter samples.

        Returns
        -------
        Tensor obtained by packing one scalar per row of `zs`.
        """
        # Standard deviation of the prior on the means; loop-invariant.
        prior_scale = np.sqrt(self.c)
        # Loop over each mini-batch zs[b,:]
        log_prob = []
        for z in tf.unpack(zs):
            # Do the unconstrained to constrained transformation for MAP here.
            pi, mus, sigmas = self.unpack_params(z)
            pi = tf.sigmoid(pi)
            pi = tf.concat(0, [pi[0:(self.K-1)],
                         tf.expand_dims(1.0 - tf.reduce_sum(pi[0:(self.K-1)]), 0)])
            sigmas = tf.nn.softplus(sigmas)
            log_prior = dirichlet.logpdf(pi, self.alpha)
            for k in xrange(self.K):
                # Component k owns the flat slice [k*D, (k+1)*D) of mus and
                # sigmas (the same slice the likelihood uses below), so the
                # prior covers all D entries rather than a hard-coded two.
                for d in xrange(self.D):
                    log_prior += norm.logpdf(mus[k*self.D + d], 0, prior_scale)
                for d in xrange(self.D):
                    log_prior += invgamma.logpdf(sigmas[k*self.D + d],
                                                 self.a, self.b)

            log_lik = tf.constant(0.0, dtype=tf.float32)
            for x in tf.unpack(xs):
                for k in xrange(self.K):
                    log_lik += tf.log(pi[k])
                    log_lik += multivariate_normal.logpdf(x,
                        mus[(k*self.D):((k+1)*self.D)],
                        sigmas[(k*self.D):((k+1)*self.D)])

            log_prob += [log_prior + log_lik]

        return tf.pack(log_prob)
Esempio n. 3
0
 def language_model(X, y):
   """Build a GRU sequence model over one-hot inputs with 256 symbols.

   `X` is one-hot encoded with depth 256 and split along axis 1, `y` is
   split along axis 1 as well, and a GRU cell (size `hidden_size`) whose
   output is projected to 256 units is run over the input steps. The
   per-step outputs and targets are handed to
   `learn.ops.sequence_classifier`, whose result is returned.
   """
   one_hot = learn.ops.one_hot_matrix(X, 256)
   step_inputs = tf.unpack(one_hot, axis=1)
   step_targets = tf.unpack(y, axis=1)
   gru = tf.nn.rnn_cell.GRUCell(hidden_size)
   projected_cell = tf.nn.rnn_cell.OutputProjectionWrapper(gru, 256)
   step_outputs, _ = tf.nn.rnn(projected_cell, step_inputs, dtype=tf.float32)
   return learn.ops.sequence_classifier(step_outputs, step_targets)
Esempio n. 4
0
File: var_rnn.py Progetto: PFCM/rnns
def inference(input_var, state_size, vocab_size, num_steps, batch_size, noise_var,
              decoder_inputs, scope):
    """Build the training and sampling graphs for a `VarRNN` decoder.

    Args:
        input_var: tensor unpacked along axis 1 into the per-step inputs of
            the training decoder.
        state_size: size passed to `VarRNN`; also the width the decoder
            outputs are reshaped to before the softmax projection.
        vocab_size: number of symbols; width of the softmax projection.
        num_steps: not referenced in this function.
        batch_size: batch size used for the training initial state.
        noise_var: forwarded to `VarRNN`.
        decoder_inputs: inputs handed to the sampling decoder.
        scope: variable scope shared by both decoders; `reuse_variables()`
            is called on it before the sampling decoder is built.

    Returns:
        Tuple `(logits, state, init_state, samples)`: projected training
        logits, the state returned by the training decoder, the zero
        initial state, and the unpacked argmax of the sampling decoder.
    """

    cell = VarRNN(state_size, noise_var)

    # One tensor per step, taken along axis 1 of input_var.
    inputs = tf.unpack(input_var, axis=1)
    init_state = cell.zero_state(batch_size, tf.float32)
    softmax_w = tf.get_variable('softmax_w', [state_size, vocab_size])
    softmax_b = tf.get_variable('softmax_b', [vocab_size])
    # NOTE(review): the literal 32 sits in the position that presumably is
    # the decoder's embedding size - confirm against the decoder signature.
    outputs, state = tf.nn.seq2seq.embedding_rnn_decoder(
        inputs, init_state, cell, vocab_size, 32,
        output_projection=(softmax_w, softmax_b), scope=scope)
    # Concatenate the per-step outputs and apply the softmax projection.
    logits = tf.reshape(tf.concat(1, outputs), [-1, state_size])
    logits = tf.matmul(logits, softmax_w) + softmax_b

    # The sampling decoder starts from a zero state for a batch of one.
    sample_init = cell.zero_state(1, tf.float32)
    print('got model')
    # The second decoder must share the variables created by the first one.
    scope.reuse_variables()
    samples, _ = tf.nn.seq2seq.embedding_rnn_decoder(
        decoder_inputs, sample_init, cell, vocab_size, 32,
        output_projection=(softmax_w, softmax_b), feed_previous=True,
        scope=scope)
    samples = tf.reshape(tf.concat(1, samples), [-1, state_size])
    samples = tf.matmul(samples, softmax_w) + softmax_b
    # Pick the highest-scoring symbol per row, then split into a list.
    samples = tf.argmax(samples, 1)
    samples = tf.unpack(tf.squeeze(samples))
    print('got sampling model')
    

    return logits, state, init_state, samples
Esempio n. 5
0
def dynamic_vae_single(T = 50, d_z = 1, d_hidden=2, d_x = 10):
    """Build a latent chain `z`, a Bernoulli decoder `x`, and attach a posterior.

    The latent chain is a `LinearGaussian` built from an identity matrix, a
    zero bias and an identity covariance of size `d_z`. A synthetic
    observation is sampled from the decoder and observed; an encoder over
    that observation supplies per-step means and variances which become the
    unary factors of a `LinearGaussianChainCRF` attached to `z`.

    Returns:
        Tuple `(x, z, x_sampled)`.
    """
    # MODEL
    identity_mat = np.eye(d_z, dtype=np.float32)
    zero_bias = np.zeros((d_z,), dtype=np.float32)
    unit_cov = np.eye(d_z, dtype=np.float32)
    step_noise = MVGaussianMeanCov(zero_bias, unit_cov)

    w1, w2, b1, b2 = decoder_params(d_z, d_hidden, d_x)

    # The bias/covariance pair is deliberately passed twice, as in the
    # original call (presumably prior and transition noise - confirm).
    z = LinearGaussian(T, zero_bias, unit_cov,
                       identity_mat, zero_bias, unit_cov,
                       name="z")
    x = VAEDecoderBernoulli(z, w1, w2, b1, b2, name="x")

    # SYNTHETIC OBSERVATION
    x_sampled = x.sample(0)
    q_x = x.observe(x_sampled)

    # INFERENCE MODEL
    encoder = VAEEncoder(q_x.sample, d_hidden, d_z)
    step_means = tf.unpack(encoder.mean)
    step_vars = tf.unpack(encoder.variance)
    unary_factors = []
    for step_mean, step_var in zip(step_means, step_vars):
        unary_factors.append(MVGaussianMeanCov(step_mean, tf.diag(step_var)))
    tmat = tf.constant(identity_mat)
    q_z = LinearGaussianChainCRF((T, d_z), tmat, step_noise, unary_factors)
    z.attach_q(q_z)

    return x, z, x_sampled
Esempio n. 6
0
  def _calc_rewards(self, action_list, name="rewards"):
    """Score predicted orderings by checking each adjacent pair of tokens.

    The hardened actions are used as indices into the input tokens of each
    batch element. A step earns reward 1.0 when its gathered token is
    greater than the token gathered at the previous step, else 0.0; the
    first step always gets 0.

    Returns:
      Tuple `(rewards, rewards_unpacked)`: the transposed reward matrix and
      the same matrix unpacked into `self.seq_length` tensors named `name`.
    """
    batch_size = FLAGS.batch_size

    hardened = self.harden_actions(action_list)
    actions_per_example = tf.unpack(tf.transpose(hardened), batch_size)

    # batch_size * seq_length
    stacked_tokens = tf.pack(self.input_tokens)
    tokens_per_example = tf.unpack(tf.transpose(stacked_tokens), batch_size)

    # "Dereference" the predicted sorts, which are index sequences.
    gathered = []
    for i in range(batch_size):
      gathered.append(tf.gather(tokens_per_example[i], actions_per_example[i]))
    expanded = [tf.expand_dims(row, 0) for row in gathered]
    predicted = tf.concat(0, expanded)

    # Compute per-timestep rewards by evaluating constraint violations.
    following = tf.slice(predicted, [0, 1], [-1, -1])
    preceding = tf.slice(predicted, [0, 0], [-1, self.seq_length - 1])
    rewards = tf.cast(following > preceding, tf.float32)
    # Add reward for t = 0, fixed as 0
    first_step = tf.zeros((batch_size, 1))
    rewards = tf.concat(1, [first_step, rewards])

    rewards = tf.transpose(rewards)
    rewards_unpacked = tf.unpack(rewards, self.seq_length, name=name)

    return rewards, rewards_unpacked
Esempio n. 7
0
 def testCannotInferNumFromUnknownShape(self):
   """tf.unpack and tf.unstack must reject a tensor of unknown shape."""
   x = tf.placeholder(np.float32)
   expected_message = r'Cannot infer num from shape <unknown>'
   for split_op in (tf.unpack, tf.unstack):
     with self.assertRaisesRegexp(ValueError, expected_message):
       split_op(x)
Esempio n. 8
0
 def testCannotInferNumFromNoneShape(self):
   """tf.unpack and tf.unstack must reject an unknown leading dimension."""
   x = tf.placeholder(np.float32, shape=(None,))
   expected_message = r'Cannot infer num from shape \(\?,\)'
   for split_op in (tf.unpack, tf.unstack):
     with self.assertRaisesRegexp(ValueError, expected_message):
       split_op(x)
Esempio n. 9
0
    def __init__(self, input_size, output_size):
        """Build the graph that predicts a trend from hyper-parameters.

        The hyper-parameter placeholder is turned into variables, repeated
        `rnn_step` times, passed through an RNN and then a DNN. Depending on
        the boolean placeholder `is_fit`, the loss is either the distance
        between the prediction and `train_label`, or the last element of the
        prediction.

        Args:
            input_size: number of hyper-parameters (`self.hyper_cnt`).
            output_size: length of the predicted trend and of `train_label`.
        """
        self.graph = tf.Graph()
        self.hyper_cnt = input_size
        self.save_path = "fit_trend.ckpt"

        # Containers filled elsewhere; they start empty here.
        self.collect_counter = 0
        self.fit_loss_collect = list()
        self.stable_loss_predict_collect = list()
        self.hp_collect = [list() for _ in range(self.hyper_cnt)]
        self.gradient_collect = [list() for _ in range(self.hyper_cnt)]
        self.stable_loss_label_collect = list()

        self.hp_norms = list()
        self.has_init = False

        with self.graph.as_default():
            # Receive the input
            self.ph_hypers = tf.placeholder(tf.float32, shape=[self.hyper_cnt], name='ph_hypers')
            self.tf_hypers, self.reset_vars = assign_diffable_vars2tensor(self.ph_hypers, self.hyper_cnt)
            rnn_step = 5
            # The same hyper-parameter tensor is repeated rnn_step times.
            trend_input = tf.concat(0, [self.tf_hypers for _ in range(rnn_step)])
            # Pass through an RNN
            trend_outputs = rnn(trend_input, n_hidden=128)
            print('rnn output')
            print(tf.concat(0, trend_outputs))
            # Feed the RNN output into a DNN
            trend_output = dnn(tf.concat(0, trend_outputs), [1, output_size])
            print('dnn output')
            print(trend_output)
            self.predict = trend_output
            # The actual trend
            self.train_label = tf.placeholder(tf.float32, shape=[output_size], name='train_label')
            # Prediction accuracy: Euclidean distance between predict and trend
            predict_accuracy = tf.sqrt(tf.reduce_sum(tf.square(tf.sub(trend_output, self.train_label)))) / output_size
            # predict_accuracy /= tf.reduce_mean(tf.concat(0, self.train_label))
            # Loss once stable: the last loss value of the predicted trend
            stable_loss = tf.unpack(tf.unpack(trend_output)[0])[-1]
            print(stable_loss)
            self.is_fit = tf.placeholder(tf.bool, name='is_fit')
            # Fit mode minimises the prediction error; otherwise the stable loss.
            self.loss = tf.cond(self.is_fit, lambda: predict_accuracy, lambda: stable_loss)

            # Optimizers
            self.var_s = tf.trainable_variables()
            # Assumes the first hyper_cnt trainable variables are the
            # hyper-parameter variables created above - TODO confirm.
            self.v_hp_s = self.var_s[0: self.hyper_cnt]
            self.v_fit_s = [v for v in self.var_s if v not in self.v_hp_s]
            self.grads = var_gradient(self.v_hp_s, self.loss, start_rate=0.1, lrd=False)

            def optimize_fit():
                # Optimizer over the non-hyper-parameter variables.
                optimizer_fit = var_optimizer(self.v_fit_s, self.loss)
                return optimizer_fit

            def optimize_hp():
                # Optimizer over the hyper-parameter variables.
                optimizer_hp = var_optimizer(self.v_hp_s, self.loss, start_rate=0.1, lrd=False)
                return optimizer_hp

            self.optimizer = tf.cond(self.is_fit, optimize_fit, optimize_hp)
            self.saver = tf.train.Saver()
Esempio n. 10
0
 def log_prob(self, xs, zs):
     """Return one log joint value per row of `zs`.

     For each row `z`, the prior term is `norm.logpdf(z, mu, Sigma)` and the
     likelihood term is the sum of `norm.logpdf(xs, z, Sigma)` with `Sigma`
     broadcast to the shape of `xs`. `mu` and `Sigma` come from the
     enclosing scope.
     """
     prior_terms = []
     for z in tf.unpack(zs):
         prior_terms.append(norm.logpdf(z, mu, Sigma))
     log_prior = tf.pack(prior_terms)

     lik_terms = []
     for z in tf.unpack(zs):
         # 0*xs+Sigma gives a scale with the same shape as xs.
         per_point = norm.logpdf(xs, z, 0*xs+Sigma)
         lik_terms.append(tf.reduce_sum(per_point))
     log_lik = tf.pack(lik_terms)

     return log_lik + log_prior
Esempio n. 11
0
def get_placeholders(batch_size, sequence_length, num_features):
    """Create input and target placeholders and split them per time step.

    Both placeholders are float32 with shape
    `[sequence_length, batch_size, num_features]`. Each is unpacked along
    its first axis.

    Returns:
        Tuple `(inputs, targets)` of lists with `sequence_length` tensors.
    """
    shape = [sequence_length, batch_size, num_features]
    all_inputs = tf.placeholder(tf.float32, name='all_inputs', shape=shape)
    all_targets = tf.placeholder(tf.float32, name='all_targets', shape=shape)

    input_steps = tf.unpack(all_inputs)
    target_steps = tf.unpack(all_targets)
    return input_steps, target_steps
Esempio n. 12
0
 def testSimple(self):
   """tf.unpack and tf.unstack split a constant along its first axis.

   For several shapes, random data is turned into a constant, split with an
   explicit `num`, and the evaluated pieces are compared with the source.
   """
   np.random.seed(7)
   with self.test_session(use_gpu=True):
     for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
       data = np.random.randn(*shape)
       # Convert data to a single tensorflow tensor
       x = tf.constant(data)
       # Unpack into a list of tensors
       cs_unpacked = tf.unpack(x, num=shape[0])
       # Use tf.unstack here so both entry points are exercised; calling
       # tf.unpack twice left tf.unstack untested.
       cs_unstacked = tf.unstack(x, num=shape[0])
       for cs in (cs_unpacked, cs_unstacked):
         self.assertEqual(type(cs), list)
         self.assertEqual(len(cs), shape[0])
         cs = [c.eval() for c in cs]
         self.assertAllEqual(cs, data)
Esempio n. 13
0
 def sequence_loss(self, y_pred, y_true):
     '''
     Loss function for the seq2seq RNN.

     Splits the predictions and the labels along axis 1 into per-step lists,
     builds an all-ones float32 weight for every target step, and delegates
     to seq2seq.sequence_loss.
     '''
     step_logits = tf.unpack(y_pred, axis=1)
     step_targets = tf.unpack(y_true, axis=1)

     # Every step counts equally, so the weights are dummies of ones.
     step_weights = []
     for step_target in step_targets:
         step_weights.append(tf.ones_like(step_target, dtype=tf.float32))

     return seq2seq.sequence_loss(step_logits, step_targets, step_weights)
Esempio n. 14
0
def ndlstm_base_unrolled(inputs, noutput, scope=None, reverse=False):
  """Run an unrolled 1D LSTM over a sequence, forward or backward.

  The input is split along its first axis and a `BasicLSTMCell` is applied
  step by step starting from a zero state.

  Args:
    inputs: input sequence (length, batch_size, ninput)
    noutput: depth of output
    scope: optional scope name
    reverse: run LSTM in reverse

  Returns:
    Output sequence (length, batch_size, noutput), in the original step
    order even when `reverse` is set.

  """
  with tf.variable_scope(scope, "SeqLstmUnrolled", [inputs]):
    length, batch_size, _ = _shape(inputs)
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
    state = tf.zeros([batch_size, lstm_cell.state_size])
    step_outputs = []
    steps = tf.unpack(inputs)
    if reverse:
      steps = steps[::-1]
    for t in xrange(length):
      step_input = steps[t]
      with tf.variable_scope(scope, "SeqLstmStep", [step_input]):
        output, state = lstm_cell(step_input, state)
        step_outputs.append(output)
    if reverse:
      # Put the outputs back into the original time order.
      step_outputs = step_outputs[::-1]
    outputs = tf.pack(step_outputs)
    return outputs
Esempio n. 15
0
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
  """Apply one shared linear + softmax layer to every step of a sequence.

  Args:
    inputs: (length, batch_size, depth) tensor
    noutput: output depth
    scope: optional scope name
    name: optional name for output tensor
    linear_name: name for linear (pre-softmax) output

  Returns:
    A tensor of size (length, batch_size, noutput).

  """
  length, _, ninputs = _shape(inputs)
  steps = tf.unpack(inputs)
  step_outputs = []
  with tf.variable_scope(scope, "SequenceSoftmax", [inputs]):
    weights_init = tf.truncated_normal([0 + ninputs, noutput], stddev=0.1)
    biases_init = tf.constant(0.1, shape=[noutput])
    w = tf.contrib.framework.model_variable("weights",
                                            initializer=weights_init)
    b = tf.contrib.framework.model_variable("biases",
                                            initializer=biases_init)
    for t in xrange(length):
      step_input = steps[t]
      with tf.variable_scope(scope, "SequenceSoftmaxStep", [step_input]):
        # TODO(tmb) consider using slim.fully_connected(...,
        # activation_fn=tf.nn.softmax)
        linear = tf.nn.xw_plus_b(step_input, w, b, name=linear_name)
        step_outputs.append(tf.nn.softmax(linear))
    outputs = tf.pack(step_outputs, name=name)
  return outputs
Esempio n. 16
0
def sequence_to_final(inputs, noutput, scope=None, name=None, reverse=False):
  """Run an LSTM across all steps and keep only the last step's output.

  Args:
    inputs: (length, batch_size, depth) tensor
    noutput: size of output vector
    scope: optional scope name
    name: optional name for output tensor
    reverse: run in reverse

  Returns:
    Batch of size (batch_size, noutput).
  """
  with tf.variable_scope(scope, "SequenceToFinal", [inputs]):
    length, batch_size, _ = _shape(inputs)
    lstm = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
    state = tf.zeros([batch_size, lstm.state_size])
    steps = tf.unpack(inputs)
    if reverse:
      steps = steps[::-1]
    for t in xrange(length):
      step_input = steps[t]
      with tf.variable_scope(scope, "SequenceToFinalStep", [step_input]):
        output, state = lstm(step_input, state)
    # Only the output produced by the last processed step is returned.
    outputs = tf.reshape(output, [batch_size, noutput], name=name)
    return outputs
Esempio n. 17
0
    def _sample_forward(self, back_filtered, eps):
        """Sample the chain forward in time from back-filtered distributions.

        The first state is sampled from `back_filtered[0]`. Every later
        state is sampled from `back_filtered[t]` multiplied by a Gaussian
        whose mean is the step noise mean plus the transition matrix applied
        to the previous sample, and whose covariance is the step noise
        covariance.

        Side effects:
            Stores the per-step distributions in `self.sampling_dists` and
            their entropies in `self.entropies`.

        Returns:
            Tuple `(sample, entropy)`: the stacked samples reshaped to
            `self.output_shape`, and the sum of the per-step entropies.
        """
        noise_draws = tf.unpack(eps)

        current_dist = back_filtered[0]
        z_prev = current_dist.sample(noise_draws[0])
        drawn = [z_prev]

        dists = [current_dist]
        step_entropies = [current_dist.entropy()]
        for t in np.arange(1, self.T):
            predicted_mean = tf.matmul(self._transition_mat(t-1), z_prev)
            step_noise = self._gaussian_noise(t-1)

            incoming = MVGaussianMeanCov(step_noise.mean() + predicted_mean,
                                         step_noise.cov())

            current_dist = back_filtered[t].multiply_density(incoming)
            dists.append(current_dist)

            z_prev = current_dist.sample(noise_draws[t])
            step_entropies.append(current_dist.entropy())
            drawn.append(z_prev)

        self.sampling_dists = dists
        self.entropies = step_entropies

        entropy = tf.reduce_sum(tf.pack(step_entropies))
        stacked = tf.squeeze(tf.pack(drawn))
        sample = tf.reshape(stacked, self.output_shape)
        return sample, entropy
Esempio n. 18
0
def simple_rnn(incoming, n_units, activation='sigmoid', bias=True,
               weights_init='truncated_normal', return_seq=False,
               trainable=True, restore=True, name="SimpleRNN"):
    """ Simple RNN.

    Simple Recurrent Layer.

    Input:
        3-D Tensor [samples, timesteps, input dim].

    Output:
        if `return_seq`: the per-timestep outputs returned by the
            underlying RNN call.
        else: the output of the last timestep only.

    Arguments:
        incoming: `Tensor`. Incoming 3-D Tensor.
        n_units: `int`, number of units for this layer.
        activation: `str` (name) or `Tensor`. Activation applied to this layer.
            (See tflearn.activations). Default: 'sigmoid'.
        bias: `bool`. If True, a bias is used.
        weights_init: `str` (name) or `Tensor`. Weights initialization.
            (See tflearn.initializations) Default: 'truncated_normal'.
        return_seq: `bool`. If True, returns the full sequence instead of
            last sequence output only.
        trainable: `bool`. Forwarded to the `BasicRNNCell` constructor.
        restore: `bool`. If False, the cell's weights (and bias, when used)
            are added to the `EXCL_RESTORE_VARS` collection.
        name: `str`. A name for this layer (optional).

    """
    input_shape = utils.get_incoming_shape(incoming)
    W_init = initializations.get(weights_init)()

    with tf.name_scope(name) as scope:
        cell = BasicRNNCell(n_units, activation, bias, W_init, trainable)

        inference = incoming
        # If a tensor given, convert it to a per timestep list
        # NOTE(review): `np.array` is a function, not a type, so
        # `type(inference)` can never equal it; only plain lists skip this
        # branch. `np.ndarray` may have been intended - confirm before
        # changing, since ndarray inputs currently go through the transpose.
        if type(inference) not in [list, np.array]:
            ndim = len(input_shape)
            assert ndim >= 3, "Input dim should be at least 3."
            # Swap the first two axes so the time axis comes first.
            axes = [1, 0] + list(range(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unpack(inference)

        # Track per layer variables
        tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + scope,
                             cell.W)
        if not restore:
            tf.add_to_collection(tf.GraphKeys.EXCL_RESTORE_VARS, cell.W)
        if bias:
            tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + scope,
                                 cell.b)
            if not restore:
                tf.add_to_collection(tf.GraphKeys.EXCL_RESTORE_VARS, cell.b)

        # scope[:-1] drops the last character of the name scope string
        # (presumably its trailing '/').
        outputs, states = _rnn(cell, inference, dtype=tf.float32,
                               scope=scope[:-1])

        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    return outputs if return_seq else outputs[-1]
Esempio n. 19
0
 def log_prob(self, xs, zs):
     """Return the log prior plus one log-likelihood value per row of `zs`.

     The prior is a multivariate normal over `zs` with covariance
     `self.kernel(xs)`. For each row `z`, the likelihood sums the Bernoulli
     log-pmf of `xs[:,0]` with probability
     `self.inverse_link(xs[:,0] * z)`.
     """
     K = self.kernel(xs)
     log_prior = multivariate_normal.logpdf(zs[:, :], cov=K)

     lik_terms = []
     for z in tf.unpack(zs):
         observed = xs[:,0]
         probs = self.inverse_link(tf.mul(xs[:,0], z))
         lik_terms.append(tf.reduce_sum(bernoulli.logpmf(observed, probs)))
     log_lik = tf.pack(lik_terms)

     return log_prior + log_lik
Esempio n. 20
0
def rnn_model(x, y):
  """Classify a sequence of word indexes with a GRU encoder.

  Returns a tuple `(predictions, loss, train_op)` where `predictions` is a
  dict with the argmax class under 'class' and the model prediction under
  'prob'.
  """
  # Look up an embedding for every word index: an embedding matrix of
  # [n_words, EMBEDDING_SIZE] maps the input to
  # [batch_size, sequence_length, EMBEDDING_SIZE].
  embedded = learn.ops.categorical_variable(
      x, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')

  # One [batch_size, EMBEDDING_SIZE] tensor per word position; the
  # document-length axis is removed by the split.
  per_word = tf.unpack(embedded, axis=1)

  # GRU cell whose hidden size equals EMBEDDING_SIZE.
  gru_cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)

  # Unrolled RNN over the word list; only the final state is kept.
  _, encoding = tf.nn.rnn(gru_cell, per_word, dtype=tf.float32)

  # The final state is the feature vector for logistic regression over
  # the 15 output classes.
  one_hot_target = tf.one_hot(y, 15, 1, 0)
  prediction, loss = learn.models.logistic_regression(encoding,
                                                      one_hot_target)

  # Create a training op.
  global_step = tf.contrib.framework.get_global_step()
  train_op = tf.contrib.layers.optimize_loss(
      loss, global_step, optimizer='Adam', learning_rate=0.01)

  predictions = {'class': tf.argmax(prediction, 1), 'prob': prediction}
  return predictions, loss, train_op
Esempio n. 21
0
def inference (images, train=True):
    """Build a stack of `cp_layer` blocks followed by a transposed-conv upscale.

    Args:
        images: input tensor; its dynamic shape (with the last entry replaced
            by `FLAGS.out_channels`) is used as the upscale output shape.
        train: when true, a dropout op is inserted before the score layer.

    Returns:
        Tuple `(logits, score, params)`: the upscaled output, the output of
        the "score" layer before upscaling, and the `params` list that is
        handed to every `cp_layer` call.
    """
    params = []
    out = cp_layer(images, "layer1", params, 5, 2, 2, 2, FLAGS.channels, 100)
    out = cp_layer(out, "layer2", params, 5, 2, 2, 2, 100, 200)
    # NOTE(review): "layer2" is used for two consecutive layers; confirm
    # whether this one was meant to have its own name.
    out = cp_layer(out, "layer2", params, 3, 1, None, None, 200, 300)
    out = cp_layer(out, "layer3", params, 3, 1, None, None, 300, 300)
    if train:
        # NOTE(review): the second argument of tf.nn.dropout is the keep
        # probability, so 0.1 keeps only 10% of activations - confirm.
        out = tf.nn.dropout(out, 0.1, name='dropout')
    out = cp_layer(out, "score", params, 1, 1, None, None, 300, FLAGS.out_channels, relu=False)
    score = out
    with tf.name_scope('upscale'):
        # Split the dynamic shape into scalars so the last entry can be
        # swapped for the number of output channels.
        shape = tf.unpack(tf.shape(images))
        print(shape.__class__)
        shape.pop()
        shape.append(tf.constant(FLAGS.out_channels, dtype=tf.int32))
        print(len(shape))
        filters = tf.Variable(
                        tf.truncated_normal(
                            [31, 31, FLAGS.out_channels, FLAGS.out_channels],
                            dtype=tf.float32,
                            stddev=0.01),
                        name='filters')
        # Upscale with stride 16 in both spatial dimensions.
        logits = tf.nn.conv2d_transpose(out, filters, tf.pack(shape),
                        [1,16,16,1], padding='SAME', name='upscale')
        # do we want to add bias?
    return logits, score, params
Esempio n. 22
0
def get_batch_tensor(batch_size, sequence_length, num_epochs,
                     filename='names.txt',
                     preprocessor=_clean):
    """Gets the data in good tensorflow ways. Adds a queue runner so be sure to
    start it.

    Args:
        batch_size: number of sequences per batch.
        sequence_length: number of records per sequence.
        num_epochs: forwarded to `tf.train.slice_input_producer`.
        filename: path of the text file that is read in full.
        preprocessor: callable applied to the file contents; its result must
            expose `.shape` and support slicing (presumably a numpy array -
            confirm against `_clean`).

    Returns:
        The list produced by unpacking the transposed batch tensor.
    """
    with tf.name_scope('input'):
        # the data is tiny so just load it, clean it and throw it into a
        # constant
        with open(filename) as f:
            all_data = f.read()
        # process it
        all_data = preprocessor(all_data)
        # just chop off the end to make sure sequence_length * batch_size
        # divides the total number of records
        print(all_data)
        num_batches = all_data.shape[0] // (sequence_length * batch_size)
        all_data = all_data[:num_batches * sequence_length * batch_size]
        # One row per sequence of sequence_length records.
        all_data = np.reshape(all_data, (-1, sequence_length))
        # and make the queue
        data = tf.train.slice_input_producer(
            [tf.constant(all_data)],
            num_epochs=num_epochs,
            shuffle=True,
            capacity=batch_size*sequence_length)

        # very much unconvinced this is all the right way round
        # NOTE(review): `data` is the producer's return value and is wrapped
        # in another list with enqueue_many=True - confirm the resulting
        # batch layout before relying on it.
        batch = tf.train.batch([data], batch_size=batch_size,
                               enqueue_many=True, num_threads=2)
        batch = tf.transpose(batch)
        return tf.unpack(batch)
Esempio n. 23
0
def cumprod(xs):
    """Cumulative product of a tensor along first dimension.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        vector, matrix, or n-Tensor

    Returns
    -------
    tf.Tensor
        A Tensor with `cumprod` applied along its first dimension.
    """
    slices = tf.unpack(xs)
    # Start from ones so the first running product equals the first slice.
    running = tf.ones_like(slices[0])
    partials = []
    for current in slices:
        running = running * current
        partials.append(running)

    return tf.pack(partials)
Esempio n. 24
0
 def Loop(cell, w, i):
   """Apply `cell` over `self.NUM_UNROLL` slices of `i` and return the last `m`.

   Both recurrent values start as zeros shaped like the first slice.
   """
   steps = tf.unpack(i, self.NUM_UNROLL)
   m = tf.zeros_like(steps[0])
   c = tf.zeros_like(steps[0])
   for t in range(self.NUM_UNROLL):
     m, c = cell(steps[t], m, c, w)
   return m
Esempio n. 25
0
 def testInferNum(self):
   """tf.unpack infers the piece count from a fully known static shape."""
   shapes = ((2,), (3,), (2, 3), (3, 2), (4, 3, 2))
   with self.test_session():
     for shape in shapes:
       placeholder = tf.placeholder(np.float32, shape=shape)
       pieces = tf.unpack(placeholder)
       self.assertEqual(type(pieces), list)
       self.assertEqual(len(pieces), shape[0])
Esempio n. 26
0
def cumprod(xs):
    """Cumulative product of a tensor along its outer dimension.

    The input is checked for finiteness and cast to float32 before the
    product is accumulated slice by slice.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        A 1-D or higher tensor.

    Returns
    -------
    tf.Tensor
        A tensor with `cumprod` applied along its outer dimension.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    finite_check = tf.verify_tensor_all_finite(xs, msg='')
    xs = control_flow_ops.with_dependencies([finite_check], xs)
    xs = tf.cast(xs, dtype=tf.float32)

    slices = tf.unpack(xs)
    # Start from ones so the first running product equals the first slice.
    running = tf.ones_like(slices[0])
    partials = []
    for current in slices:
        running = running * current
        partials.append(running)

    return tf.pack(partials)
def print_progress(self, t, losses, sess):
    """Every `self.n_print` iterations, report the loss and plot sampled functions.

    Prints the mean loss and the variational parameters, draws ten parameter
    samples from the variational mean and standard deviation with a fixed
    random seed, maps 400 inputs in [-3, 3] through the model for each
    sample, and plots the results over the data.
    """
    if t % self.n_print != 0:
        return

    print("iter %d loss %.2f " % (t, np.mean(losses)))
    self.variational.print_params(sess)

    # Sample functions from variational model
    mean, std = sess.run([self.variational.m, self.variational.s])
    rng = np.random.RandomState(0)
    draws = rng.randn(10, self.variational.num_vars) * std + mean
    draws = tf.constant(draws, dtype=tf.float32)
    inputs = np.linspace(-3, 3, num=400, dtype=np.float32)
    grid = tf.expand_dims(tf.constant(inputs), 1)
    mapped = [self.model.mapping(grid, z) for z in tf.unpack(draws)]
    outputs = sess.run(tf.pack(mapped))

    # Get data
    data = self.data.data
    y, x = sess.run([data[:, 0], data[:, 1]])

    # Plot data and functions
    plt.cla()
    ax.plot(x, y, 'bx')
    ax.plot(inputs, outputs.T)
    ax.set_xlim([-3, 3])
    ax.set_ylim([-0.5, 1.5])
    plt.draw()
Esempio n. 28
0
    def _tile_along_beam(cls, beam_size, state):
        """Repeat every first-axis entry of `state` `beam_size` times.

        Nested sequences are handled recursively. For a tensor, a new axis
        is inserted at position 1, tiled `beam_size` times, and merged back
        into the first axis, so each original row appears `beam_size` times
        in a row.

        Args:
            beam_size: number of copies per first-axis entry.
            state: a `tf.Tensor` of rank at least 1, or a nested sequence of
                such tensors.

        Returns:
            A structure like `state` with every tensor tiled.

        Raises:
            ValueError: if `state` is neither a sequence nor a `tf.Tensor`.
        """
        if nest.is_sequence(state):
            return nest_map(
                lambda val: cls._tile_along_beam(beam_size, val),
                state
            )

        if not isinstance(state, tf.Tensor):
            raise ValueError("State should be a sequence or tensor")

        tensor = state

        tensor_shape = tensor.get_shape().with_rank_at_least(1)

        try:
            new_first_dim = tensor_shape[0] * beam_size
        except Exception:
            # The static size cannot be computed; leave it unknown. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            new_first_dim = None

        # Dynamic shape entries are needed for the non-leading dimensions.
        dynamic_tensor_shape = tf.unpack(tf.shape(tensor))
        res = tf.expand_dims(tensor, 1)
        res = tf.tile(res, [1, beam_size] + [1] * (tensor_shape.ndims-1))
        res = tf.reshape(res, [-1] + list(dynamic_tensor_shape[1:]))
        # Restore whatever static shape information is available.
        res.set_shape([new_first_dim] + list(tensor_shape[1:]))
        return res
Esempio n. 29
0
        def unit(x, hidden_memory_tm1):
            """Compute one LSTM step.

            `hidden_memory_tm1` is unpacked into the previous hidden state
            and the previous memory cell; the new hidden state and memory
            cell are returned packed together in the same order.
            """
            h_prev, c_prev = tf.unpack(hidden_memory_tm1)

            def pre_activation(w_input, w_hidden, offset):
                # Shared affine form used by all gates and the candidate.
                return (tf.matmul(x, w_input) +
                        tf.matmul(h_prev, w_hidden) + offset)

            # Input, forget and output gates
            input_gate = tf.sigmoid(pre_activation(self.Wi, self.Ui, self.bi))
            forget_gate = tf.sigmoid(pre_activation(self.Wf, self.Uf, self.bf))
            output_gate = tf.sigmoid(
                pre_activation(self.Wog, self.Uog, self.bog))

            # Candidate memory cell
            candidate = tf.nn.tanh(pre_activation(self.Wc, self.Uc, self.bc))

            # Final memory cell
            c = forget_gate * c_prev + input_gate * candidate

            # Current hidden state
            h = output_gate * tf.nn.tanh(c)

            return tf.pack([h, c])
Esempio n. 30
0
def rnn_model(features, target):
  """Classify a sequence of word indexes with a GRU encoder.

  Returns a tuple `(predictions, loss, train_op)` where `predictions` is a
  dict with the argmax class under 'class' and the softmax of the logits
  under 'prob'.
  """
  # Look up an embedding for every word index: an embedding matrix of
  # [n_words, EMBEDDING_SIZE] maps the input to
  # [batch_size, sequence_length, EMBEDDING_SIZE].
  embedded = tf.contrib.layers.embed_sequence(
      features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE, scope='words')

  # One [batch_size, EMBEDDING_SIZE] tensor per word position; the
  # document-length axis is removed by the split.
  per_word = tf.unpack(embedded, axis=1)

  # GRU cell whose hidden size equals EMBEDDING_SIZE.
  gru_cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)

  # Unrolled RNN over the word list; only the final state is kept.
  _, encoding = tf.nn.rnn(gru_cell, per_word, dtype=tf.float32)

  # The final state feeds a linear layer over the 15 output classes,
  # trained with softmax cross entropy against the one-hot target.
  target = tf.one_hot(target, 15, 1, 0)
  logits = tf.contrib.layers.fully_connected(encoding, 15, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, target)

  # Create a training op.
  global_step = tf.contrib.framework.get_global_step()
  train_op = tf.contrib.layers.optimize_loss(
      loss, global_step, optimizer='Adam', learning_rate=0.01)

  predictions = {'class': tf.argmax(logits, 1), 'prob': tf.nn.softmax(logits)}
  return (predictions, loss, train_op)
Esempio n. 31
0
def rnn(step_function,
        inputs,
        initial_states,
        go_backwards=False,
        mask=None,
        constants=None,
        unroll=False,
        input_length=None):
    '''Applies `step_function` to every time slice of `inputs`.

    The time axis (axis 1) is swapped to the front, the tensor is split into
    one slice per step and the step function is called on each slice in turn
    while the states are threaded from one call to the next.

    # Arguments
        step_function: callable `(input, states) -> (output, new_states)`.
            `input` is one time slice of shape (samples, ...); `states` is
            the current state list followed by `constants`.
        inputs: tensor of shape (samples, time, ...), at least 3D.
        initial_states: states passed to the first call of the step
            function.
        go_backwards: if True, the slices (and the mask slices) are visited
            in reverse order.
        mask: optional binary tensor of shape (samples, time, 1); wherever
            it is zero the previous output and state are carried over
            instead of the freshly computed ones.
        constants: optional list appended to the states on every call.
        unroll: not read by this implementation.
        input_length: not read by this implementation.

    # Returns
        A tuple (last_output, outputs, new_states).

        last_output: the output produced for the last visited step.
        outputs: all step outputs stacked and transposed back to
            (samples, time, ...).
        new_states: the states produced for the last visited step.
    '''
    rank = len(inputs.get_shape())
    assert rank >= 3, "Input should be at least 3D."

    # Permutation that swaps the first two axes (samples <-> time).
    time_major = [1, 0] + list(range(2, rank))
    steps = tf.unpack(tf.transpose(inputs, time_major))
    if go_backwards:
        steps.reverse()

    if constants is None:
        constants = []

    states = initial_states
    successive_outputs = []
    successive_states = []

    if mask is None:
        for step_input in steps:
            output, states = step_function(step_input, states + constants)
            successive_outputs.append(output)
            successive_states.append(states)
    else:
        # Transpose not supported by bool tensor types, so go through uint8.
        mask = tf.cast(mask, tf.uint8)
        if len(mask.get_shape()) == rank - 1:
            mask = expand_dims(mask)
        step_masks = tf.unpack(tf.cast(tf.transpose(mask, time_major),
                                       tf.bool))
        if go_backwards:
            step_masks.reverse()

        for step_input, step_mask in zip(steps, step_masks):
            output, candidate_states = step_function(step_input,
                                                     states + constants)

            # tf.select wants a condition of the same shape as its operands,
            # while the per-step mask is (nsamples, 1); repeat it along the
            # second axis until it is as wide as the tensor being selected.
            output_mask = tf.tile(step_mask,
                                  tf.pack([1, tf.shape(output)[1]]))

            if successive_outputs:
                carried_output = successive_outputs[-1]
            else:
                # Nothing to carry over on the first step.
                carried_output = zeros_like(output)
            output = tf.select(output_mask, output, carried_output)

            # Same masking for every state: keep the old state where masked.
            states = [
                tf.select(
                    tf.tile(step_mask,
                            tf.pack([1, tf.shape(candidate)[1]])),
                    candidate,
                    previous)
                for previous, candidate in zip(states, candidate_states)
            ]

            successive_outputs.append(output)
            successive_states.append(states)

    last_output = successive_outputs[-1]
    new_states = successive_states[-1]

    # Stack along a leading time axis, then restore (samples, time, ...).
    stacked = tf.pack(successive_outputs)
    restore = [1, 0] + list(range(2, len(stacked.get_shape())))
    outputs = tf.transpose(stacked, restore)
    return last_output, outputs, new_states
Esempio n. 32
0
# Run inference by hand (instead of a single run call) so that the current
# fit can be redrawn every `n_print` iterations.
# NOTE(review): `ed.MFVI` is presumably mean-field variational inference -
# confirm against the Edward version in use.
inference = ed.MFVI(model, variational, data)
sess = inference.initialize(n_print=10)
for t in range(1000):
    loss = inference.update(sess)
    if t % inference.n_print == 0:
        print("iter {:d} loss {:.2f}".format(t, loss))

        # Draw 10 parameter vectors from the variational model: fetch the
        # current `m` and `s` of its first layer and shift/scale standard
        # normal noise with them.
        mean, std = sess.run(
            [variational.layers[0].m, variational.layers[0].s])
        # The generator is re-seeded with 0 on every pass, so the same noise
        # is reused for each redraw.
        rs = np.random.RandomState(0)
        zs = rs.randn(10, variational.num_vars) * std + mean
        zs = tf.constant(zs, dtype=tf.float32)
        # Evaluate the model mapping on a fixed grid of 400 inputs in [-8, 8],
        # once per sampled parameter vector.
        inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
        x = tf.expand_dims(tf.constant(inputs), 1)
        mus = tf.pack([model.mapping(x, z) for z in tf.unpack(zs)])
        outputs = sess.run(mus)

        # Fetch the observed data: column 0 is bound to y and column 1 to x.
        # Note that this rebinds `x`, which held the grid tensor above.
        y, x = sess.run([data.data[:, 0], data.data[:, 1]])

        # Redraw: observations as blue crosses, one curve per sampled
        # parameter vector (hence the transpose of `outputs`).
        plt.cla()
        ax.plot(x, y, 'bx')
        ax.plot(inputs, outputs.T)
        ax.set_xlim([-8, 8])
        ax.set_ylim([-2, 3])
        plt.draw()
        # Short pause so the figure window gets a chance to refresh.
        plt.pause(1.0 / 60.0)
Esempio n. 33
0
    def create_q_network(self):
        """Build the critic graph under the "critic_net" variable scope.

        The state and action placeholders are flattened to one row per
        (sample, user) pair, passed through a shared ReLU layer, run through
        a bidirectional LSTM, and the two directions are combined by a final
        tanh layer that emits one value per (sample, user) pair.

        Returns:
            A tuple ``(state_input, action_input, q_value_output, params,
            output_state, initial_state_fw, initial_state_bw, step_size)``.
            ``params`` is the list ``[W1_s, W1_a, b1, W2_fw, W2_bw, b2,
            W_lstm, b_lstm]``; ``initial_state_fw`` / ``initial_state_bw``
            are the ``[2, None, fc_layer_size]`` placeholders whose two
            slices seed the forward / backward LSTM state.
        """

        def uniform_init(fan_in):
            # Uniform in [-1/sqrt(fan_in), 1/sqrt(fan_in)].  An initializer
            # object is used instead of a pre-sampled tensor so that
            # get_variable takes the shape from its own `shape` argument and
            # the two can never disagree.
            bound = 1 / math.sqrt(fan_in)
            return tf.random_uniform_initializer(-bound, bound)

        with tf.variable_scope("critic_net") as scope_pi:
            lstm_layer_input = tf.placeholder(
                "float", [None, self.user_num, self.state_dim])
            # Last dimension is action_dim: the tensor is reshaped to
            # [-1, action_dim] below and multiplied by W1_a.
            action_input = tf.placeholder(
                "float", [None, self.user_num, self.action_dim])

            # NOTE(review): fed as `sequence_length` below while being a
            # float placeholder - confirm the dtype expected by the TF
            # version in use.
            step_size = tf.placeholder("float", [1])

            initial_lstm_state_forward = tf.placeholder(
                "float", [2, None, self.fc_layer_size])
            initial_lstm_state_forward_list = tf.unpack(
                initial_lstm_state_forward, axis=0)
            initial_lstm_state_forward_input = tf.nn.rnn_cell.LSTMStateTuple(
                initial_lstm_state_forward_list[0],
                initial_lstm_state_forward_list[1])

            initial_lstm_state_backward = tf.placeholder(
                "float", [2, None, self.fc_layer_size])
            # The backward state must come from the backward placeholder so
            # that the two directions can be seeded independently.
            initial_lstm_state_backward_list = tf.unpack(
                initial_lstm_state_backward, axis=0)
            initial_lstm_state_backward_input = tf.nn.rnn_cell.LSTMStateTuple(
                initial_lstm_state_backward_list[0],
                initial_lstm_state_backward_list[1])

            # One row per (sample, user) pair.
            input_s = tf.reshape(lstm_layer_input, [-1, self.state_dim])
            input_a = tf.reshape(action_input, [-1, self.action_dim])

            # encoder layer parameters
            W1_s = tf.get_variable(
                "W1_s", [self.state_dim, self.fc_layer_size],
                initializer=uniform_init(self.state_dim))
            W1_a = tf.get_variable(
                "W1_a", [self.action_dim, self.fc_layer_size],
                initializer=uniform_init(self.action_dim))
            b1 = tf.get_variable(
                "b1", [self.fc_layer_size],
                initializer=uniform_init(self.state_dim))

            # output layer parameters
            W2_fw = tf.get_variable(
                "W2_fw", [self.fc_layer_size, 1],
                initializer=uniform_init(self.fc_layer_size))
            W2_bw = tf.get_variable(
                "W2_bw", [self.fc_layer_size, 1],
                initializer=uniform_init(self.fc_layer_size))
            # Needs its own name: "b1" is already taken in this scope.
            b2 = tf.get_variable(
                "b2", [1],
                initializer=uniform_init(self.fc_layer_size))

            h_fc = tf.nn.relu(tf.matmul(input_s, W1_s) +
                              tf.matmul(input_a, W1_a) + b1)
            h_fc1 = tf.reshape(h_fc, [-1, self.user_num, self.fc_layer_size])

            # NOTE(review): the cells are built with state_is_tuple=False
            # while the initial states above are LSTMStateTuple instances -
            # confirm this combination is accepted by the TF version in use.
            with tf.variable_scope('forward'):
                lstm_forward_cell = tf.nn.rnn_cell.BasicLSTMCell(
                    self.fc_layer_size, state_is_tuple=False)
            with tf.variable_scope('backward'):
                lstm_backward_cell = tf.nn.rnn_cell.BasicLSTMCell(
                    self.fc_layer_size, state_is_tuple=False)

            # "outputs" is a tuple (outputs_forward, outputs_backward).
            # The call uses time_major=False, so h_fc1 is passed as
            # [batch_size, user_num, fc_layer_size].
            (outputs, output_state) = tf.nn.bidirectional_dynamic_rnn(
                lstm_forward_cell,
                lstm_backward_cell,
                h_fc1,
                initial_state_fw=initial_lstm_state_forward_input,
                initial_state_bw=initial_lstm_state_backward_input,
                sequence_length=step_size,
                time_major=False,
                scope=scope_pi)

            output_fw = tf.reshape(outputs[0], [-1, self.fc_layer_size])
            output_bw = tf.reshape(outputs[1], [-1, self.fc_layer_size])

            # output layer
            q_value_output = tf.reshape(
                tf.tanh(tf.matmul(output_fw, W2_fw) +
                        tf.matmul(output_bw, W2_bw) + b2),
                [-1, self.user_num, 1])

            # Look up the LSTM parameters created by the RNN call above.
            # NOTE(review): a single Matrix/Bias pair is fetched although two
            # cells were built, and the variable path depends on how the TF
            # version nests its scopes - verify these names resolve.
            scope_pi.reuse_variables()
            W_lstm = tf.get_variable("BasicLSTMCell/Linear/Matrix")
            b_lstm = tf.get_variable("BasicLSTMCell/Linear/Bias")

            params = [W1_s, W1_a, b1, W2_fw, W2_bw, b2, W_lstm, b_lstm]
            return (lstm_layer_input, action_input, q_value_output, params,
                    output_state, initial_lstm_state_forward,
                    initial_lstm_state_backward, step_size)
Esempio n. 34
0
def ppc(model,
        variational=None,
        data=Data(),
        T=None,
        size=100,
        sess=tf.Session()):
    r"""
    Posterior predictive check.
    (Rubin, 1984; Meng, 1994; Gelman, Meng, and Stern, 1996)
    If variational is not specified, it defaults to a prior predictive
    check (Box, 1980).

    PPC's form an empirical distribution for the predictive discrepancy,
    p(T) = \int p(T(yrep) | z) p(z | y) dz
    by drawing replicated data sets yrep and calculating T(yrep) for
    each data set. Then it compares it to T(y).

    Note that the default ``data`` and ``sess`` objects are created once,
    when the function is defined, and are shared by every call that omits
    them.

    Parameters
    ----------
    model : Model
        class object with a 'sample_likelihood' method
    variational : Variational, optional
        latent variable distribution q(z) to sample from. It is an
        approximation to the posterior, e.g., a variational
        approximation or an empirical distribution from MCMC samples.
        If not specified, samples will be obtained from model
        with a 'sample_prior' method.
    data : Data, optional
        Observed data to compare to. If its ``data`` attribute is None,
        only the reference distribution is returned, with an assumed
        replicated data set size of 1.
    T : function, optional
        Discrepancy function written in TensorFlow. Default is
        identity. It is a function taking in a data set
        y and optionally a set of latent variables z as input.
    size : int, optional
        number of replicated data sets
    sess : tf.Session, optional
        session used during inference

    Returns
    -------
    list or np.ndarray
        When data is observed, a list containing the reference
        distribution, which is a NumPy vector of size elements,
        (T(yrep^{1}, z^{1}), ..., T(yrep^{size}, z^{size}));
        and the realized discrepancy, which is a NumPy vector of size
        elements,
        (T(y, z^{1}), ..., T(y, z^{size})).
        When no data is observed, only the reference distribution.
    """
    y = data.data
    # Identity test on purpose: `y == None` on an array or tensor is an
    # elementwise comparison, not a presence check.
    has_data = y is not None
    N = data.N if has_data else 1

    if T is None:
        T = lambda y, z=None: y

    # 1. Sample from posterior (or prior).
    # We must fetch zs out of the session because sample_likelihood()
    # may require a SciPy-based sampler.
    # The prior path needs no feeds; keep feed_dict defined for the final
    # sess.run calls either way.
    feed_dict = None
    if variational is not None:
        zs, samples = variational.sample(y, size=size)
        feed_dict = variational.np_sample(samples, size, sess=sess)
        zs = sess.run(zs, feed_dict)
    else:
        zs = model.sample_prior(size=size)
        zs = sess.run(zs)

    # 2. Sample from likelihood.
    yreps = model.sample_likelihood(zs, size=N)

    # 3. Calculate discrepancy, pairing each replicated data set with the
    # latent draw that generated it.
    Tyreps = []
    Tys = []
    for yrep, z in zip(yreps, tf.unpack(zs)):
        Tyreps.append(T(yrep, z))
        if has_data:
            Tys.append(T(y, z))

    if not has_data:
        return sess.run(tf.pack(Tyreps), feed_dict)
    return sess.run([tf.pack(Tyreps), tf.pack(Tys)], feed_dict)
    def __init__(self, is_training, vocab_size, labels_idx):
        """Assemble the question/context attention classifier graph.

        Args:
            is_training: forwarded unchanged to ``BiLSTM``.
            vocab_size: number of rows of both embedding tables.
            labels_idx: collection of labels; only its length is used here,
                as the width of the output layer.
        """
        self.labels_idx = labels_idx
        n_labels = len(labels_idx)

        self.vocab_size = vocab_size
        self.context_steps = FLAGS.context_steps
        self.question_steps = FLAGS.question_steps

        # Feeds.  `enc_y` is compared against the arg-max prediction and
        # `bin_y` is the target of the softmax cross entropy further down.
        self.questions = tf.placeholder(tf.int32, [None, self.question_steps])
        self.enc_y = tf.placeholder(tf.int32, [None])
        self.bin_y = tf.placeholder(tf.int32, [None, n_labels])
        self.ques_lengths = tf.placeholder(tf.int32, [None])

        question_table = tf.get_variable(
            "ques_embedding", [self.vocab_size, FLAGS.qs_size],
            dtype=data_type())

        # Bidirectional LSTM over the question.
        encoder = BiLSTM(self.questions, self.ques_lengths,
                         is_training, question_table, name='ques')
        # Read as before although the value is not used afterwards.
        ques_outputs = encoder.outputs[-1]
        self._initial_state = encoder._initial_state

        # The question is represented by the `h` parts of the forward and
        # backward states, joined along axis 1.
        fw_state = encoder.state_fw
        bw_state = encoder.state_bw
        hidden_state = tf.concat(1, (fw_state.h, bw_state.h))
        print("Shape of the hidden state %s." % hidden_state.get_shape())

        self.contexts = tf.placeholder(
            tf.int32, shape=[FLAGS.batch_size, FLAGS.context_steps])
        self.cont_lengths = tf.placeholder(tf.int32, shape=[None])

        # Context tokens are looked up directly in their own table.
        context_table = tf.get_variable(
            "context_embedding",
            [self.vocab_size, FLAGS.ans_size], dtype=tf.float32)
        context_vectors = tf.nn.embedding_lookup(context_table, self.contexts)

        bilinear = tf.get_variable(
            "bilinear",
            [2*FLAGS.qs_size, FLAGS.ans_size], dtype=tf.float32)
        softmax_W = tf.get_variable(
            "softmax_W", [FLAGS.ans_size, n_labels])
        softmax_b = tf.get_variable("softmax_b", [n_labels])

        # Project the question representation to the context width.
        projected_question = tf.matmul(hidden_state, bilinear)

        def label_logits(question_vec, context_rows):
            # Score every context row against the question, normalise the
            # scores, pool the rows with them and classify the pooled row.
            scores = tf.matmul(tf.expand_dims(question_vec, dim=0),
                               tf.transpose(context_rows))[0]
            attention = tf.expand_dims(tf.nn.softmax(scores), 0)
            pooled = tf.matmul(attention, context_rows)
            return tf.add(tf.matmul(pooled, softmax_W), softmax_b)[0]

        self._logits = []
        self._predictions = []
        per_example = zip(tf.unpack(projected_question, axis=0),
                          tf.unpack(context_vectors, axis=0))
        for question_vec, context_rows in per_example:
            self._logits.append(label_logits(question_vec, context_rows))

        losses = tf.nn.softmax_cross_entropy_with_logits(
            self._logits, self.bin_y)
        self._cost = tf.reduce_mean(losses)
        self._predictions = tf.argmax(self._logits, 1)
        print(self._predictions.get_shape())
        hits = tf.equal(tf.to_int32(self._predictions), self.enc_y)
        self._acc = tf.reduce_mean(tf.cast(hits, "float"))

        # Adam step on gradients clipped by their global norm.
        trainables = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self._cost, trainables), FLAGS.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        self._train_op = optimizer.apply_gradients(zip(clipped, trainables))
Esempio n. 36
0
def decoder_rnn(conv_encoder,
                decoder_inputs,
                decoder_hidden,
                weigth_generation,
                n_steps,
                bias_generation,
                batch_size,
                keep_prob,
                embedding,
                sample_rate,
                lstm_layer=1,
                is_train=True):
    """Build an LSTM decoder whose every input is joined with `conv_encoder`.

    Each decoder input is the embedding of a token concatenated (axis 1)
    with `conv_encoder`.  From the second step on, the input is produced by
    a loop function that embeds the arg-max word of the previous output:
    when `is_train` is true that prediction is mixed per sample with the
    ground-truth input (prediction chosen where `sample_rate` exceeds a
    uniform draw), otherwise the prediction is always used.

    Returns:
        A tuple `(res, state)`: `res` is a list of length `n_steps` whose
        leading entries are the word scores of each decoder output (entries
        without an output stay 0), `state` is the final decoder state.
    """
    with tf.name_scope('decoder_rnn'):

        cell = rnn_cell.BasicLSTMCell(decoder_hidden,
                                      forget_bias=1.0,
                                      state_is_tuple=True)
        if lstm_layer > 1:
            cell = rnn_cell.MultiRNNCell([cell] * lstm_layer)

        initial_state = cell.zero_state(batch_size, tf.float32)

        # Embed the tokens, put the step axis first and split per step.
        embedded = tf.nn.embedding_lookup(embedding, decoder_inputs)
        per_step = tf.unpack(tf.transpose(embedded, [1, 0, 2]))
        step_inputs = [
            tf.concat(1, [step, conv_encoder]) for step in per_step
        ]

        def predicted_input(prev):
            # Input built from the most likely word of the previous output.
            words_prob = tf.nn.bias_add(tf.matmul(prev, weigth_generation),
                                        bias_generation)
            sample = tf.argmax(words_prob, 1)
            prev_word = tf.nn.embedding_lookup(embedding, sample)
            return tf.concat(1, [prev_word, conv_encoder])

        if is_train:

            def loop_function(prev, i):
                from_model = predicted_input(prev)

                # Per-sample choice between model prediction and ground truth.
                draw = tf.random_uniform(minval=0,
                                         maxval=1,
                                         shape=(batch_size, ))
                use_model = tf.cast(tf.greater(sample_rate, draw), tf.float32)
                use_model = tf.expand_dims(use_model, 1)
                use_model = tf.tile(
                    use_model, [1, from_model.get_shape().as_list()[-1]])

                return use_model * from_model + (
                    1 - use_model) * step_inputs[i]

        else:

            def loop_function(prev, i):
                return predicted_input(prev)

        outputs, state = seq2seq.rnn_decoder(
            decoder_inputs=step_inputs,
            initial_state=initial_state,
            cell=cell,
            loop_function=loop_function,
            scope='rnn_decoder')

        # Dropout is applied to all outputs at once, then split again.
        outputs = tf.unpack(tf.nn.dropout(outputs, keep_prob))

        res = [0 for _ in range(n_steps)]
        for step, step_output in enumerate(outputs):
            # Word scores of this step.
            res[step] = tf.nn.bias_add(
                tf.matmul(step_output, weigth_generation), bias_generation)

        return res, state
Esempio n. 37
0
# Graph inputs for a hand-unrolled RNN.
# NOTE(review): the "a*b" shape remarks below presumably assume batch_size=5,
# truncated_backprop_length=15, state_size=4 and num_classes=2 - confirm
# against the constants defined earlier in the file.
# 5*15
batchX_placeholder = tf.placeholder(tf.float32,
                                    [batch_size, truncated_backprop_length])
batchY_placeholder = tf.placeholder(tf.int32,
                                    [batch_size, truncated_backprop_length])
# State carried into the first step: 5*4
init_state = tf.placeholder(tf.float32, [batch_size, state_size])
# Recurrent weights act on [input, state] joined column-wise, hence the
# state_size + 1 rows: 5*4
W = tf.Variable(np.random.rand(state_size + 1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)  # 1*4
# Read-out parameters from state to class scores: 4*2
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)  # 1*2

# Unpack columns: one tensor of length batch_size per time step.
inputs_series = tf.unpack(batchX_placeholder, axis=1)
labels_series = tf.unpack(batchY_placeholder, axis=1)

# Forward pass: feed each step's state into the next step and keep all of
# them in states_series.
current_state = init_state
states_series = []
for current_input in inputs_series:
    # Make the step input a column so it can be joined with the state.
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat(
        1, [current_input, current_state])  # Increasing number of columns

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) +
                         b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state
def resnet_v1_sdc(inputs,
                  blocks,
                  output_cfg,
                  version,
                  dropout_keep_prob=0.8,
                  bayesian=False,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  lock_root=False,
                  reuse=None,
                  scope=None):
    """Build a ResNet v1 trunk (single or multi-image) plus its output head.

    A rank-5 `inputs` tensor is treated as a stack of images along axis 1:
    one trunk is built per image, with the variables shared between the
    branches. Any other input goes through a single trunk, optionally
    followed by a global-context stage and global average pooling.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels], or a
        rank-5 tensor holding several such images along axis 1.
      blocks: Block configuration, forwarded to `_build_resnet_root` as
        `block_cfg`.
      output_cfg: Forwarded to `_build_output`.
      version: Model variant. 7 adds the global-context stage; 6 adds a
        second pooled feature taken from `block_outputs[11]`. Also forwarded
        to `_build_output`.
      dropout_keep_prob: Forwarded to the global-context and output builders.
      bayesian: Forwarded to the global-context and output builders.
      is_training: Applied to `slim.batch_norm` and `slim.dropout` through an
        arg scope, and forwarded to the builders.
      global_pool: In the single-image path, average the trunk output over
        axes 1 and 2 (keeping dims). In the multi-image path it is forwarded
        to `_build_resnet_root` instead.
      output_stride: Forwarded to `_build_resnet_root`.
      lock_root: Forwarded to `_build_resnet_root`.
      reuse: whether or not the network and its variables should be reused.
      scope: Optional variable_scope; defaults to 'resnet_v1'.

    Returns:
      output: Result of `_build_output` applied to the collected features.
      endpoints: A dictionary built from the end-points collection of this
        scope.

    Raises:
      ValueError: presumably raised by the trunk builder when the requested
        output_stride is not valid (not visible in this function).
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        endpoints_collection = sc.name + '_end_points'
        collect_endpoints = slim.arg_scope(
            [slim.conv2d, bottleneck, stack_blocks_dense],
            outputs_collections=endpoints_collection)
        training_mode = slim.arg_scope([slim.batch_norm, slim.dropout],
                                       is_training=is_training)
        with collect_endpoints, training_mode:
            features = []
            is_siamese = len(inputs.get_shape()) == 5

            if not is_siamese:
                # Single-image configuration.
                use_global_context = version == 7
                trunk, block_outputs = _build_resnet_root(
                    inputs,
                    block_cfg=blocks,
                    output_stride=output_stride,
                    lock_root=lock_root)

                if use_global_context:
                    trunk = _build_global_context(
                        trunk,
                        is_training=is_training,
                        bayesian=bayesian,
                        dropout_keep_prob=dropout_keep_prob)

                if global_pool:
                    # Average over the two spatial axes, keeping rank 4.
                    trunk = tf.reduce_mean(trunk, [1, 2],
                                           name='pool5',
                                           keep_dims=True)
                    print('Global pool', trunk.get_shape())

                features.append(trunk)

                if version == 6:
                    # Extra pooled feature taken from an earlier block output.
                    early = tf.reduce_mean(block_outputs[11], [1, 2],
                                           name='pool5a',
                                           keep_dims=True)
                    print('Global pool 2', early.get_shape())
                    features.append(early)
            else:
                # Multi-image configuration: one branch per image on axis 1.
                with tf.variable_scope(sc, values=[inputs],
                                       reuse=reuse) as shared:
                    for idx, image in enumerate(tf.unpack(inputs, axis=1)):
                        with tf.name_scope('Branch_%d' % idx):
                            branch, _ = _build_resnet_root(
                                image,
                                block_cfg=blocks,
                                global_pool=global_pool,
                                output_stride=output_stride,
                                lock_root=lock_root)
                        # Later branches reuse the variables created so far.
                        shared.reuse_variables()
                        features.append(branch)

            output = _build_output(features,
                                   output_cfg=output_cfg,
                                   version=version,
                                   is_training=is_training,
                                   bayesian=bayesian,
                                   dropout_keep_prob=dropout_keep_prob)

            endpoints = slim.utils.convert_collection_to_dict(
                endpoints_collection)

            return output, endpoints
Esempio n. 39
0
    def __init__(self, name):
        with tf.variable_scope('imply') as scope:
            # set up placeholders
            self.partial_obs = tf.placeholder(tf.float32, [N_BATCH, L, 2],
                                              name="partial_obs")
            self.full_obs = tf.placeholder(tf.float32, [N_BATCH, L, 2],
                                           name="full_obs")

            # some constants
            self.n_hidden = 200

            # make hidden represnatation
            W1 = weight_variable([L * 2, self.n_hidden])
            b1 = bias_variable([self.n_hidden])

            W2 = weight_variable([self.n_hidden, self.n_hidden])
            b2 = bias_variable([self.n_hidden])

            partial_flat = tf.reshape(self.partial_obs, [N_BATCH, L * 2])
            hidden = tf.nn.relu(tf.matmul(partial_flat, W1) + b1)
            hidden = tf.nn.relu(tf.matmul(hidden, W2) + b2)

            W_preds = [weight_variable([self.n_hidden, 2]) for _ in range(L)]
            b_preds = [bias_variable([2]) for _ in range(L)]
            e2 = tf.constant(1e-10, shape=[N_BATCH, 2])

            self.query_preds = [
                tf.nn.softmax(tf.matmul(hidden, W_preds[i]) + b_preds[i]) + e2
                for i in range(L)
            ]
            print "query_preds shape ", show_dim(self.query_preds)

            # doing some reshape of the input tensor
            full_obs_trans = tf.transpose(self.full_obs, perm=[1, 0, 2])
            print full_obs_trans.get_shape()
            full_obs_split = tf.reshape(full_obs_trans, [L, N_BATCH, 2])
            full_obs_split = tf.unpack(full_obs_split)
            print show_dim(full_obs_split)

            self.query_pred_costs = []
            for idx in range(L):
                blah = -tf.reduce_sum(
                    full_obs_split[idx] * tf.log(self.query_preds[idx]))
                self.query_pred_costs.append(blah)

            print "costs shapes ", show_dim(self.query_pred_costs)
            self.cost_query_pred = sum(self.query_pred_costs)

            # ------------------------------------------------------------------------ training steps
            # gvs = optimizer.compute_gradients(cost)
            # capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
            # train_op = optimizer.apply_gradients(capped_gvs)

            # optimizer = tf.train.RMSPropOptimizer(0.0001)
            # optimizer = tf.train.RMSPropOptimizer(0.0001)
            optimizer = tf.train.AdagradOptimizer(0.005)

            pred_gvs = optimizer.compute_gradients(self.cost_query_pred)
            capped_pred_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                               for grad, var in pred_gvs]
            #train_pred = optimizer.minimize(cost_pred, var_list = VAR_pred)
            self.train_query_pred = optimizer.apply_gradients(capped_pred_gvs)

            # train_query_pred = optimizer.minimize(cost_query_pred, var_list = VAR_pred)
            # Before starting, initialize the variables.  We will 'run' this first.
            self.init = tf.initialize_all_variables()
            self.saver = tf.train.Saver()
Esempio n. 40
0
# Run the initialisation op, then drive inference step by step so that the
# current fit can be redrawn every `n_print` iterations.
init.run()

for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print == 0:
        # Sample functions from variational model: fetch the current
        # mu/sigma of qz and shift/scale standard normal noise with them.
        mean, std = sess.run([qz.mu, qz.sigma])
        # Re-seeded with 0 on every pass, so the same noise is reused for
        # each redraw.
        rs = np.random.RandomState(0)
        zs = rs.randn(10, model.n_vars) * std + mean
        zs = tf.convert_to_tensor(zs, dtype=tf.float32)
        # Fixed grid of 400 inputs in [-8, 8], as a column.
        inputs = np.linspace(-8, 8, num=400, dtype=np.float32)
        x = tf.expand_dims(inputs, 1)
        # One network evaluation per sampled parameter vector.
        mus = []
        for z in tf.unpack(zs):
            mus += [model.neural_network(x, z)]

        outputs = tf.pack(mus).eval()

        # Get data.  Note that this rebinds `x`, which held the grid tensor
        # above.
        x, y = data['x'], data['y']

        # Plot data and functions: observations as blue crosses, one curve
        # per sampled parameter vector (hence the transpose of `outputs`).
        plt.cla()
        ax.plot(x, y, 'bx')
        ax.plot(inputs, outputs.T)
        ax.set_xlim([-8, 8])
        ax.set_ylim([-2, 3])
        plt.draw()
        # Short pause so the figure window gets a chance to refresh.
        plt.pause(1.0 / 60.0)
Esempio n. 41
0
    def build(self):
        """Build the graph: input/question encoders, episodic memory, answer.

        Creates the placeholders, encodes the input and the question with a
        shared GRU, extracts the masked input states as facts, runs
        `params.memory_step` episodic memory updates, and attaches the answer
        layer, loss, accuracy and optimiser.  The placeholders and the main
        result tensors are stored on `self` at the end.
        """
        params = self.params
        batch, sent_len = params.batch_size, params.max_sent_size
        ques_len, fact_count = params.max_ques_size, params.max_fact_count
        embed_size, hidden = params.embed_size, params.hidden_size
        vocab = self.words.vocab_size

        # placeholders
        sentence = tf.placeholder('int32', shape=[batch, sent_len],
                                  name='x')  # [num_batch, sentence_len]
        question = tf.placeholder('int32', shape=[batch, ques_len],
                                  name='q')  # [num_batch, sentence_len]
        answer = tf.placeholder('int32', shape=[batch],
                                name='y')  # [num_batch] - one word answer
        sentence_mask = tf.placeholder(
            tf.bool, shape=[batch, sent_len],
            name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # One GRU cell is used by the encoders and the memory update.
        gru = rnn_cell.GRUCell(hidden)

        with tf.variable_scope('input') as scope:
            # Input module: one step per token position.
            sentence_steps = tf.unpack(tf.transpose(sentence))
            input_states, _ = seq2seq.embedding_rnn_decoder(
                sentence_steps, gru.zero_state(batch, tf.float32), gru,
                vocab, embed_size)

            # Question module, sharing the variables created just above.
            scope.reuse_variables()

            question_steps = tf.unpack(tf.transpose(question))
            question_states, _ = seq2seq.embedding_rnn_decoder(
                question_steps, gru.zero_state(batch, tf.float32), gru,
                vocab, embed_size)
            question_vec = question_states[-1]  # use final state

        # Masking: keep only the states selected by the mask for each sample
        # and pad them with zero rows up to fact_count.
        input_states = tf.transpose(tf.pack(input_states),
                                    [1, 0, 2])  # [N, L, D]

        def padded_facts(sample):
            selected = tf.boolean_mask(input_states[sample, :, :],
                                       sentence_mask[sample, :])  # [?, D]
            filler = tf.zeros(
                tf.pack([fact_count - tf.shape(selected)[0], hidden]))
            return tf.concat(0, [selected, filler])  # [F, D]

        per_sample_facts = tf.pack(
            [padded_facts(sample) for sample in range(batch)])
        # Put the fact axis first and split: F x [N, D].
        facts = tf.unpack(tf.transpose(per_sample_facts, [1, 0, 2]),
                          num=fact_count)

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(hidden, question_vec, facts)

            memory = tf.identity(question_vec)
            for _ in range(params.memory_step):
                new_episode = episode.new(memory)
                memory = gru(new_episode, memory)[0]
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Feed-forward answer layer (the answer is a single word).
            w_a = weight('w_a', [hidden, vocab])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross entropy plus weight decay over the 'l2' collection.
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            decay = params.weight_decay * tf.add_n(tf.get_collection('l2'))
            total_loss = loss + decay

        with tf.variable_scope('Accuracy'):
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = sentence
        self.q = question
        self.y = answer
        self.mask = sentence_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
Esempio n. 42
0
def _saveFineState(sess, saver, outfilename, step, conf, traindatafilename,
                   trainData):
    """Write a fine-model checkpoint and pickle the error history."""
    saver.save(sess,
               outfilename,
               global_step=step,
               latest_filename=conf.ckptfinename)
    print('Saved state to %s-%d' % (outfilename, step))
    with open(traindatafilename, 'wb') as tdfile:
        pickle.dump(trainData, tdfile)


def _initUninitializedVars(sess):
    """Initialize every graph variable that does not hold a value yet."""
    for var in tf.all_variables():
        try:
            sess.run(tf.assert_variables_initialized([var]))
        except tf.errors.FailedPreconditionError:
            sess.run(tf.initialize_variables([var]))


def trainFine(conf, jointTrain=False, resume=True):
    """Train the fine-resolution network on top of a restored base network.

    The base network is rebuilt and its weights are restored from
    ``<conf.outname>_<conf.base_training_iters>.ckpt`` in ``conf.cachedir``.
    Its prediction and conv layers are wrapped in ``tf.stop_gradient``, so
    only the fine network is optimized (Adam on an L2 loss).

    Args:
        conf: Configuration object; the attributes read are visible below.
        jointTrain: Accepted for interface compatibility; not used here.
        resume: If true and a fine checkpoint state exists in
            ``conf.cachedir``, continue from it; otherwise start at step 0.

    Raises:
        ValueError: If the step number cannot be parsed from the checkpoint
            path when resuming.
    """
    # Parameters
    learning_rate = conf.fine_learning_rate
    batch_size = conf.fine_batch_size
    display_step = conf.display_step
    n_classes = conf.n_classes
    dropout = conf.dropout
    imsz = conf.imsz
    rescale = conf.rescale
    scale = conf.scale
    pool_scale = conf.pool_scale
    training_iters = conf.fine_training_iters

    x0, x1, x2, y, keep_prob = createPlaceHolders(imsz, rescale, scale,
                                                  pool_scale, n_classes)
    # NOTE(review): this placeholder is sized with conf.batch_size while the
    # batches fed below use conf.fine_batch_size -- confirm they are equal.
    locs_ph = tf.placeholder(tf.float32, [conf.batch_size, n_classes, 2])
    learning_rate_ph = tf.placeholder(tf.float32, shape=[])

    weights = initNetConvWeights(conf)
    pred_gradient, layers = net_multi_conv(x0, x1, x2, weights, keep_prob,
                                           imsz, rescale, pool_scale)

    baseoutname = '%s_%d.ckpt' % (conf.outname, conf.base_training_iters)
    basemodelfile = os.path.join(conf.cachedir, baseoutname)

    sess = tf.Session()
    try:
        # Created before any fine-model variable exists, so this saver only
        # covers the base network's variables.
        saver = tf.train.Saver()

        pred = tf.stop_gradient(pred_gradient)
        print("Restoring base model from:" + basemodelfile)
        saver.restore(sess, basemodelfile)

        # Construct fine model
        labelT = multiPawTools.createFineLabelTensor(conf)
        layer1_1 = tf.stop_gradient(layers['base_dict_0']['conv1'])
        layer1_2 = tf.stop_gradient(layers['base_dict_0']['conv2'])
        layer2_1 = tf.stop_gradient(layers['base_dict_1']['conv1'])
        layer2_2 = tf.stop_gradient(layers['base_dict_1']['conv2'])
        curfine1_1 = extractPatches(layer1_1, pred, conf, 1, 4)
        curfine1_2 = extractPatches(layer1_2, pred, conf, 2, 2)
        curfine2_1 = extractPatches(layer2_1, pred, conf, 2, 2)
        curfine2_2 = extractPatches(layer2_2, pred, conf, 4, 1)
        curfine1_1u = tf.unpack(tf.transpose(curfine1_1, [1, 0, 2, 3, 4]))
        curfine1_2u = tf.unpack(tf.transpose(curfine1_2, [1, 0, 2, 3, 4]))
        curfine2_1u = tf.unpack(tf.transpose(curfine2_1, [1, 0, 2, 3, 4]))
        curfine2_2u = tf.unpack(tf.transpose(curfine2_2, [1, 0, 2, 3, 4]))
        finepred = fineOut(curfine1_1u, curfine1_2u, curfine2_1u,
                           curfine2_2u, conf)
        limgs = multiPawTools.createFineLabelImages(locs_ph, pred, conf,
                                                    labelT)

        # training data stuff
        lmdbfilename = os.path.join(conf.cachedir, conf.trainfilename)
        vallmdbfilename = os.path.join(conf.cachedir, conf.valfilename)
        env = lmdb.open(lmdbfilename, readonly=True)
        valenv = lmdb.open(vallmdbfilename, readonly=True)

        # Define loss and optimizer
        costFine = tf.reduce_mean(
            tf.nn.l2_loss(finepred - tf.to_float(limgs)))
        costBase = tf.reduce_mean(tf.nn.l2_loss(pred - y))
        cost = costFine

        # Created before the optimizer, so the optimizer's own variables
        # are not part of the fine checkpoints.
        saver1 = tf.train.Saver(max_to_keep=conf.maxckpt)

        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate_ph).minimize(cost)

        outfilename = os.path.join(conf.cachedir, conf.fineoutname)
        traindatafilename = os.path.join(conf.cachedir, conf.datafinename)
        latest_ckpt = tf.train.get_checkpoint_state(
            conf.cachedir, latest_filename=conf.ckptfinename)

        if not latest_ckpt or not resume:
            startat = 0
            trainData = {'train_err': [], 'val_err': [], 'step_no': []}
        else:
            ckpt_path = latest_ckpt.model_checkpoint_path
            saver1.restore(sess, ckpt_path)
            # Checkpoints are written as "<outfilename>-<step>".
            matchObj = re.search(r'-(\d+)$', ckpt_path)
            if matchObj is None:
                raise ValueError(
                    'Cannot parse step number from checkpoint path: %s' %
                    ckpt_path)
            startat = int(matchObj.group(1)) + 1
            with open(traindatafilename, 'rb') as tdfile:
                trainData = pickle.load(tdfile)

        # Runs in both branches: on a fresh start it initializes the fine
        # network, and on resume it initializes the optimizer variables that
        # saver1 does not store.
        _initUninitializedVars(sess)

        # Step number used for the final checkpoint; stays at startat when
        # the loop below has nothing left to do.
        final_step = startat

        with env.begin() as txn, valenv.begin() as valtxn:
            train_cursor = txn.cursor()
            val_cursor = valtxn.cursor()

            # Keep training until reach max iterations
            for step in range(startat, training_iters):
                excount = step * batch_size
                # Step-wise decay: multiply by gamma every conf.step_size
                # examples.
                cur_lr = learning_rate * conf.gamma**math.floor(
                    old_div(excount, conf.step_size))

                batch_xs, locs = multiPawTools.readLMDB(
                    train_cursor, batch_size, imsz, multiResData)

                locs = multiResData.sanitize_locs(locs)

                # NOTE(review): the validation loop below calls
                # multiPawTools.multiScaleImages; confirm iScaleImages is
                # the intended function and not a typo.
                x0_in, x1_in, x2_in = multiPawTools.iScaleImages(
                    batch_xs.transpose([0, 2, 3, 1]), rescale, scale)

                labelims = multiPawTools.createLabelImages(
                    locs, conf.imsz, conf.pool_scale * conf.rescale,
                    conf.label_blur_rad)
                feed_dict = {
                    x0: x0_in,
                    x1: x1_in,
                    x2: x2_in,
                    y: labelims,
                    keep_prob: dropout,
                    locs_ph: np.array(locs),
                    learning_rate_ph: cur_lr
                }
                sess.run(optimizer, feed_dict=feed_dict)

                if step % display_step == 0:
                    # Re-evaluate the training batch without dropout.
                    feed_dict = {
                        x0: x0_in,
                        x1: x1_in,
                        x2: x2_in,
                        y: labelims,
                        keep_prob: 1.,
                        locs_ph: np.array(locs)
                    }
                    train_loss = sess.run([cost, costBase],
                                          feed_dict=feed_dict)

                    numrep = int(old_div(conf.num_test,
                                         conf.batch_size)) + 1
                    loss = 0
                    for rep in range(numrep):
                        val_xs, locs = multiPawTools.readLMDB(
                            val_cursor, batch_size, imsz, multiResData)
                        x0_in, x1_in, x2_in = multiPawTools.multiScaleImages(
                            val_xs.transpose([0, 2, 3, 1]), rescale, scale)

                        labelims = multiPawTools.createLabelImages(
                            locs, conf.imsz, conf.pool_scale * conf.rescale,
                            conf.label_blur_rad)
                        feed_dict = {
                            x0: x0_in,
                            x1: x1_in,
                            x2: x2_in,
                            y: labelims,
                            keep_prob: 1.,
                            locs_ph: np.array(locs)
                        }
                        loss += sess.run(cost, feed_dict=feed_dict)
                    loss = old_div((old_div(loss, numrep)), batch_size)
                    print("Iter " + str(step) + "  Minibatch Loss= " +
                          "{:.3f}".format(loss) + ", Training Loss= " +
                          "{:.3f}".format(
                              old_div(train_loss[0], batch_size)) +
                          ", Base Training Loss= " +
                          "{:.3f}".format(
                              old_div(train_loss[1], batch_size)))
                    trainData['train_err'].append(
                        old_div(train_loss[0], batch_size))
                    trainData['val_err'].append(loss)
                    trainData['step_no'].append(step)

                if step % conf.save_step == 0:
                    _saveFineState(sess, saver1, outfilename, step, conf,
                                   traindatafilename, trainData)

                final_step = step + 1

            print("Optimization Finished!")
            _saveFineState(sess, saver1, outfilename, final_step, conf,
                           traindatafilename, trainData)
    finally:
        sess.close()
Esempio n. 43
0
 def testCannotInferNum(self):
   """tf.unpack on a placeholder created without a shape raises ValueError."""
   expected_message = r'Cannot infer num from shape TensorShape\(None\)'
   shapeless_input = tf.placeholder(np.float32)
   with self.assertRaisesRegexp(ValueError, expected_message):
     tf.unpack(shapeless_input)
Esempio n. 44
0
def fc_v2(inputs,
          input_dim,
          output_dim,
          name,
          rng,
          biases=True,
          init=None,
          weightnorm=None,
          gain=1.):
    """Fully connected layer: ``inputs . W (+ b)`` applied on the last axis.

    Args:
        inputs: Tensor whose last dimension has size ``input_dim``. Inputs
            that are not rank 2 are flattened to ``[-1, input_dim]`` for the
            matmul and reshaped back afterwards.
        input_dim: Size of the last input dimension.
        output_dim: Size of the last output dimension.
        name: Prefix for the variables (``<name>_W``, ``<name>.g``,
            ``<name>.b``).
        rng: Random generator providing ``uniform`` and ``normal``; used to
            draw the initial weights.
        biases: Whether to add a zero-initialized bias.
        init: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`,
            `("uniform", range)`. ``None`` behaves like 'glorot'.
        weightnorm: Whether to apply weight normalization; ``None`` falls
            back to the module-level ``_default_weightnorm``.
        gain: Factor the initial weights are multiplied by.

    Returns:
        The result tensor, with ``output_dim`` as its last dimension.

    Raises:
        Exception: If ``init`` is not one of the supported values.
    """
    weight_shape = (input_dim, output_dim)

    def uniform(stdev, size):
        # A module-level _weights_stdev overrides the per-scheme value.
        if _weights_stdev is not None:
            stdev = _weights_stdev
        # Uniform on [-sqrt(3)*stdev, sqrt(3)*stdev] has standard deviation
        # stdev.
        return rng.uniform(low=-stdev * np.sqrt(3),
                           high=stdev * np.sqrt(3),
                           size=size).astype('float32')

    def orthogonal(shape):
        # From lasagne
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")
        flat_shape = (shape[0], np.prod(shape[1:]))
        # TODO: why normal and not uniform?
        a = rng.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)
        return q.astype('float32')

    if init == 'lecun':
        weight_values = uniform(np.sqrt(1. / input_dim), weight_shape)

    elif init is None or init == 'glorot':
        weight_values = uniform(np.sqrt(2. / (input_dim + output_dim)),
                                weight_shape)

    elif init == 'he':
        weight_values = uniform(np.sqrt(2. / input_dim), weight_shape)

    elif init == 'glorot_he':
        weight_values = uniform(np.sqrt(4. / (input_dim + output_dim)),
                                weight_shape)

    elif init == 'orthogonal':
        # The former "init is None and input_dim == output_dim" alternative
        # could never fire: None is already handled by the glorot branch.
        weight_values = orthogonal(weight_shape)

    elif (isinstance(init, (tuple, list)) and len(init) >= 2
          and init[0] == 'uniform'):
        weight_values = rng.uniform(low=-init[1],
                                    high=init[1],
                                    size=weight_shape).astype('float32')

    else:
        raise Exception('Invalid initialization!')

    weight_values *= gain

    weight = tf.get_variable(name + '_W',
                             initializer=tf.constant(weight_values))

    if weightnorm is None:
        weightnorm = _default_weightnorm
    if weightnorm:
        # Per-output-column L2 norms of the initial weights become the
        # initial values of the learned scale variable.
        norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))

        target_norms = tf.get_variable(name + '.g',
                                       initializer=tf.constant(norm_values))

        with tf.name_scope('weightnorm') as scope:
            norms = tf.sqrt(
                tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
            weight = weight * (target_norms / norms)

    if inputs.get_shape().ndims == 2:
        result = tf.matmul(inputs, weight)
    else:
        reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
        result = tf.matmul(reshaped_inputs, weight)
        # Restore the leading dimensions, replacing only the last one.
        result = tf.reshape(
            result, tf.pack(tf.unpack(tf.shape(inputs))[:-1] + [output_dim]))

    if biases:
        bias = tf.get_variable(name + '.b',
                               shape=output_dim,
                               initializer=tf.zeros_initializer())
        result = tf.nn.bias_add(result, bias)

    return result
Esempio n. 45
0
	def __init__(self, args, is_training=True):
		"""Build the generator RNN, the discriminator RNN and the train ops.

		Args:
			args: Options object. Attributes read here: model, rnn_size,
				num_layers, batch_size, seq_length, vocab_size, grad_clip.
			is_training: When False the graph is built for a sequence
				length of 1 and no gradient/optimizer ops are created.

		Raises:
			Exception: If args.model is not 'rnn', 'gru' or 'lstm'.
		"""
		seq_length = args.seq_length if is_training else 1

		cell_types = {
			'rnn': rnn_cell.BasicRNNCell,
			'gru': rnn_cell.GRUCell,
			'lstm': rnn_cell.BasicLSTMCell,
		}
		if args.model not in cell_types:
			raise Exception('model type not supported: {}'.format(args.model))
		cell_gen = cell_types[args.model](args.rnn_size)
		cell_dis = cell_types[args.model](args.rnn_size)

		# Pass the generated sequences and targets (1)
		with tf.name_scope('input'):
			with tf.name_scope('data'):
				self.input_data = tf.placeholder(tf.int32, [args.batch_size, seq_length])
			with tf.name_scope('targets'):
				self.targets = tf.placeholder(tf.int32, [args.batch_size, seq_length])

		############
		# Generator
		############
		with tf.variable_scope('generator'):
			self.cell_gen = rnn_cell.MultiRNNCell([cell_gen] * args.num_layers)
			self.initial_state_gen = self.cell_gen.zero_state(args.batch_size, tf.float32)

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
				softmax_b = tf.get_variable('softmax_b', [args.vocab_size])

				with tf.device('/cpu:0'):
					embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
					# One [batch_size, rnn_size] tensor per time step.
					inputs_gen = tf.split(1, seq_length, tf.nn.embedding_lookup(
						embedding, self.input_data))
					inputs_gen = [tf.squeeze(i, [1]) for i in inputs_gen]

			outputs_gen, last_state_gen = seq2seq.rnn_decoder(inputs_gen, self.initial_state_gen,
				self.cell_gen, loop_function=None)

			# Project every generator output to vocabulary logits.
			self.logits_sequence = []
			for output_gen in outputs_gen:
				logits_gen = tf.nn.xw_plus_b(output_gen, softmax_w, softmax_b)
				self.logits_sequence.append(logits_gen)

			self.final_state_gen = last_state_gen

		################
		# Discriminator
		################
		with tf.variable_scope('discriminator'):
			self.cell_dis = rnn_cell.MultiRNNCell([cell_dis] * args.num_layers)
			self.initial_state_dis = self.cell_dis.zero_state(args.batch_size, tf.float32)

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
				softmax_b = tf.get_variable('softmax_b', [2])

				# The discriminator consumes the generator's raw logits,
				# mapped into embedding space by a matmul.
				inputs_dis = []
				embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
				for logit in self.logits_sequence:
					inputs_dis.append(tf.matmul(logit, embedding))
					# inputs_dis.append(tf.matmul(tf.nn.softmax(logit), embedding))

				outputs_dis, last_state_dis = seq2seq.rnn_decoder(inputs_dis,
					self.initial_state_dis, self.cell_dis, loop_function=None)

			probs, logits = [], []
			for output_dis in outputs_dis:
				logit = tf.nn.xw_plus_b(output_dis, softmax_w, softmax_b)
				prob = tf.nn.softmax(logit)
				logits.append(logit)
				probs.append(prob)

			with tf.name_scope('summary'):
				# Packed shape is [seq_length, batch_size, 2]. The slice must
				# use the local seq_length (1 when not training), not
				# args.seq_length, or it runs past the first dimension.
				probs = tf.pack(probs)
				probs_real = tf.slice(probs, [0, 0, 1], [seq_length, args.batch_size, 1])
				variable_summaries(probs_real, 'probability of real')

			self.final_state_dis = last_state_dis

		#########
		# Train
		#########
		with tf.name_scope('train'):
			# NOTE(review): the loss pairs the 2-way discriminator logits
			# with self.targets -- confirm the targets are meant to be 0/1
			# labels here.
			gen_loss = seq2seq.sequence_loss_by_example(
				logits,
				tf.unpack(tf.transpose(self.targets)),
				tf.unpack(tf.transpose(tf.ones_like(self.targets, dtype=tf.float32))))

			self.gen_cost = tf.reduce_sum(gen_loss) / args.batch_size
			tf.scalar_summary('training loss', self.gen_cost)
			self.lr_gen = tf.Variable(0.0, trainable=False)
			self.tvars = tf.trainable_variables()
			# Only variables outside the discriminator scope are updated.
			gen_vars = [v for v in self.tvars if not v.name.startswith("discriminator/")]

			if is_training:
				gen_grads = tf.gradients(self.gen_cost, gen_vars)
				# Gradients w.r.t. all variables are kept for summaries only.
				self.all_grads = tf.gradients(self.gen_cost, self.tvars)
				gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, args.grad_clip)
				gen_optimizer = tf.train.AdamOptimizer(self.lr_gen)
				self.gen_train_op = gen_optimizer.apply_gradients(
					zip(gen_grads_clipped, gen_vars))

		with tf.name_scope('summary'):
			with tf.name_scope('weight_summary'):
				for v in self.tvars:
					variable_summaries(v, v.op.name)
			if is_training:
				with tf.name_scope('grad_summary'):
					for var, grad in zip(self.tvars, self.all_grads):
						variable_summaries(grad, 'grad/' + var.op.name)

		self.merged = tf.merge_all_summaries()
Esempio n. 46
0
    def getGraph(self, num_steps, state_size, learningRate=1e-4):
        """Build a fresh graph holding a hand-rolled batch-normalized RNN.

        The input placeholder is split along axis 1 into ``num_steps`` slices
        that are fed one after another through a tanh cell; the last state
        goes through a batch-normalized readout layer to produce the logits.
        Placeholders and result tensors are stored on ``self``.

        Args:
            num_steps: Size of axis 1 of the input placeholder.
            state_size: Width of the recurrent state.
            learningRate: Learning rate handed to the Adam optimizer.

        Returns:
            The newly created ``tf.Graph``.
        """
        graph = tf.Graph()

        with graph.as_default():
            with tf.name_scope('data'):
                inputs = tf.placeholder(
                    self.dtype,
                    [self.batch_size, num_steps, self.segment_len],
                    name='input_placeholder')
                targets = tf.placeholder(
                    self.dtype,
                    [self.batch_size, self.num_classes],
                    name='labels_placeholder')
                init_state = tf.placeholder(
                    self.dtype,
                    [self.batch_size, state_size],
                    name='previous_state_placeholder')

            with tf.name_scope('params'):
                training = tf.placeholder(tf.bool, name="training")

            # One [batch_size, segment_len] tensor per step.
            step_inputs = tf.unpack(inputs, axis=1, name='rnn_inputs')

            # Create the cell variables once up front; every step below
            # fetches them again with reuse=True.
            with tf.variable_scope('rnn_cell'):
                for make_var in (self.getRnn_W, self.getRnn_b):
                    make_var(state_size=state_size)
                for make_var in (self.get_pop_mean, self.get_pop_var,
                                 self.get_beta_offset, self.get_scale_gamma):
                    make_var(outputDim=state_size)

            def rnn_cell(rnn_input, the_state):
                with tf.variable_scope('rnn_cell', reuse=True):
                    with tf.name_scope('rnn_cell_affine_layer'):
                        weights = self.getRnn_W(state_size=state_size)
                        bias = self.getRnn_b(state_size=state_size)
                        # Input and previous state are concatenated and go
                        # through a single affine map.
                        joined = tf.concat(1, [rnn_input, the_state])
                        affine = tf.matmul(joined, weights) + bias

                    with tf.name_scope('rnn_cell_batch_norm'):
                        normalized = self.batchNormWrapper_byExponentialMovingAvg(
                            affine, training,
                            get_pop_mean=self.get_pop_mean,
                            get_pop_var=self.get_pop_var,
                            get_beta_offset=self.get_beta_offset,
                            get_scale_gamma=self.get_scale_gamma)

                    with tf.name_scope('rnn_cell_act_func'):
                        activated = tf.tanh(normalized)

                return activated

            # Each step's output is also the state passed to the next step.
            rnn_outputs = []
            current_state = init_state
            for step_input in step_inputs:
                current_state = rnn_cell(step_input, current_state)
                rnn_outputs.append(current_state)

            final_state_rnn_outputs = rnn_outputs[-1]

            with tf.variable_scope('readout'):
                logits = fully_connected_layer_with_batch_norm(
                    "readout",
                    final_state_rnn_outputs,
                    input_dim=state_size,
                    output_dim=self.num_classes,
                    nonlinearity=tf.identity,
                    training=training,
                )

            with tf.name_scope('error'):
                per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits, targets)
                error = tf.reduce_mean(per_example_loss)

            with tf.name_scope('softmax'):  # this is only for kaggle
                softmax = tf.nn.softmax(logits)

            with tf.name_scope('accuracy'):
                is_correct = tf.equal(tf.argmax(logits, 1),
                                      tf.argmax(targets, 1))
                accuracy = tf.reduce_mean(
                    tf.cast(is_correct, dtype=self.dtype))

            with tf.name_scope('train'):
                adam = tf.train.AdamOptimizer(learning_rate=learningRate)
                train_step = adam.minimize(error)

            init = tf.global_variables_initializer()

        # Expose the handles callers need to run the graph.
        self.inputs = inputs
        self.targets = targets
        self.init_state = init_state
        self.training = training
        self.outputs = final_state_rnn_outputs
        self.logits = logits
        self.softmax = softmax
        self.error = error
        self.accuracy = accuracy
        self.train_step = train_step
        self.init = init

        return graph
Esempio n. 47
0
    def _add_seq2seq(self):
        """Build the encoder/attention-decoder graph and its loss.

        Reads the input tensors from ``self._articles``, ``self._abstracts``,
        ``self._targets``, ``self._loss_weights`` and ``self._article_lens``.
        Sets ``self._enc_top_states``, ``self._dec_in_state``,
        ``self._dec_out_state`` and ``self._loss``; in 'decode' /
        'decode_server' mode also ``self._outputs``, ``self._topk_log_probs``
        and ``self._topk_ids``.
        """
        hps = self._hps
        vsize = self._vocab.NumIds()

        with tf.variable_scope('seq2seq'):
            # Swap the first two axes and unpack along the new first axis,
            # giving one tensor per index of the original axis 1
            # (presumably time steps -- confirm against the batcher).
            encoder_inputs = tf.unpack(
                tf.transpose(self._articles,
                             perm=[1, 0,
                                   2]))  # list of rank-2 tensors
            decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
            targets = tf.unpack(tf.transpose(self._targets))
            loss_weights = tf.unpack(tf.transpose(self._loss_weights))
            article_lens = self._article_lens

            print("Here")
            # Embedding used for the decoder inputs (and, in decode mode, by
            # the loop function below).
            with tf.variable_scope('embedding'), tf.device(
                    self._next_device()):
                embedding = tf.get_variable(
                    'embedding', [vsize, hps.emb_dim],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4)
                )  # embedding matrix of shape [vsize, emb_dim]
                emb_decoder_inputs = [
                    tf.nn.embedding_lookup(embedding, x)
                    for x in decoder_inputs
                ]

            # The encoder inputs are used as-is, without an embedding lookup.
            # TODO: Change decoder embeddings also
            emb_encoder_inputs = encoder_inputs

            print("Here", len(emb_encoder_inputs))
            # Stack of bidirectional LSTM layers; each layer's outputs feed
            # the next layer.
            for layer_i in xrange(hps.enc_layers):
                with tf.variable_scope('encoder%d' % layer_i), tf.device(
                        self._next_device()):
                    cell_fw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=123),
                        state_is_tuple=False)
                    cell_bw = tf.nn.rnn_cell.LSTMCell(
                        hps.num_hidden,
                        initializer=tf.random_uniform_initializer(-0.1,
                                                                  0.1,
                                                                  seed=113),
                        state_is_tuple=False)
                    # Only the forward final state is kept; the backward
                    # state is discarded.
                    (emb_encoder_inputs, fw_state,
                     _) = tf.nn.bidirectional_rnn(cell_fw,
                                                  cell_bw,
                                                  emb_encoder_inputs,
                                                  dtype=tf.float32,
                                                  sequence_length=article_lens)

                    print(len(emb_encoder_inputs))
            encoder_outputs = emb_encoder_inputs

            print("Here")
            # Projection from decoder output to vocabulary logits.
            with tf.variable_scope('output_projection'), tf.device(
                    self._next_device()):
                w = tf.get_variable(
                    'w', [hps.num_hidden, vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                w_t = tf.transpose(w)
                v = tf.get_variable(
                    'v', [vsize],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            print("Here")
            with tf.variable_scope('decoder'), tf.device(self._next_device()):
                # When decoding, use model output from the previous step
                # for the next step.
                # NOTE(review): only 'decode' enables the loop function here,
                # while the output block below also handles 'decode_server'
                # -- confirm this difference is intended.
                loop_function = None
                if hps.mode == 'decode':
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)

                cell = tf.nn.rnn_cell.LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)

                # Give each encoder output an explicit axis of size 1 so the
                # list can be concatenated along axis 1.
                encoder_outputs = [
                    tf.reshape(x, [hps.batch_size, 1, 2 * hps.num_hidden])
                    for x in encoder_outputs
                ]
                self._enc_top_states = tf.concat(1, encoder_outputs)
                # Forward state of the last encoder layer seeds the decoder.
                self._dec_in_state = fw_state
                # During decoding, follow up _dec_in_state are fed from beam_search.
                # dec_out_state are stored by beam_search for next step feeding.
                initial_state_attention = (hps.mode == 'decode')
                decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
                    emb_decoder_inputs,
                    self._dec_in_state,
                    self._enc_top_states,
                    cell,
                    num_heads=FLAGS.attn_heads,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

                # Note: check the effect of changing num_heads

            with tf.variable_scope('output'), tf.device(self._next_device()):
                # Vocabulary logits for every decoder step.
                model_outputs = []
                for i in xrange(len(decoder_outputs)):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    model_outputs.append(
                        tf.nn.xw_plus_b(decoder_outputs[i], w, v))

            if hps.mode == 'decode' or hps.mode == 'decode_server':
                with tf.variable_scope('decode_output'), tf.device(
                        self._next_device()):
                    # Greedy choice per step, concatenated along axis 1.
                    best_outputs = [tf.argmax(x, 1) for x in model_outputs]
                    tf.logging.info('best_outputs%s',
                                    best_outputs[0].get_shape())
                    self._outputs = tf.concat(1, [
                        tf.reshape(x, [hps.batch_size, 1])
                        for x in best_outputs
                    ])

                    # Top (2 * batch_size) log-probabilities and ids of the
                    # last step's distribution.
                    self._topk_log_probs, self._topk_ids = tf.nn.top_k(
                        tf.log(tf.nn.softmax(model_outputs[-1])),
                        hps.batch_size * 2)

            with tf.variable_scope('loss'), tf.device(self._next_device()):

                def sampled_loss_func(inputs, labels):
                    # Sampled softmax is pinned to the CPU.
                    with tf.device('/cpu:0'):  # Try gpu.
                        labels = tf.reshape(labels, [-1, 1])
                        return tf.nn.sampled_softmax_loss(
                            w_t, v, inputs, labels, hps.num_softmax_samples,
                            vsize)

                # Sampled softmax is used only while training and only when a
                # sample count is configured; otherwise the full softmax
                # sequence loss is used.
                if hps.num_softmax_samples != 0 and hps.mode == 'train':
                    self._loss = seq2seq_lib.sampled_sequence_loss(
                        decoder_outputs, targets, loss_weights,
                        sampled_loss_func)
                else:
                    self._loss = tf.nn.seq2seq.sequence_loss(
                        model_outputs, targets, loss_weights)
                # The summary value is capped at 12.0.
                tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
Esempio n. 48
0
  def add_seq2seq(self):
    """Build the word/sentence encoder, the joint decoder and the losses.

    Reads the batch tensors and length tensors stored on ``self`` (see the
    attribute reads below) and sets ``self.cell``, ``self.sent_cell``, the
    decoder outputs/states/distributions returned by
    ``seq2seq.attention_decoder``, ``self.loss`` and
    ``self.final_log_dists``. Unless ``hps.mode`` is 'decode' it also sets
    ``self.word_loss``, ``self.sent_loss``, ``self.switch_loss`` and
    ``self.total_loss`` and adds a scalar summary.
    """
    hps = self.hps

    with tf.variable_scope('seq2seq'):
      # Transpose so the first axis is the original axis 1, then unpack into
      # a list along it (presumably one entry per time step -- confirm).
      encoder_inputs = tf.unpack(tf.transpose(self.enc_batch))
      decoder_inputs = tf.unpack(tf.transpose(self.dec_batch))
      sent_encoder_inputs = tf.unpack(tf.transpose(self.sent_enc_batch))
      sent_decoder_inputs = tf.unpack(tf.transpose(self.sent_dec_batch))
      targets = tf.unpack(tf.transpose(self.target_batch))
      extend_targets = tf.unpack(tf.transpose(self.extend_target_batch))
      sent_targets = tf.unpack(tf.transpose(self.sent_target_batch))
      switch = tf.unpack(tf.transpose(self.switch_batch))
      word_weights = tf.unpack(tf.transpose(self.word_weights_batch))
      switch_weights = tf.unpack(tf.transpose(self.switch_weights_batch))
      # NOTE(review): the next four lists are not used further down in this
      # method; kept so the constructed graph stays the same.
      sent_decwords = tf.unpack(
          tf.transpose(self.sent_decwords_batch, perm=[1, 0, 2]))
      words_decsent = tf.unpack(
          tf.transpose(self.words_decsent_batch, perm=[1, 0, 2]))
      weights_sent_decwords = tf.unpack(
          tf.transpose(self.weights_sent_decwords_batch, perm=[1, 0, 2]))
      weights_words_decsent = tf.unpack(
          tf.transpose(self.weights_words_decsent_batch, perm=[1, 0, 2]))
      enc_lens = self.enc_input_lens
      sent_enc_lens = self.sent_enc_input_lens

      with tf.variable_scope('embedding'):
        # Word embedding initialized from self.embed.
        embedding = tf.get_variable(
            'word_embedding', dtype=tf.float32,
            initializer=self.embed)
        emb_encoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in encoder_inputs]
        emb_decoder_inputs = [tf.nn.embedding_lookup(embedding, x)
                              for x in decoder_inputs]

      with tf.variable_scope('sent_embedding'):
        sent_embedding = tf.get_variable(
            'sent_embedding', [hps.sent_enc_timesteps, hps.emb_dim],
            dtype=tf.float32)

        sent_emb_decoder_inputs = [
            tf.nn.embedding_lookup(sent_embedding, x)
            for x in sent_decoder_inputs]

      # Word-level encoder: stacked bidirectional LSTMs. Each direction uses
      # half of num_hidden units.
      for layer_i in xrange(hps.enc_layers):
        with tf.variable_scope('encoder%d' % layer_i):
          # NOTE(review): dropout with keep probability 0.5 is applied
          # regardless of hps.mode -- confirm this is intended when decoding.
          emb_encoder_inputs = tf.unpack(
              tf.nn.dropout(emb_encoder_inputs, 0.5))
          cell_fw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden // 2,
              initializer=tf.contrib.layers.xavier_initializer(
                  uniform=True, seed=123),
              state_is_tuple=False)
          cell_bw = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden // 2,
              initializer=tf.contrib.layers.xavier_initializer(
                  uniform=True, seed=123),
              state_is_tuple=False)
          (emb_encoder_inputs, fw_state, bw_state) = tf.nn.bidirectional_rnn(
              cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32,
              sequence_length=enc_lens)

      encoder_outputs = emb_encoder_inputs

      # For every batch element, gather the word-encoder outputs at the
      # positions listed in sent_encoder_inputs; these become the inputs of
      # the sentence-level encoder.
      sent_i = tf.transpose(encoder_outputs, perm=[1, 0, 2])
      index = tf.transpose(sent_encoder_inputs, perm=[1, 0])
      sent_ip = tf.pack([tf.gather(sent_i[l], index[l])
                         for l in xrange(hps.batch_size)])
      sent_input = tf.unpack(tf.transpose(sent_ip, perm=[1, 0, 2]))

      # Sentence-level encoder: stacked unidirectional LSTMs.
      for layer_i in xrange(hps.enc_layers):
        with tf.variable_scope('sent_encoder%d' % layer_i):
          sent_input = tf.unpack(tf.nn.dropout(sent_input, 0.5))
          cell_sent = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123),
              state_is_tuple=False)

          (sent_input, sent_fw_state) = tf.nn.rnn(
              cell_sent, sent_input, dtype=tf.float32,
              sequence_length=sent_enc_lens)

      sent_encoder_outputs = sent_input

      with tf.variable_scope('decoder'):
        loop_function = None
        sent_loop_function = None

        self.cell = tf.nn.rnn_cell.LSTMCell(
            hps.num_hidden,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
            state_is_tuple=False)

        # Add an axis of size 1 to each step so the list can be concatenated
        # along axis 1 into the attention states.
        encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, hps.num_hidden])
                           for x in encoder_outputs]
        enc_top_states = tf.concat(1, encoder_outputs)

        # Word decoder starts from the concatenated forward and backward
        # final states of the last word-encoder layer.
        dec_in_state = tf.concat(1, [fw_state, bw_state])

        with tf.variable_scope('sent_decoder'):
          self.sent_cell = tf.nn.rnn_cell.LSTMCell(
              hps.num_hidden,
              initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
              state_is_tuple=False)

        sent_encoder_outputs = [
            tf.reshape(x, [hps.batch_size, 1, hps.num_hidden])
            for x in sent_encoder_outputs]
        sent_enc_top_states = tf.concat(1, sent_encoder_outputs)

        mode = (hps.mode == 'train')

        sent_dec_in_state = sent_fw_state
        sent_initial_state_attention = True
        (self.decoder_outputs, self.dec_out_state,
         self.sent_decoder_outputs, self.sent_dec_out_state,
         self.switch_output, self.switch_prob,
         self.decoder_outputs_dists,
         self.sent_decoder_outputs_dists) = seq2seq.attention_decoder(
             emb_decoder_inputs, encoder_inputs, dec_in_state,
             enc_top_states, self.cell,
             sent_emb_decoder_inputs, sent_input, sent_dec_in_state,
             sent_enc_top_states,
             self.sent_cell, hps.dec_timesteps, switch=switch,
             word_weights=word_weights, mode_train=mode, num_heads=1,
             loop_function=loop_function,
             sent_loop_function=sent_loop_function,
             initial_state_attention=sent_initial_state_attention)

        # Binary switch labels: 1 where the switch value is >= 1.
        switch_target = [tf.to_int32(tf.greater_equal(x, 1)) for x in switch]

        final_dists = self._calc_final_dist(self.decoder_outputs_dists,
                                            self.sent_decoder_outputs_dists)

        # Small epsilon keeps the log finite for zero probabilities.
        log_dists = [tf.log(dist + 1e-12) for dist in final_dists]
        with tf.variable_scope('loss'):
          loss_per_step = []
          batch_nums = tf.range(0, limit=hps.batch_size)
          sent_lens = 1
          word_lens = 1
          for dec_step, log_dist in enumerate(log_dists):
            # Negative log-probability of the target id for each batch row.
            target = extend_targets[dec_step]
            indices = tf.stack((batch_nums, target), axis=1)
            losses = tf.gather_nd(-log_dist, indices)
            w = (word_weights[dec_step] / word_lens) + \
                (switch[dec_step] / sent_lens)
            loss_per_step.append(losses * w)

        self.loss = tf.reduce_mean(sum(loss_per_step))
        # NOTE(review): despite the attribute name, this stores final_dists
        # (not log_dists) -- confirm which one consumers expect.
        self.final_log_dists = final_dists

      if hps.mode != 'decode':
        with tf.variable_scope('word_loss'):
          self.word_loss = self.get_loss(
              self.decoder_outputs, targets, self.word_weights_batch)

        with tf.variable_scope('sent_loss'):
          self.sent_loss = self.get_loss(
              self.sent_decoder_outputs_dists, sent_targets,
              self.switch_batch)

        with tf.variable_scope('switch_loss'):
          self.switch_loss = seq2seq.sequence_loss(
              self.switch_output, switch_target, switch_weights,
              softmax_loss_function=None)

        # NOTE(review): switch_loss is computed but not part of total_loss --
        # confirm this is intended.
        self.total_loss = self.loss + self.word_loss + self.sent_loss
        tf.scalar_summary('loss', tf.minimum(12.0, self.loss))
Esempio n. 49
0
def build_model(words_size, embedding_size, oseq_len, source_len,
                simplified_len, defendant_nfilters, defendant_width,
                decoder_hidden, lstm_layer, batch_size, source_nfilters,
                source_width, is_train):
    """Wire up the convolutional encoder, RNN decoder, loss and predictions.

    The size arguments are forwarded to `construct_data`, whose result
    supplies the embedding, the convolution settings, the generation
    weight/bias and the model inputs used below. `lstm_layer` and
    `batch_size` are forwarded to `decoder_rnn`; `is_train` is forwarded to
    both `encoder_conv` and `decoder_rnn`.

    Returns:
        A dict with the keys 'outputs', 'embedding', 'cost', 'sample_rate',
        'words_prediction', 'source', 'defendant', 'defendant_length',
        'label', 'decoder_inputs', 'loss_weights' and 'keep_prob'.
    """
    graph_inputs = construct_data(words_size=words_size,
                                  embedding_size=embedding_size,
                                  source_len=source_len,
                                  simplified_len=simplified_len,
                                  oseq_len=oseq_len,
                                  decoder_hidden=decoder_hidden,
                                  source_nfilters=source_nfilters,
                                  source_width=source_width,
                                  defendant_nfilters=defendant_nfilters,
                                  defendant_width=defendant_width)

    # Pull every entry out up front so a missing key fails before any part
    # of the model is built.
    (embedding, conv_args, weigth_generation, bias_generation, source,
     defendant, defendant_length, label, decoder_inputs, loss_weights,
     keep_prob, sample_rate) = [
         graph_inputs[key]
         for key in ('embedding', 'conv_args', 'weigth_generation',
                     'bias_generation', 'source', 'defendant',
                     'defendant_length', 'label', 'decoder_inputs',
                     'loss_weights', 'keep_prob', 'sample_rate')
     ]

    encoded = encoder_conv(source=source,
                           defendant=defendant,
                           conv_args=conv_args,
                           keep_prob=keep_prob,
                           embedding=embedding,
                           is_train=is_train)

    # The decoder's final state is returned but not used here.
    step_logits, _final_state = decoder_rnn(
        conv_encoder=encoded,
        decoder_inputs=decoder_inputs,
        decoder_hidden=decoder_hidden,
        weigth_generation=weigth_generation,
        bias_generation=bias_generation,
        n_steps=oseq_len,
        batch_size=batch_size,
        lstm_layer=lstm_layer,
        keep_prob=keep_prob,
        embedding=embedding,
        sample_rate=sample_rate,
        is_train=is_train)

    # Swap the first two axes of labels and weights, then split along the
    # new leading axis so each list entry lines up with one decoder step.
    # (presumably both arrive as [batch, time] -- confirm with
    # construct_data)
    step_targets = tf.unpack(tf.transpose(label, [1, 0]))
    step_weights = tf.unpack(
        tf.transpose(tf.convert_to_tensor(loss_weights, dtype=tf.float32),
                     [1, 0]))
    per_example_loss = seq2seq.sequence_loss_by_example(
        logits=step_logits, targets=step_targets, weights=step_weights)
    cost = tf.reduce_mean(per_example_loss)

    # Stack the per-step logits, swap the first two axes and take the
    # argmax over the last axis.
    stacked_logits = tf.transpose(tf.pack(step_logits), [1, 0, 2])
    words_prediction = tf.argmax(stacked_logits, 2)

    print('build model ')

    return {
        'outputs': step_logits,
        'embedding': embedding,
        'cost': cost,
        'sample_rate': sample_rate,
        'words_prediction': words_prediction,
        'source': source,
        'defendant': defendant,
        'defendant_length': defendant_length,
        'label': label,
        'decoder_inputs': decoder_inputs,
        'loss_weights': loss_weights,
        'keep_prob': keep_prob
    }
Esempio n. 50
0
def bidirectional_rnn(incoming,
                      rnncell_fw,
                      rnncell_bw,
                      return_seq=False,
                      return_states=False,
                      initial_state_fw=None,
                      initial_state_bw=None,
                      dynamic=False,
                      scope=None,
                      name="BiRNN"):
    """ Bidirectional RNN.

    Runs one RNN cell forward and another backward over the same sequence
    by delegating to `_brnn`. Both cells must have the same number of
    units.

    Input:
        Either a tensor whose first two axes are swapped and then split
        into a per-timestep list (rank must be at least 3), or a `list`
        that is passed to `_brnn` unchanged.

    Output:
        if `return_seq`: the per-timestep outputs produced by `_brnn`.
        else: a single output -- the last timestep, or, when `dynamic`,
        the entry selected by `advanced_indexing_op` using the retrieved
        sequence lengths.

    Arguments:
        incoming: The incoming tensor (or per-timestep list).
        rnncell_fw: `RNNCell`. The RNN Cell to use for forward computation.
        rnncell_bw: `RNNCell`. The RNN Cell to use for backward computation.
        return_seq: `bool`. If True, returns the full sequence instead of
            the last sequence output only.
        return_states: `bool`. If True, returns a tuple
            (output, forward states, backward states).
        initial_state_fw: An initial state for the forward RNN, forwarded
            to `_brnn`.
        initial_state_bw: An initial state for the backward RNN, forwarded
            to `_brnn`.
        dynamic: `bool`. If True, sequence lengths are computed with
            `retrieve_seq_length_op` and handed to `_brnn`.
        scope: `str`. Define this layer scope (optional). Overrides `name`
            when given.
        name: `str`. A name for this layer (optional).

    """
    assert rnncell_fw._num_units == rnncell_bw._num_units, \
        "RNN Cells number of units must match!"

    if dynamic:
        # The length op needs a single tensor; stack a list input first.
        length_source = (incoming if isinstance(incoming, tf.Tensor)
                         else tf.pack(incoming))
        sequence_length = retrieve_seq_length_op(length_source)
    else:
        sequence_length = None

    input_shape = utils.get_incoming_shape(incoming)

    with tf.variable_scope(scope, name, values=[incoming]) as scope:
        name = scope.name

        # TODO: DropoutWrapper

        # NOTE(review): `np.array` is a function, so only an exact `list`
        # ever skips the conversion below -- confirm whether `np.ndarray`
        # was intended.
        if type(incoming) in [list, np.array]:
            inference = incoming
        else:
            # Swap the first two axes, then split into a per-step list.
            rank = len(input_shape)
            assert rank >= 3, "Input dim should be at least 3."
            perm = [1, 0] + list(range(2, rank))
            inference = tf.unpack(tf.transpose(incoming, perm))

        outputs, states_fw, states_bw = _brnn(
            rnncell_fw,
            rnncell_bw,
            inference,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            sequence_length=sequence_length,
            dtype=tf.float32)

        # Register both cells' weights and biases under this layer's
        # variable collection; an attribute may be one variable or several.
        collection_key = tf.GraphKeys.LAYER_VARIABLES + '/' + scope.name
        for param in (rnncell_fw.W, rnncell_fw.b,
                      rnncell_bw.W, rnncell_bw.b):
            members = param if hasattr(param, "__len__") else (param,)
            for member in members:
                tf.add_to_collection(collection_key, member)

        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if return_seq:
        o = outputs
    elif dynamic:
        # Stack the steps, swap the first two axes and index with the
        # per-sample sequence lengths.
        stacked = tf.transpose(tf.pack(outputs), [1, 0, 2])
        o = advanced_indexing_op(stacked, sequence_length)
    else:
        o = outputs[-1]

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + name, o)

    if return_states:
        return o, states_fw, states_bw
    return o
Esempio n. 51
0
def Linear(
    name,
    input_dim,
    output_dim,
    inputs,
    biases=True,
    initialization=None,
    weightnorm=None,
    gain=1.0,
):
    """Apply a learned linear map (plus optional bias) to `inputs`.

    Args:
        name: name scope and prefix of the parameter names
            (`name + ".W"`, `name + ".g"`, `name + ".b"`).
        input_dim: size of the last axis of `inputs`.
        output_dim: size of the last axis of the result.
        inputs: tensor to transform. Rank-2 inputs are multiplied
            directly; any other rank is flattened to `[-1, input_dim]`,
            multiplied, and reshaped back with `output_dim` as last axis.
        biases: when true, a zero-initialised bias of length `output_dim`
            is added.
        initialization: None, `lecun`, 'glorot', `he`, 'glorot_he',
            `orthogonal`, `("uniform", range)`. None behaves like 'glorot'.
        weightnorm: when None, the module-level `_default_weightnorm` is
            used; when truthy, the weight columns are rescaled to learned
            norms stored in `name + ".g"`.
        gain: factor applied to the initial weight values.

    Returns:
        The transformed tensor.

    Raises:
        Exception: if `initialization` is not one of the supported values.
    """
    with tf.name_scope(name):

        def uniform(stdev, size):
            # A module-level override, when set, replaces the requested
            # standard deviation.
            if _weights_stdev is not None:
                stdev = _weights_stdev
            # Uniform on [-stdev*sqrt(3), stdev*sqrt(3)] has std `stdev`.
            return np.random.uniform(low=-stdev * np.sqrt(3),
                                     high=stdev * np.sqrt(3),
                                     size=size).astype("float32")

        if initialization == "lecun":  # and input_dim != output_dim):
            # disabling orth. init for now because it's too slow
            weight_values = uniform(np.sqrt(1.0 / input_dim),
                                    (input_dim, output_dim))

        elif initialization is None or initialization == "glorot":

            weight_values = uniform(np.sqrt(2.0 / (input_dim + output_dim)),
                                    (input_dim, output_dim))

        elif initialization == "he":

            weight_values = uniform(np.sqrt(2.0 / input_dim),
                                    (input_dim, output_dim))

        elif initialization == "glorot_he":

            weight_values = uniform(np.sqrt(4.0 / (input_dim + output_dim)),
                                    (input_dim, output_dim))

        elif initialization == "orthogonal":
            # The former `initialization == None and input_dim ==
            # output_dim` alternative was unreachable (None is handled by
            # the glorot branch above) and has been dropped.

            # From lasagne
            def sample(shape):
                if len(shape) < 2:
                    raise RuntimeError("Only shapes of length 2 or more are "
                                       "supported.")
                flat_shape = (shape[0], np.prod(shape[1:]))
                # TODO: why normal and not uniform?
                a = np.random.normal(0.0, 1.0, flat_shape)
                u, _, v = np.linalg.svd(a, full_matrices=False)
                # pick the one with the correct shape
                q = u if u.shape == flat_shape else v
                q = q.reshape(shape)
                return q.astype("float32")

            weight_values = sample((input_dim, output_dim))

        elif initialization[0] == "uniform":

            weight_values = np.random.uniform(
                low=-initialization[1],
                high=initialization[1],
                size=(input_dim, output_dim),
            ).astype("float32")

        else:

            raise Exception("Invalid initialization!")

        weight_values *= gain

        weight = lib.param(name + ".W", weight_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            # Initial per-column norms become the learned target norms.
            norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0))
            # norm_values = np.linalg.norm(weight_values, axis=0)

            target_norms = lib.param(name + ".g", norm_values)

            with tf.name_scope("weightnorm"):
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(weight), reduction_indices=[0]))
                weight = weight * (target_norms / norms)

        # if 'Discriminator' in name:
        #     print "WARNING weight constraint on {}".format(name)
        #     weight = tf.nn.softsign(10.*weight)*.1

        if inputs.get_shape().ndims == 2:
            result = tf.matmul(inputs, weight)
        else:
            # Collapse all leading axes, multiply, then restore them with
            # output_dim as the new last axis.
            reshaped_inputs = tf.reshape(inputs, [-1, input_dim])
            result = tf.matmul(reshaped_inputs, weight)
            result = tf.reshape(
                result,
                tf.pack(tf.unpack(tf.shape(inputs))[:-1] + [output_dim]))

        if biases:
            result = tf.nn.bias_add(
                result,
                lib.param(name + ".b", np.zeros((output_dim, ),
                                                dtype="float32")))

        return result
Esempio n. 52
0
 def transform(cls, x, return_log_jac=False):
     """Select entry `idx` of `x` split along `axis`.

     `axis` and `idx` come from the enclosing scope. When `return_log_jac`
     is true the result is paired with a log-Jacobian of 0.0.
     """
     transformed = tf.unpack(x, axis=axis)[idx]
     return (transformed, 0.0) if return_log_jac else transformed
    def __init__(self, vocab_size, embedding_size, state_size, num_layers,
                 num_samples, max_seq_length, max_gradient_norm, cell_type,
                 optimizer, learning_rate):
        """Build the graph of an encoder-decoder sequence model.

        Creates the input placeholders, output projection, embeddings, RNN
        cell, encoder, decoder, loss, clipped-gradient update op and saver,
        and stores the pieces used later as attributes.

        Args:
            vocab_size: number of rows of the embedding table and width of
                the output projection.
            embedding_size: width of each embedding vector.
            state_size: number of units of the RNN cell.
            num_layers: when greater than 1, the cell is wrapped in a
                MultiRNNCell with this many layers.
            num_samples: number of sampled classes for the sampled-softmax
                helper; the helper is only built when
                0 < num_samples < vocab_size.
            max_seq_length: number of encoder time steps; the decoder
                placeholders have max_seq_length + 2 steps.
            max_gradient_norm: threshold for global-norm gradient clipping.
            cell_type: 'gru' selects a GRUCell; anything else keeps the
                BasicLSTMCell.
            optimizer: 'adadelta' selects AdadeltaOptimizer; anything else
                keeps AdamOptimizer.
            learning_rate: learning rate handed to the optimizer.
        """
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.state_size = state_size
        self.num_layers = num_layers
        self.max_seq_length = max_seq_length
        self.max_gradient_norm = max_gradient_norm
        self.cell_type = cell_type
        self.num_samples = num_samples
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.is_train = True  # false for test
        self.global_step = tf.Variable(0, trainable=False)

        # --- Input and target placeholders ---
        # [max_seq_length, batch_size] input sequences; None leaves the
        # batch size variable.
        self.encoder_inputs = tf.placeholder(
            tf.int32, [self.max_seq_length, None])
        # [batch_size] length of each sequence, handed to the encoder RNN
        # as sequence_length.
        self.encoder_lengths = tf.placeholder(tf.int32, [None])
        # Decoder inputs carry the 'GO' and 'EOS' symbols, hence 2 more
        # time steps.
        self.decoder_inputs = tf.placeholder(
            tf.int32, [self.max_seq_length + 2, None])
        # Per-step loss weights; padded positions are meant to be fed 0.0
        # so their loss is ignored.
        self.decoder_weights = tf.placeholder(
            tf.float32, [self.max_seq_length + 2, None])

        # --- Output projection ---
        # A decoder output o_t has state_size entries; o_t * w + b has
        # vocab_size entries, from which the predicted word is the argmax.
        w = tf.get_variable("proj_w", [self.state_size, self.vocab_size])
        w_t = tf.transpose(w)
        b = tf.get_variable("proj_b", [self.vocab_size])
        output_projection = (w, b)

        # Sampled softmax only scores num_samples classes per step to make
        # the loss cheaper.
        # NOTE(review): softmax_loss_function is built here but never
        # passed to sequence_loss below -- confirm whether that is intended.
        softmax_loss_function = None
        if self.num_samples > 0 and self.num_samples < self.vocab_size:

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(weights=w_t,
                                                  biases=b,
                                                  inputs=inputs,
                                                  labels=labels,
                                                  num_sampled=self.num_samples,
                                                  num_classes=self.vocab_size)

            softmax_loss_function = sampled_loss

        # --- Embedding table and embedded inputs ---
        # Pinned to the CPU (original author's note: embedding lookup only
        # works with cpu).
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [self.vocab_size, self.embedding_size])
            # Look up the embeddings and split them into a per-step list.
            embedded_encoder_inputs = tf.unpack(
                tf.nn.embedding_lookup(embedding, self.encoder_inputs))
            embedded_decoder_inputs = tf.unpack(
                tf.nn.embedding_lookup(embedding, self.decoder_inputs))

        # --- RNN cell ---
        cell = tf.nn.rnn_cell.BasicLSTMCell(self.state_size,
                                            state_is_tuple=True)
        if cell_type == 'gru':
            cell = tf.nn.rnn_cell.GRUCell(self.state_size)
        if self.num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([cell] * self.num_layers)

        # --- Encoder ---
        # Encode the sequences; the final state seeds the decoder below.
        encoder_outputs, self.encoder_states = rnn.rnn(
            cell,
            embedded_encoder_inputs,
            sequence_length=self.encoder_lengths,
            dtype=dtypes.float32)

        # --- Decoder ---
        # In training the decoder is fed the ground-truth symbol at every
        # step; for prediction a loop_function feeds the previous
        # prediction back in as the next input.
        def loop_function(prev, _):
            # Project the previous output onto the vocabulary.
            prev = tf.matmul(prev, output_projection[0]) + output_projection[
                1]
            # NOTE(review): `tf.math_ops` may not be exposed by every
            # TensorFlow release; this path only runs when is_train is
            # False -- confirm.
            prev_symbol = tf.math_ops.argmax(prev, 1)
            # Embed the most likely word as the next step's input.
            emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
            return emb_prev

        # The decoder starts from the encoder state; decoder_outputs holds
        # one output per step.
        self.decoder_outputs, decoder_states = tf.nn.seq2seq.rnn_decoder(
            embedded_decoder_inputs,
            self.encoder_states,
            cell,
            loop_function=None if self.is_train else loop_function)
        self.decoder_outputs = [
            tf.matmul(one, output_projection[0]) + output_projection[1]
            for one in self.decoder_outputs
        ]

        # --- Loss ---
        # For a sequence [GO, w1, w2, w3, EOS] the decoder reads 'GO' at
        # step 0 and should predict w1, and so on, so the targets are the
        # decoder inputs shifted by one step. The placeholder is unpacked
        # once and sliced instead of being re-unpacked for every step.
        decoder_steps = tf.unpack(self.decoder_inputs)
        decoder_truth = decoder_steps[1:self.max_seq_length + 2]
        self.loss = tf.nn.seq2seq.sequence_loss(
            self.decoder_outputs[:-1], decoder_truth,
            tf.unpack(self.decoder_weights)[:-1])

        # --- Gradients ---
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        # Clip by global norm before applying.
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, self.max_gradient_norm)

        # --- Optimizer ---
        opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        if self.optimizer == 'adadelta':
            opt = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

        # --- Saver ---
        self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=10)
Esempio n. 54
0
    def get_outputs(self, inputs, input_seq_length, classifier):
        '''build the beam-search decoding graph and return its outputs

        The inputs are encoded once, the encoding is repeated for every
        beam element, the first decoding step is run outside the loop and
        the remaining steps run in a tf.while_loop. The final beam is then
        split into per-utterance, per-beam-element Python lists.

        NOTE(review): the `inputs` and `input_seq_length` arguments are not
        read; the encoder is called with `self.inputs` and
        `self.input_seq_length` instead -- confirm this is intended.

        Args:
            inputs: The inputs to the network as a
                [batch_size x max_input_length x input_dim] tensor
            input_seq_length: The sequence length of the inputs as a
                [batch_size] vector
            classifier: The classifier object that will be used in decoding

        Returns:
            A list with batch_size elements, each a list with one entry per
            beam element holding a tuple
            (score, output sequence, attention, high level features)
        '''

        #encode the inputs [batch_size x output_length x output_dim]
        hlfeat = classifier.encoder(self.inputs, self.input_seq_length, False)

        #repeat the high level features for all beam elements; the result
        #has beam_width*batch_size entries along the first axis
        hlfeat = tf.reshape(
            tf.tile(tf.expand_dims(hlfeat, 1),
                    [1, int(self.conf['beam_width']), 1, 1]), [
                        int(self.conf['beam_width']) * self.batch_size,
                        int(hlfeat.get_shape()[1]),
                        int(hlfeat.get_shape()[2])
                    ])

        def body(step, beam, first_step=False, check_finished=True):
            '''the body of the decoding while loop

            Args:
                step: the current decoding step
                beam: a Beam object containing the current beam
                first_step: whether or not this is the first step in decoding
                check_finished: finish a beam element if a sentence border
                    token is observed

            returns:
                the loop vars: the incremented step and the updated beam'''

            with tf.variable_scope('body'):

                #put the last output in the correct format
                # [batch_size x beam_width]
                prev_output = beam.sequences[:, :, step]

                #put the prev_output and state in the correct shape so all
                #beam elements from all batches are processed in parallel
                #[batch_size*beam_width x 1]
                prev_output = tf.expand_dims(tf.reshape(prev_output, [-1]), 1)

                #merge the first two axes of every state tensor
                states = [
                    tf.reshape(s, [-1, int(s.get_shape()[2])])
                    for s in nest.flatten(beam.states)
                ]
                states = nest.pack_sequence_as(beam.states, states)

                #compute the next state and logits
                logits, states = classifier.decoder(hlfeat=hlfeat,
                                                    encoder_inputs=prev_output,
                                                    initial_state=states,
                                                    first_step=first_step,
                                                    is_training=False)

                #get the attention tensor; it is looked up by name in the
                #default graph and the name differs between the first step
                #and the later steps
                if first_step:
                    attention_name = (
                        tf.get_default_graph()._name_stack + '/' +
                        type(classifier.decoder).__name__ +
                        '/attention_decoder/Attention_0/Softmax:0')
                else:
                    attention_name = (tf.get_default_graph()._name_stack +
                                      '/' + type(classifier.decoder).__name__ +
                                      '/attention_decoder/attention_decoder/' +
                                      'Attention_0/Softmax:0')

                attention = tf.get_default_graph().get_tensor_by_name(
                    attention_name)

                #put the states and logits in the format for the beam
                #[batch_size x beam_width x dim]
                states = [
                    tf.reshape(s, [
                        self.batch_size,
                        int(self.conf['beam_width']),
                        int(s.get_shape()[1])
                    ]) for s in nest.flatten(states)
                ]
                states = nest.pack_sequence_as(beam.states, states)
                logits = tf.reshape(logits, [
                    self.batch_size,
                    int(self.conf['beam_width']),
                    int(logits.get_shape()[2])
                ])

                attention = tf.reshape(attention, [
                    self.batch_size,
                    int(self.conf['beam_width']),
                    int(attention.get_shape()[1])
                ])

                #update the beam
                beam = beam.update(logits, states, attention, step,
                                   check_finished)

                step = step + 1

            return step, beam

        def cb_cond(step, beam):
            '''the condition of the decoding while loop

            Args:
                step: the decoding step
                beam: a Beam object containing the current beam

            returns:
                a boolean that evaluates to True if the loop should
                continue'''

            with tf.variable_scope('cond'):

                #continue while not all beam elements have terminated and
                #the maximum number of steps has not been reached
                cont = tf.logical_and(
                    tf.logical_not(
                        beam.all_terminated(step, classifier.output_dim - 1)),
                    tf.less(step, int(self.conf['max_steps'])))

            return cont

        #initialise the loop variables
        #the first beam element starts with score 0, all the others with the
        #most negative float32
        negmax = tf.tile([[-tf.float32.max]],
                         [self.batch_size,
                          int(self.conf['beam_width']) - 1])
        scores = tf.concat([tf.zeros([self.batch_size, 1]), negmax], 1)
        lengths = tf.ones(
            [self.batch_size, int(self.conf['beam_width'])], dtype=tf.int32)
        #every position of every sequence starts as output_dim - 1, the
        #same label that cb_cond passes to beam.all_terminated
        sequences = tf.constant(classifier.output_dim - 1,
                                shape=[
                                    self.batch_size,
                                    int(self.conf['beam_width']),
                                    int(self.conf['max_steps'])
                                ],
                                dtype=tf.int32)
        states = classifier.decoder.zero_state(
            int(self.conf['beam_width']) * self.batch_size)
        flat_states = [
            tf.reshape(s, [
                self.batch_size,
                int(self.conf['beam_width']),
                int(s.get_shape()[1])
            ]) for s in nest.flatten(states)
        ]
        states = nest.pack_sequence_as(states, flat_states)
        attention = tf.zeros([
            self.batch_size,
            int(self.conf['beam_width']),
            int(hlfeat.get_shape()[1]),
            int(self.conf['max_steps'])
        ])

        beam = Beam(sequences, lengths, states, scores, attention)
        step = tf.constant(0)

        #do the first step because the initial state should not be used
        #to compute a context
        step, beam = body(step, beam, True, False)

        #run the rest of the decoding loop, one iteration at a time and
        #without back propagation
        _, beam = tf.while_loop(cond=cb_cond,
                                body=body,
                                loop_vars=[step, beam],
                                parallel_iterations=1,
                                back_prop=False)

        with tf.variable_scope('cut_sequences'):
            #get the beam scores
            scores = [tf.unpack(s) for s in tf.unpack(beam.scores)]

            #cut the beam sequences to the correct length and take of
            #the sequence border tokens
            sequences = [tf.unpack(s) for s in tf.unpack(beam.sequences)]
            lengths = [tf.unpack(l) for l in tf.unpack(beam.lengths)]
            attention = [tf.unpack(a) for a in tf.unpack(beam.attention)]
            #NOTE(review): hlfeat has beam_width*batch_size entries here but
            #is indexed with the batch index below -- verify that hlfeat[i]
            #is the intended utterance when beam_width > 1
            hlfeat = tf.unpack(hlfeat)
            sequences = [[
                sequences[i][j][1:lengths[i][j] - 1]
                for j in range(len(lengths[i]))
            ] for i in range(len(lengths))]
            attention = [[
                attention[i][j][:, 1:lengths[i][j]]
                for j in range(len(lengths[i]))
            ] for i in range(len(lengths))]

        #one (score, sequence, attention, hlfeat) tuple per beam element
        outputs = [[(scores[i][j], sequences[i][j], attention[i][j], hlfeat[i])
                    for j in range(len(sequences[i]))]
                   for i in range(len(sequences))]

        return outputs
Esempio n. 55
0
    def _testDynamicEquivalentToStaticRNN(self, use_gpu):
        """Check that tf.nn.rnn and tf.nn.dynamic_rnn agree exactly.

        The same LSTM cell configuration (peepholes, projection, seeded
        initializer) is built in two separate graphs, once unrolled with
        tf.nn.rnn and once with tf.nn.dynamic_rnn. Outputs, final state and
        several gradients are evaluated on the same random input and then
        compared for exact equality.

        Args:
            use_gpu: forwarded to `self.test_session`.
        """
        time_steps = 8
        num_units = 3
        num_proj = 4
        input_size = 5
        batch_size = 2

        input_values = np.random.randn(time_steps, batch_size, input_size)

        # One random length in [0, time_steps) per batch entry.
        sequence_length = np.random.randint(0, time_steps, size=batch_size)

        ########### Step 1: Run static graph and generate readouts
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            concat_inputs = tf.placeholder(tf.float32,
                                           shape=(time_steps, batch_size, input_size))
            # The static RNN takes a list with one tensor per time step.
            inputs = tf.unpack(concat_inputs)
            initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)

            cell = tf.nn.rnn_cell.LSTMCell(
                num_units, input_size, use_peepholes=True,
                initializer=initializer, num_proj=num_proj)

            # Both graphs use the same scope name.
            with tf.variable_scope("dynamic_scope"):
                outputs_static, state_static = tf.nn.rnn(
                    cell, inputs, sequence_length=sequence_length, dtype=tf.float32)

            feeds = {concat_inputs: input_values}

            # Initialize
            tf.initialize_all_variables().run(feed_dict=feeds)

            # Generate gradients of sum of outputs w.r.t. inputs
            static_gradients = tf.gradients(
                outputs_static + [state_static], [concat_inputs])

            # Generate gradients of individual outputs w.r.t. inputs
            static_individual_gradients = _flatten([
                                                       tf.gradients(y, [concat_inputs])
                                                       for y in [outputs_static[0],
                                                                 outputs_static[-1],
                                                                 state_static]])

            # Generate gradients of individual outputs w.r.t. the trainable
            # variables
            trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            assert len(trainable_variables) > 1, (
                "Count of trainable variables: %d" % len(trainable_variables))
            # pylint: disable=bad-builtin
            static_individual_variable_gradients = _flatten([
                                                                tf.gradients(y, trainable_variables)
                                                                for y in [outputs_static[0],
                                                                          outputs_static[-1],
                                                                          state_static]])

            # Test forward pass
            values_static = sess.run(outputs_static, feed_dict=feeds)
            (state_value_static,) = sess.run((state_static,), feed_dict=feeds)

            # Test gradients to inputs and variables w.r.t. outputs & final state
            static_grad_values = sess.run(static_gradients, feed_dict=feeds)

            static_individual_grad_values = sess.run(
                static_individual_gradients, feed_dict=feeds)

            static_individual_var_grad_values = sess.run(
                static_individual_variable_gradients, feed_dict=feeds)

        ########## Step 2: Run dynamic graph and generate readouts
        with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
            concat_inputs = tf.placeholder(tf.float32,
                                           shape=(time_steps, batch_size, input_size))
            # Unpacked for symmetry with step 1; dynamic_rnn below is fed
            # the stacked placeholder directly.
            inputs = tf.unpack(concat_inputs)
            initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)

            cell = tf.nn.rnn_cell.LSTMCell(
                num_units, input_size, use_peepholes=True,
                initializer=initializer, num_proj=num_proj)

            with tf.variable_scope("dynamic_scope"):
                outputs_dynamic, state_dynamic = tf.nn.dynamic_rnn(
                    cell, inputs=concat_inputs, sequence_length=sequence_length,
                    time_major=True, dtype=tf.float32)
                # Split into a per-step list so it can be compared with the
                # static outputs.
                split_outputs_dynamic = tf.unpack(outputs_dynamic, time_steps)

            feeds = {concat_inputs: input_values}

            # Initialize
            tf.initialize_all_variables().run(feed_dict=feeds)

            # Generate gradients of sum of outputs w.r.t. inputs
            dynamic_gradients = tf.gradients(
                split_outputs_dynamic + [state_dynamic], [concat_inputs])

            # Generate gradients of several individual outputs w.r.t. inputs
            dynamic_individual_gradients = _flatten([
                                                        tf.gradients(y, [concat_inputs])
                                                        for y in [split_outputs_dynamic[0],
                                                                  split_outputs_dynamic[-1],
                                                                  state_dynamic]])

            # Generate gradients of individual outputs w.r.t. the trainable
            # variables
            trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            assert len(trainable_variables) > 1, (
                "Count of trainable variables: %d" % len(trainable_variables))
            dynamic_individual_variable_gradients = _flatten([
                                                                 tf.gradients(y, trainable_variables)
                                                                 for y in [split_outputs_dynamic[0],
                                                                           split_outputs_dynamic[-1],
                                                                           state_dynamic]])

            # Test forward pass
            values_dynamic = sess.run(split_outputs_dynamic, feed_dict=feeds)
            (state_value_dynamic,) = sess.run(
                (state_dynamic,), feed_dict=feeds)

            # Test gradients to inputs and variables w.r.t. outputs & final state
            dynamic_grad_values = sess.run(dynamic_gradients, feed_dict=feeds)

            dynamic_individual_grad_values = sess.run(
                dynamic_individual_gradients, feed_dict=feeds)

            dynamic_individual_var_grad_values = sess.run(
                dynamic_individual_variable_gradients, feed_dict=feeds)

        ######### Step 3: Comparisons
        # Forward pass: every per-step output and the final state must match.
        self.assertEqual(len(values_static), len(values_dynamic))
        for (value_static, value_dynamic) in zip(values_static, values_dynamic):
            self.assertAllEqual(value_static, value_dynamic)
        self.assertAllEqual(state_value_static, state_value_dynamic)

        # Gradient of the summed outputs w.r.t. the inputs.
        self.assertAllEqual(static_grad_values, dynamic_grad_values)

        self.assertEqual(len(static_individual_grad_values),
                         len(dynamic_individual_grad_values))
        self.assertEqual(len(static_individual_var_grad_values),
                         len(dynamic_individual_var_grad_values))

        # Gradients of the individual outputs w.r.t. the inputs.
        for i, (a, b) in enumerate(zip(static_individual_grad_values,
                                       dynamic_individual_grad_values)):
            tf.logging.info("Comparing individual gradients iteration %d" % i)
            self.assertAllEqual(a, b)

        # Gradients of the individual outputs w.r.t. the variables.
        for i, (a, b) in enumerate(zip(static_individual_var_grad_values,
                                       dynamic_individual_var_grad_values)):
            tf.logging.info(
                "Comparing individual variable gradients iteraiton %d" % i)
            self.assertAllEqual(a, b)
Esempio n. 56
0
def inference(documents, doc_mask, query, query_mask):
    """Build the reader graph that accumulates attention per vocabulary id.

    Documents and queries share one embedding matrix and are encoded by two
    separate bidirectional GRUs.  A pairwise score matrix between document
    and query states is normalised twice with the `softmax` helper (axis
    arguments 1 and 2); the two results are combined into one weight per
    document position, and those weights are summed per token id.

    Args:
        documents: token ids of the documents; used for the embedding lookup
            and as segment ids (assumed [batch, doc_len] -- TODO confirm).
        doc_mask: mask over document positions; its sums along axis 1 are
            used as the document sequence lengths.
        query: token ids of the queries.
        query_mask: mask over query positions; its sums along axis 1 are
            used as the query sequence lengths.

    Returns:
        A `(y_hat, regularizer)` pair.  `y_hat` stacks, for each of the
        `FLAGS.batch_size` examples, the attention summed per id over
        `FLAGS.vocab_size` segments; `regularizer` is the L2 loss of the
        embedding matrix.
    """
    embedding = tf.get_variable(
        'embedding', [FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))
    regularizer = tf.nn.l2_loss(embedding)

    def embed(token_ids):
        # Look up embeddings, apply dropout, and pin the static last dim.
        looked_up = tf.nn.embedding_lookup(embedding, token_ids)
        dropped = tf.nn.dropout(looked_up, FLAGS.dropout_keep_prob)
        dropped.set_shape([None, None, FLAGS.embedding_size])
        return dropped

    def encode(scope_name, embedded, mask):
        # Run a bidirectional GRU inside its own variable scope and return
        # the concatenated forward/backward outputs plus the lengths used.
        with tf.variable_scope(scope_name,
                               initializer=orthogonal_initializer()):
            forward = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
            backward = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
            lengths = tf.reduce_sum(mask, reduction_indices=1)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                forward,
                backward,
                embedded,
                sequence_length=tf.to_int64(lengths),
                dtype=tf.float32)
            return tf.concat(2, outputs), lengths

    doc_emb = embed(documents)
    query_emb = embed(query)

    h_doc, doc_len = encode('document', doc_emb, doc_mask)
    h_query, _ = encode('query', query_emb, query_mask)

    # Pairwise document/query scores and the matching validity mask.
    pair_scores = tf.batch_matmul(h_doc, h_query, adj_y=True)
    doc_mask_col = tf.expand_dims(doc_mask, -1)
    query_mask_row = tf.expand_dims(query_mask, 1)
    pair_mask = tf.to_float(tf.batch_matmul(doc_mask_col, query_mask_row))

    alpha = softmax(pair_scores, 1, pair_mask)
    beta = softmax(pair_scores, 2, pair_mask)

    # Average beta over the document axis using the true document length
    # rather than the padded length.
    beta_total = tf.reduce_sum(beta, 1)
    doc_len_col = tf.to_float(tf.expand_dims(doc_len, -1))
    query_importance = tf.expand_dims(beta_total / doc_len_col, -1)

    s = tf.squeeze(tf.batch_matmul(alpha, query_importance), [2])

    # Per example, add up the weight of every position holding the same id.
    per_example_scores = tf.unpack(s, FLAGS.batch_size)
    per_example_ids = tf.unpack(documents, FLAGS.batch_size)
    summed = []
    for attentions, sentence_ids in zip(per_example_scores, per_example_ids):
        summed.append(
            tf.unsorted_segment_sum(attentions, sentence_ids,
                                    FLAGS.vocab_size))
    y_hat = tf.pack(summed)

    return y_hat, regularizer
    def build_generator(self):
        """Build the graph that scores (question, image, answer) inputs.

        Questions and answers are fed as two half-batches, concatenated along
        the batch axis and encoded together by one bidirectional RNN.  The
        resulting state is split back into its question and answer halves,
        fused with the image features through element-wise products, and
        projected to scores.

        Returns:
            A tuple `(generated_ANS, image, question, answer)`: the transposed
            score tensor followed by the three placeholders to feed.
        """
        # Each of image/question/answer holds half a batch.  Integer division
        # keeps the placeholder shapes integral, and the same value is reused
        # below to split the encoder state, so the split always matches the
        # placeholders instead of assuming a particular batch size.
        half_batch = self.batch_size // 2

        image = tf.placeholder(tf.float32, [half_batch, self.dim_image])
        question = tf.placeholder(tf.int32, [half_batch, self.max_words_q])
        answer = tf.placeholder(tf.int32, [half_batch, self.max_words_q])

        # Rows [0, half_batch) are questions, rows [half_batch, ...) answers.
        question_ans = tf.concat(0, [question, answer])

        inputs = tf.nn.embedding_lookup(self.embed_ques_W, question_ans)
        # Time-major list of per-step inputs, as the static RNN expects.
        inputs = tf.unpack(tf.transpose(inputs, [1, 0, 2]))
        tf.get_variable_scope().reuse_variables()

        _, state_fw, state_bw = tf.nn.bidirectional_rnn(
            self.forward_dropout,
            self.backward_dropout,
            inputs,
            dtype=tf.float32)
        # Combine both directions element-wise, then undo the concatenation.
        state = tf.mul(state_fw, state_bw)
        state_que = state[0:half_batch, :]
        state_ans = state[half_batch:, :]

        # multimodal (fusing question & image)
        Q_drop = tf.nn.dropout(state_que, 1 - self.drop_out_rate)
        Q_linear = tf.nn.xw_plus_b(Q_drop, self.embed_Q_W, self.embed_Q_b)
        Q_emb = tf.tanh(Q_linear)

        image_drop = tf.nn.dropout(image, 1 - self.drop_out_rate)
        image_linear = tf.nn.xw_plus_b(image_drop, self.embed_image_W,
                                       self.embed_image_b)
        image_emb = tf.tanh(image_linear)

        A_drop = tf.nn.dropout(state_ans, 1 - self.drop_out_rate)
        A_linear = tf.nn.xw_plus_b(A_drop, self.embed_A_W, self.embed_A_b)
        A_emb = tf.tanh(A_linear)

        QI = tf.mul(Q_emb, image_emb)

        QI_drop = tf.nn.dropout(QI, 1 - self.drop_out_rate)
        QI_linear = tf.nn.xw_plus_b(QI_drop, self.embed_QI_W, self.embed_QI_b)
        QI_emb = tf.tanh(QI_linear)

        # Fuse the question/image representation with the answer encoding.
        QIA = tf.mul(QI_emb, A_emb)
        scores_emb = tf.nn.xw_plus_b(QIA, self.embed_scor_W,
                                     self.embed_scor_b)
        generated_ANS = tf.transpose(scores_emb)

        return generated_ANS, image, question, answer
Esempio n. 58
0
print(y)

# Inputs and integer targets for one truncated-backprop window.
batchX_placeholder = tf.placeholder(
    tf.float32, [batch_size, truncated_backprop_length])  # 5, 15
batchY_placeholder = tf.placeholder(
    tf.int32, [batch_size, truncated_backprop_length])  # 5, 15

# RNN state carried over from the previous window.
init_state = tf.placeholder(tf.float32, [batch_size, state_size])  # 5, 4

# Output projection from the RNN state to class logits.
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, num_classes)), dtype=tf.float32)

# Unpack columns: one entry per time step.
# tf.split keeps every input slice 2-D ([batch_size, 1]), which is what the
# RNN consumes at each step.
inputs_series = tf.split(1, truncated_backprop_length, batchX_placeholder)
# The second positional argument of tf.unpack is `num`, not `axis`, so the
# axis has to be passed by keyword to get one [batch_size] label vector per
# time step (matching inputs_series).
labels_series = tf.unpack(batchY_placeholder, axis=1)

# Forward passes
cell = tf.nn.rnn_cell.BasicRNNCell(state_size)
states_series, current_state = tf.nn.rnn(cell, inputs_series, init_state)

logits_series = [tf.matmul(state, W2) + b2
                 for state in states_series]  # Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

# Per-time-step cross entropy, averaged over all steps and examples.
losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
    for logits, labels in zip(logits_series, labels_series)
]
total_loss = tf.reduce_mean(losses)
Esempio n. 59
0
def _rnn_template(incoming,
                  cell,
                  dropout=None,
                  return_seq=False,
                  return_state=False,
                  initial_state=None,
                  dynamic=False,
                  scope=None,
                  name="LSTM"):
    """Run `cell` over `incoming` and register the layer in TF collections.

    Args:
        incoming: the layer input.  A Python list is passed to `_rnn` as is;
            anything else is treated as a tensor of rank >= 3, has its first
            two axes swapped and is unpacked into a per-timestep list.
        cell: the RNN cell to run; its `W` and `b` attributes are added to
            the layer-variables collection.
        dropout: falsy for no dropout, a float used as both input and output
            keep probability, or a tuple/list whose first two items are the
            input and output keep probabilities.
        return_seq: if True, return the outputs of every timestep instead of
            a single output.
        return_state: if True, return `(output, state)` instead of `output`.
        initial_state: forwarded to `_rnn`.
        dynamic: if True, sequence lengths are computed with
            `retrieve_seq_length_op` and, unless `return_seq` is set, the
            returned output is picked with `advanced_indexing_op`.
        scope: optional variable scope for the layer.
        name: default scope name used when `scope` is not given.

    Returns:
        The selected output, or `(output, state)` when `return_state` is True.

    Raises:
        Exception: if `dropout` is truthy but neither a float nor a
            tuple/list.
    """
    seq_lengths = None
    if dynamic:
        if isinstance(incoming, tf.Tensor):
            packed = incoming
        else:
            packed = tf.pack(incoming)
        seq_lengths = retrieve_seq_length_op(packed)

    incoming_shape = utils.get_incoming_shape(incoming)

    # Variable Scope fix for older TF
    try:
        vscope = tf.variable_scope(scope, default_name=name, values=[incoming])
    except Exception:
        vscope = tf.variable_op_scope([incoming], scope, name)

    with vscope as layer_scope:
        layer_name = layer_scope.name

        # Keep a handle on the unwrapped cell: its W/b are collected below
        # even when the cell that actually runs is a dropout wrapper.
        base_cell = cell
        run_cell = cell
        if dropout:
            if type(dropout) in (tuple, list):
                keep_in, keep_out = dropout[0], dropout[1]
            elif isinstance(dropout, float):
                keep_in = keep_out = dropout
            else:
                raise Exception("Invalid dropout type (must be a 2-D tuple of "
                                "float)")
            run_cell = DropoutWrapper(cell, keep_in, keep_out)

        steps = incoming
        # If a tensor given, convert it to a per timestep list
        if type(steps) not in [list, np.array]:
            rank = len(incoming_shape)
            assert rank >= 3, "Input dim should be at least 3."
            perm = [1, 0] + list(range(2, rank))
            steps = tf.unpack(tf.transpose(steps, perm))

        outputs, state = _rnn(run_cell,
                              steps,
                              dtype=tf.float32,
                              initial_state=initial_state,
                              scope=layer_name,
                              sequence_length=seq_lengths)

        # Retrieve RNN Variables
        var_collection = tf.GraphKeys.LAYER_VARIABLES + '/' + layer_scope.name
        for attr in (base_cell.W, base_cell.b):
            members = attr if hasattr(attr, "__len__") else [attr]
            for variable in members:
                tf.add_to_collection(var_collection, variable)
        # Track activations.
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, outputs[-1])

    if return_seq:
        result = outputs
    elif dynamic:
        # Batch-major stack of all outputs, indexed by each sequence length.
        stacked = tf.transpose(tf.pack(outputs), [1, 0, 2])
        result = advanced_indexing_op(stacked, seq_lengths)
    else:
        result = outputs[-1]

    # Track output tensor.
    tf.add_to_collection(tf.GraphKeys.LAYER_TENSOR + '/' + layer_name, result)

    if return_state:
        return (result, state)
    return result
Esempio n. 60
0
def main(args):
    """Build the classifier training graph and run the training loop.

    Creates timestamped log and model directories, loads the training set,
    builds a queue-based input pipeline, the network imported from
    `args.model_def`, the loss (cross entropy plus regularization losses) and
    the training op, then alternates training, checkpointing and optional
    LFW evaluation.

    Args:
        args: options object.  This function reads, among others,
            `model_def`, `logs_base_dir`, `models_base_dir`, `seed`,
            `data_dir`, `filter_filename`, `pretrained_model`, `lfw_dir`,
            `image_size`, `batch_size`, `epoch_size` and `max_nrof_epochs`.

    Returns:
        The model directory created for this run (a timestamped
        sub-directory of `args.models_base_dir`).
    """
  
    network = importlib.import_module(args.model_def, 'inference')

    # One timestamp names both the log and the model sub-directory of this run.
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    # Seed the NumPy and Python RNGs; the TF graph is seeded separately below.
    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, args.filter_filename, 
            args.filter_percentile, args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)
    
    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        #pretrained_model = os.path.expanduser(args.pretrained_model)
        ## edit by mzh
        # args.pretrained_model is used as a directory here; the checkpoint
        # file name inside it comes from facenet.get_model_filenames.
        # NOTE(review): meta_file is not used afterwards.
        meta_file, ckpt_file = facenet.get_model_filenames(args.pretrained_model)
        pretrained_model = os.path.join(os.path.expanduser(args.pretrained_model), ckpt_file)

        print('Pre-trained model: %s' % pretrained_model)
    
    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)
    
    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)
        
        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        image_list, label_list = facenet.shuffle_examples(image_list, label_list)

        # Values fed at run time by the train/evaluate helpers.
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths')

        labels_placeholder = tf.placeholder(tf.int64, shape=(None,1), name='labels')
        
        # Each queue element is an (image path, label) pair, both of shape (1,).
        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                    dtypes=[tf.string, tf.int64],
                                    shapes=[(1,), (1,)],
                                    shared_name=None, name=None)
        ## enqueue_op pushes the fed image paths/labels into the queue to be read later
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder], name='enqueue_op')
        
        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads): # one dequeue + preprocessing pipeline per iteration (i.e. images, labels)
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unpack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_png(file_contents)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)
    
                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])
        # Several pipelines produce (images, label) in parallel, so they are merged with tf.train.batch_join rather than tf.train.batch or tf.train.shuffle_batch to produce the image / label batch for training or evaluation
        image_batch, label_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder, 
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        # Give the batch tensors fixed names in the graph.
        image_batch = tf.identity(image_batch, 'image_batch')
        label_batch = tf.identity(label_batch, 'label_batch')
        
        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        
        print('Building training graph')
        
        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability, 
            phase_train=phase_train_placeholder, weight_decay=args.weight_decay)
        # Classification layer with one output per training class.
        logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                weights_regularizer=slim.l2_regularizer(args.weight_decay),
                scope='Logits', reuse=False)

        # L2-normalized prelogits; these are what evaluate() receives.
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor>0.0:
            prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor)

        # Staircase decay: the rate changes every
        # learning_rate_decay_epochs * epoch_size steps.
        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, label_batch, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        
        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer, 
            learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms)
        
        # Create a saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        ## Start the queue runners so the input pipeline fills the example queue and the dequeue ops can get examples
        tf.train.start_queue_runners(sess=sess)

        with sess.as_default():

            # Restoring happens after initialization, so restored values
            # overwrite the freshly initialized variables.
            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)


            # Training and validation loop
            print('Running training')
            epoch = 0
            # NOTE(review): `epoch` is recomputed from global_step only after
            # the loop condition has been tested, so the condition always sees
            # the previous iteration's value -- confirm the intended number
            # of epochs.
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_placeholder, labels_placeholder,
                    learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, 
                    total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                # NOTE(review): `step` is the value read before train() ran.
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, 
                        embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer)
    return model_dir