コード例 #1
0
ファイル: copy_pattern.py プロジェクト: PFCM/datasets
def get_online_sequences(sequence_length, batch_size,
                         pattern_length=10):
    """Build copy-task input/target tensors that are resampled on every run.

    Args:
        sequence_length: number of time steps between the end of the pattern
            and the point at which it has to be reproduced.
        batch_size: number of sequences per batch.
        pattern_length: number of symbols in the pattern that has to be
            remembered and regurgitated.

    Returns:
        (inputs, targets): both are time-major tensors of shape
            `[sequence_length + 2*pattern_length, batch_size, 1]`.
    """
    def constant_block(steps, symbol):
        # `steps` time steps that all carry the same symbol
        return tf.fill([steps, batch_size, 1], symbol)

    # the random payload is drawn with maxval=8; 8 is the blank filler and
    # 9 is the "go" marker that precedes the recall phase
    pattern = tf.random_uniform([pattern_length, batch_size, 1], maxval=8,
                                dtype=tf.int32)

    # inputs: pattern, blanks, go marker, then blanks while the model answers
    inputs = tf.concat(axis=0, values=[
        pattern,
        constant_block(sequence_length - 1, 8),
        constant_block(1, 9),
        constant_block(pattern_length, 8),
    ])

    # targets: blanks until the recall phase, then the pattern itself
    targets = tf.concat(axis=0, values=[
        constant_block(sequence_length + pattern_length, 8),
        pattern,
    ])

    return inputs, targets
コード例 #2
0
ファイル: bpm_estimator.py プロジェクト: nearlyeveryone/bpm
def thresholding(inputs):
    """Zero out sub-threshold activations, then mark local maxima.

    A per-example threshold is derived from the mean of `inputs` over axis 1,
    multiplied by a variable initialised from `utils.threshold_scalar` and
    clamped to the range [0.05, 0.15].

    Args:
        inputs: float tensor whose leading dimension is `utils.batch_size`.
            Assumed to be rank 3 so that the `[batch, 1, 1]` thresholds
            broadcast against it -- TODO confirm against the caller.

    Returns:
        float32 tensor shaped like `inputs`: 1.0 where the thresholded signal
        is strictly greater than both of its neighbours along axis 1 (the
        neighbours come from `roll`, so the ends wrap around), 0.0 elsewhere.
    """
    batch_size = utils.batch_size

    # find the mean for each example in the batch
    mean_output = tf.reduce_mean(inputs, axis=1)

    # scale each mean based on a factor; dtype is passed by keyword because
    # the second positional parameter of tf.Variable is `trainable`
    threshold_scalar = tf.Variable(utils.threshold_scalar, dtype=tf.float32)
    scaled_mean = tf.scalar_mul(threshold_scalar, mean_output)
    scaled_mean = tf.reshape(scaled_mean, [batch_size])

    # clamp the per-example threshold to [0.05, 0.15]
    # (a commented-out 0.4 sat next to the upper bound in earlier code)
    lower_limit = tf.fill([batch_size], 0.05)
    upper_limit = tf.fill([batch_size], 0.15)
    thresholds = tf.maximum(lower_limit, scaled_mean)
    thresholds = tf.minimum(upper_limit, thresholds)

    # zero values under the thresholds using bitmask; the reshape follows
    # utils.batch_size instead of a hard-coded 128 so that the function works
    # for any configured batch size
    thresholds = tf.reshape(thresholds, [batch_size, 1, 1])

    threshold_mask = tf.cast(tf.greater(inputs, thresholds), tf.float32)
    thresholded_input = tf.multiply(inputs, threshold_mask)

    # peak picking
    # select beats by x[i-1] < x[i] > x[i+1] (local maximum)
    above_next = tf.cast(
        tf.greater(thresholded_input,
                   tf.manip.roll(thresholded_input, shift=-1, axis=1)),
        tf.float32)
    above_previous = tf.cast(
        tf.greater(thresholded_input,
                   tf.manip.roll(thresholded_input, shift=1, axis=1)),
        tf.float32)
    output = tf.multiply(above_next, above_previous)

    return output
コード例 #3
0
ファイル: student_t.py プロジェクト: asudomoeva/probability
  def _variance(self):
    """Variance of the distribution, broadcast to the batch shape.

    The result is `scale**2 * df / (df - 2)` where `df > 2` and infinity
    where `df <= 2`. With `allow_nan_stats` the components with `df <= 1`
    become NaN; otherwise an assertion requires every `df` to exceed 1.
    """
    # Swap the denominator for 1 wherever df <= 2 *before* dividing, so the
    # branch that gets discarded below never produces a NaN in the gradient.
    safe_denominator = tf.where(tf.greater(self.df, 2.),
                                self.df - 2.,
                                tf.ones_like(self.df))
    # scale is squared, so taking its absolute value would be superfluous.
    broadcast_ones = tf.ones(self.batch_shape_tensor(), dtype=self.dtype)
    finite_variance = (
        broadcast_ones * tf.square(self.scale) * self.df / safe_denominator)

    # When 1 < df <= 2, variance is infinite.
    infinity = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
    variance_or_inf = tf.where(
        self.df > tf.fill(self.batch_shape_tensor(), 2.),
        finite_variance,
        tf.fill(self.batch_shape_tensor(), infinity, name="inf"))

    if not self.allow_nan_stats:
      # Strict mode: fail at run time instead of returning NaN.
      df_exceeds_one = tf.assert_less(
          tf.ones([], dtype=self.dtype),
          self.df,
          message="variance not defined for components of df <= 1")
      return control_flow_ops.with_dependencies(
          [df_exceeds_one], variance_or_inf)

    not_a_number = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return tf.where(
        tf.greater(
            self.df,
            tf.ones(self.batch_shape_tensor(), dtype=self.dtype)),
        variance_or_inf,
        tf.fill(self.batch_shape_tensor(), not_a_number, name="nan"))
コード例 #4
0
  def testParallelAssignWithLocking(self):
    """Concurrent locked assigns must leave one complete value in the variable."""
    with self.test_session() as sess:
      shape = [1024, 1024]
      zeros_t = tf.fill(shape, 0.0)
      ones_t = tf.fill(shape, 1.0)
      p = tf.Variable(zeros_t)

      # Twenty assign ops, the i-th writing a matrix filled with float(i).
      assign_ops = []
      for factor in range(1, 21):
        scaled = tf.mul(ones_t, float(factor))
        assign_ops.append(tf.assign(p, scaled, use_locking=True))
      p.initializer.run()

      # Run every assign on its own thread and wait for all of them.
      workers = [self.checkedThread(target=sess.run, args=(op,))
                 for op in assign_ops]
      for worker in workers:
        worker.start()
      for worker in workers:
        worker.join()

      final = p.eval()
      corner = final[0, 0]

      # Assert every element is the same, and taken from one of the assignments.
      self.assertTrue(corner > 0)
      self.assertTrue(corner <= 20)
      self.assertAllEqual(final, np.ones(shape) * corner)
コード例 #5
0
ファイル: lstm.py プロジェクト: JoyceYa/edward
def language_model(input, vocab_size):
  r"""Form p(x[0], ..., x[timesteps - 1]),

  \prod_{t=0}^{timesteps - 1} p(x[t] | x[:t]),

  To calculate the probability, we call log_prob on
  x = [x[0], ..., x[timesteps - 1]] given
  `input` = [0, x[0], ..., x[timesteps - 2]].

  We implement this separately from the generative model so the
  forward pass, e.g., embedding/dense layers, can be parallelized.

  [batch_size, timesteps] -> [batch_size, timesteps]

  Args:
    input: integer token ids of shape [batch_size, timesteps]; one-hot
      encoded with depth `vocab_size` before being fed to the LSTM.
    vocab_size: size of the vocabulary (one-hot depth and logit width).

  Returns:
    A `Categorical` built from the per-timestep logits.
  """
  # The docstring is a raw string: "\prod" is not a valid escape sequence in
  # a normal string literal and triggers a warning on recent Python versions.
  x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)

  # Hidden and cell state both start as zeros of shape
  # [batch_size, hidden_size]; the batch size is only known at run time.
  state_shape = tf.stack([tf.shape(x)[0], FLAGS.hidden_size])
  h = tf.fill(state_shape, 0.0)
  c = tf.fill(state_shape, 0.0)

  hs = []
  for t in range(FLAGS.timesteps):
    # reuse is None on the first step and True on every later one.
    reuse = True if t > 0 else None
    xt = x[:, t, :]
    h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
    hs.append(h)

  # Stack the per-step hidden states along a new time axis (axis 1).
  h = tf.stack(hs, 1)
  logits = tf.layers.dense(h, vocab_size, name="dense")
  output = Categorical(logits=logits)
  return output
コード例 #6
0
ファイル: qaLSTMNet.py プロジェクト: sjqzhang/QA
 def getLoss(trueCosSim, falseCosSim, margin):
     """Summed hinge loss: sum(max(0, margin - (trueCosSim - falseCosSim))).

     The zero and margin tensors are filled to the run-time shape of
     `trueCosSim`; only the loss arithmetic lives in the "loss" name scope.
     """
     floor = tf.fill(tf.shape(trueCosSim), 0.0)
     margin_t = tf.fill(tf.shape(trueCosSim), margin)
     with tf.name_scope("loss"):
         similarity_gap = tf.subtract(trueCosSim, falseCosSim)
         hinge = tf.maximum(floor, tf.subtract(margin_t, similarity_gap))
         total = tf.reduce_sum(hinge)
     return total
コード例 #7
0
    def _create_state(self, batch_size, dtype, cell_state=None):
        """Build the initial beam-search state tuple.

        Args:
            batch_size: number of sequences decoded in parallel.
            dtype: dtype handed to `self.cell.zero_state` when no
                `cell_state` is supplied.
            cell_state: optional initial cell state; it is tiled along the
                beam when given.

        Returns:
            (cand_symbols, cand_logprobs, beam_symbols, beam_logprobs,
            cell_state)
        """
        full_size = batch_size * self.beam_size

        # Candidates: every slot holds the start token, every score is -inf.
        candidate_symbols = tf.fill(
            [batch_size, self.max_len],
            tf.constant(self.start_token, dtype=tf.int32))
        candidate_logprobs = (
            tf.ones((batch_size,), dtype=tf.float32) * -float('inf'))
        candidate_symbols.set_shape([batch_size, self.max_len])

        if cell_state is not None:
            cell_state = BeamDecoder._tile_along_beam(self.beam_size, cell_state)
        else:
            cell_state = self.cell.zero_state(full_size, dtype=dtype)

        # Only the first entry of each beam starts out as a live hypothesis.
        leads_its_beam = tf.equal(tf.range(full_size) % self.beam_size, 0)

        beam_symbols = tf.fill(
            [full_size, self.max_len],
            tf.constant(self.start_token, dtype=tf.int32))
        live_score = tf.fill([full_size], 0.0)
        # top_k does not play well with -inf
        # TODO: dtype-dependent value here
        dead_score = tf.fill([full_size], -1e18)
        beam_logprobs = tf.select(leads_its_beam, live_score, dead_score)

        return (
            candidate_symbols,
            candidate_logprobs,
            beam_symbols,
            beam_logprobs,
            cell_state,
        )
コード例 #8
0
ファイル: model.py プロジェクト: Hukongtao/models
 def compute_ans(op_embedding, comparison):
   """Select a question number using scores against the operation embedding.

   Two score columns are built, one per ordinal-question mask, softmaxed
   (and hardened to a 0/1 selection in "test" mode), then used to weight
   `batch_question_number` and `batch_question_number_one`.

   `comparison` is not read by this function body.

   Returns:
     Tensor reshaped to [batch_size, 1].
   """
   op_row = tf.expand_dims(op_embedding, 0)
   tile_multiples = [1, 1, self.utility.FLAGS.embedding_dims]

   def score_against(ordinal_question):
     # dot product of operation embedding with hidden state to the left of
     # the number occurrence: mask hidden_vectors, sum over axis 0, project
     tiled_mask = tf.tile(
         tf.expand_dims(tf.transpose(ordinal_question), 2), tile_multiples)
     summed_hidden = tf.reduce_sum(hidden_vectors * tiled_mask, 0)
     return tf.transpose(tf.matmul(op_row, tf.transpose(summed_hidden)))

   first = score_against(self.batch_ordinal_question)
   second = self.batch_question_number_one_mask + score_against(
       self.batch_ordinal_question_one)

   question_number_softmax = tf.nn.softmax(
       tf.concat(axis=1, values=[first, second]))

   if self.mode == "test":
     # Hard selection: 1.0 at each row's maximum, 0.0 everywhere else.
     row_max = tf.reshape(
         tf.reduce_max(question_number_softmax, 1), [self.batch_size, 1])
     is_row_max = tf.equal(question_number_softmax, row_max)
     hard_selection = tf.where(
         is_row_max,
         tf.fill(tf.shape(question_number_softmax), 1.0),
         tf.fill(tf.shape(question_number_softmax), 0.0))
     question_number_softmax = tf.cast(hard_selection, self.data_type)

   candidate_numbers = tf.concat(
       axis=1,
       values=[self.batch_question_number, self.batch_question_number_one])
   weighted_sum = tf.reduce_sum(
       question_number_softmax * candidate_numbers, 1)
   return tf.reshape(weighted_sum, [self.batch_size, 1])
コード例 #9
0
def _chain_backprop(n):
  """Builds a chain of n tanh ops and its gradient with tf.gradients.

  A0->A1->A2->..->An
    /    /       /
  B0<-B1<-B2<-..<-Bn

  Returns:
    B0, the gradient of An with respect to A0 seeded with Bn.
  """
  chain_input = tf.fill((size,), 1.0, name="A0")

  # Forward pass: op number i produces A_i = tanh(A_{i-1}) for i = 1..n.
  activation = chain_input
  for step in range(1, n + 1):
    activation = tf.tanh(activation, name="A" + str(step))

  # Backward pass: seed the gradient at An with ones and pull it back to A0.
  seed_gradient = tf.fill((size,), 1.0, name="Bn")
  return tf.gradients([activation], [chain_input], grad_ys=[seed_gradient])[0]
コード例 #10
0
  def add_model(self, input_data):
    """Applies a linear layer followed by a softmax.

    Maps a batch of inputs to a batch of predictions via

    y = softmax(xW + b)

    W and b are created as new tf.Variables, both initialised to zeros,
    inside the "linear-transform" variable scope.

    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    n_features = self.config.n_features
    n_classes = self.config.n_classes
    ### YOUR CODE HERE
    with tf.variable_scope("linear-transform"):
      weight = tf.Variable(tf.fill([n_features, n_classes], 0.0))
      bias = tf.Variable(tf.fill([n_classes], 0.0))
      logits = tf.matmul(input_data, weight) + bias
      out = softmax(logits)
    ### END YOUR CODE
    return out
コード例 #11
0
 def testInitRequiredAssignAdd(self):
   """assign_add on a variable that was never initialised must raise."""
   with self.test_session():
     shape = [1024, 1024]
     p = tf.Variable(tf.fill(shape, 1), tf.int32)
     increment = tf.assign_add(p, tf.fill(shape, 0))
     # p.initializer is deliberately not run before the op executes.
     with self.assertRaisesOpError("use uninitialized"):
       increment.op.run()
コード例 #12
0
ファイル: model.py プロジェクト: Hukongtao/models
 def make_hard_softmax(self, softmax):
   """Converts a soft selection into a hard 0/1 selection (used at test time).

   Entries equal to their row's maximum become 1.0, all others 0.0; the
   result is cast to `self.data_type`.
   """
   row_max = tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1])
   is_row_max = tf.equal(softmax, row_max)
   ones = tf.fill(tf.shape(softmax), 1.0)
   zeros = tf.fill(tf.shape(softmax), 0.0)
   hard_selection = tf.where(is_row_max, ones, zeros)
   return tf.cast(hard_selection, self.data_type)
コード例 #13
0
ファイル: blocks_lstm.py プロジェクト: 812864539/models
def LSTMBiasInit(shape, dtype):
  """Returns ones for forget-gate, and zeros for the others.

  The bias is a vector of length `shape[0]`: the first quarter is filled
  with ones and the remaining three quarters with zeros.

  Args:
    shape: one-element shape whose single entry is a multiple of 4.
    dtype: dtype of the returned tensor.
  """
  dims = np.array(shape)

  # The bias must be a vector whose length splits evenly into four parts.
  assert dims.shape == (1,), dims
  assert dims[0] % 4 == 0, dims

  quarter = dims[0] // 4
  forget_part = tf.fill([quarter], tf.constant(1, dtype=dtype))
  other_parts = tf.fill([3 * quarter], tf.constant(0, dtype=dtype))
  return tf.concat([forget_part, other_parts], 0)
コード例 #14
0
ファイル: constant_op_test.py プロジェクト: 4chin/tensorflow
  def testFillNegative(self):
    """Negative dimensions are rejected both statically and at run time."""
    negative_shapes = ((-1,), (2, -1), (-1, 2))
    with self.test_session():
      # A literal shape is checked in Python while the op is being built.
      for bad_shape in negative_shapes:
        with self.assertRaises(ValueError):
          tf.fill(bad_shape, 7)

      # Using a placeholder so this won't be caught in Python.
      dims = tf.placeholder(tf.int32)
      fill_t = tf.fill(dims, 3.0)
      for bad_shape in negative_shapes:
        with self.assertRaises(tf.errors.InvalidArgumentError):
          fill_t.eval({dims: bad_shape})
コード例 #15
0
    def testShapeFunctionEdgeCases(self):
        """Checks tf.fill's static shape handling for malformed arguments."""
        # Non-vector dimensions.
        matrix_dims = [[0, 1], [2, 3]]
        with self.assertRaises(ValueError):
            tf.fill(matrix_dims, 1.0)

        # Non-scalar value.
        vector_value = [1.0, 2.0]
        with self.assertRaises(ValueError):
            tf.fill([3, 2], vector_value)

        # Partial dimension information: the rank (4) is known from the
        # placeholder's shape, the individual sizes are not.
        dims = tf.placeholder(tf.int32, shape=(4,))
        filled = tf.fill(dims, 3.0)
        self.assertEqual([None, None, None, None],
                         filled.get_shape().as_list())
コード例 #16
0
  def testAssignNonStrictShapeChecking(self):
    """With validate_shape=False a variable may be assigned any shape."""
    with self.test_session():
      big_zeros = tf.fill([1024, 1024], 0)
      p = tf.Variable([1])
      assign_big = tf.assign(p, big_zeros, validate_shape=False)
      assign_big.op.run()
      self.assertAllEqual(p.eval(), big_zeros.eval())

      # Assign to yet another shape
      small_ones = tf.fill([10, 10], 1)
      assign_small = tf.assign(p, small_ones, validate_shape=False)
      assign_small.op.run()
      self.assertAllEqual(p.eval(), small_ones.eval())
コード例 #17
0
ファイル: rnn_core_test.py プロジェクト: ccchang0111/sonnet
  def testInitialStateComputation(self, tuple_state, mask):
    """Checks TrainableInitialState initialisation and mask handling.

    Args:
      tuple_state: when true, use a nested tuple of three state tensors;
        otherwise a single tensor.
      mask: optional structure of booleans forwarded to the module; entries
        that are true are expected to be trainable.
    """
    if tuple_state:
      initial_state = (tf.fill([BATCH_SIZE, 6], 2),
                       (tf.fill([BATCH_SIZE, 7], 3),
                        tf.fill([BATCH_SIZE, 8], 4)))
    else:
      initial_state = tf.fill([BATCH_SIZE, 9], 10)

    trainable_state_module = snt.TrainableInitialState(initial_state, mask=mask)
    trainable_state = trainable_state_module()
    flat_trainable_state = nest.flatten(trainable_state)
    nest.assert_same_structure(initial_state, trainable_state)
    flat_initial_state = nest.flatten(initial_state)
    # Without a mask every state component is treated as trainable.
    flat_mask = (nest.flatten(mask) if mask is not None
                 else (True,) * len(flat_initial_state))

    self.evaluate(tf.global_variables_initializer())

    # Right after initialisation the module must return exactly the state
    # it was constructed with.
    for produced, provided in zip(flat_trainable_state, flat_initial_state):
      self.assertAllEqual(self.evaluate(produced), self.evaluate(provided))

    # Change the value of all the trainable variables to ones.
    for variable in tf.trainable_variables():
      self.evaluate(tf.assign(variable, tf.ones_like(variable)))

    # In eager mode to re-evaluate the module we must re-connect it.
    trainable_state = trainable_state_module()
    flat_trainable_state = nest.flatten(trainable_state)

    # Check that the values of the initial_states have changed if and only if
    # they are trainable.
    for produced, provided, is_trainable in zip(flat_trainable_state,
                                                flat_initial_state,
                                                flat_mask):
      produced_value = self.evaluate(produced)
      provided_value = self.evaluate(provided)
      expected_value = (np.ones_like(provided_value) if is_trainable
                        else provided_value)

      self.assertAllEqual(produced_value, expected_value)
コード例 #18
0
def rnn_decoder(decoder_inputs, initial_state, cell, word_dropout_keep_prob=1, replace_inp=None,
                loop_function=None, scope=None):
  """RNN decoder for the sequence-to-sequence model, with word dropout.

  Args:
    decoder_inputs: A list of 2D Tensors [batch_size x input_size].
    initial_state: 2D Tensor with shape [batch_size x cell.state_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    word_dropout_keep_prob: probability, drawn independently per step, of
      keeping the loop_function output as the next input. Only consulted
      when it is below 1 and loop_function is in use.
    replace_inp: Tensor fed as the input at steps where the loop_function
      output is dropped.
    loop_function: If not None, this function will be applied to the i-th output
      in order to generate the i+1-st input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/abs/1506.03099.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x input_size].
    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing generated outputs.
      state: The state of each cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
        (Note that in some cases, like basic RNN cell or GRU cell, outputs and
         states can be the same. They are different for LSTM cells though.)
  """
  with variable_scope.variable_scope(scope or "rnn_decoder"):
    cell_state = initial_state
    outputs = []
    previous_output = None
    num_steps = len(decoder_inputs)

    # One boolean per step: True means "keep the loop_function output".
    keep_draw = tf.random_uniform([num_steps]) < word_dropout_keep_prob
    keep = tf.select(keep_draw,
                     tf.fill([num_steps], True),
                     tf.fill([num_steps], False))

    for i, inp in enumerate(decoder_inputs):
      feed_back = loop_function is not None and previous_output is not None
      if feed_back:
        with variable_scope.variable_scope("loop_function", reuse=True):
          if word_dropout_keep_prob < 1:
            inp = tf.cond(keep[i],
                          lambda: loop_function(previous_output, i),
                          lambda: replace_inp)
          else:
            inp = loop_function(previous_output, i)
      if i > 0:
        # Every step after the first shares the cell's variables.
        variable_scope.get_variable_scope().reuse_variables()
      step_output, cell_state = cell(inp, cell_state)
      outputs.append(step_output)
      if loop_function is not None:
        previous_output = step_output
  return outputs, cell_state
コード例 #19
0
ファイル: util.py プロジェクト: asudomoeva/probability
 def grad(grad_ys):
   """Scales `grad_ys` by 0.5 * rsqrt(x), using a large finite value at x == 0.

   The substitute is the square root of the largest finite value of x's
   dtype, so the result stays finite where rsqrt would not.
   """
   largest_finite = np.finfo(x.dtype.as_numpy_dtype()).max
   zero_substitute = np.sqrt(largest_finite)
   x_is_zero = tf.equal(x, 0)
   safe_grads = tf.where(
       x_is_zero,
       tf.fill(x.shape, zero_substitute),
       0.5 * tf.rsqrt(x))
   return grad_ys * safe_grads
コード例 #20
0
ファイル: batch_reshape.py プロジェクト: lewisKit/probability
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape).

  Args:
    original_shape: shape being reshaped from.
    new_shape: requested shape; may contain a single -1.
    validate: when true, also build the assertion ops that check the request.
    name: optional name scope, defaults to "calculate_reshape".

  Returns:
    A tuple (expanded_new_shape, batch_shape_static, validations). When
    `new_shape` is statically fully defined the first element is an
    `np.int32` array and `validations` is empty.
  """
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    # Everything is known statically; no graph ops are needed.
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []

  with tf.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = tf.reduce_prod(original_shape)
    implicit_dim = tf.equal(new_shape, -1)
    # With exactly one -1, -prod(new_shape) is the product of the explicit
    # dimensions; the maximum with 1 guards the division when there is none.
    size_implicit_dim = (
        original_size // tf.maximum(1, -tf.reduce_prod(new_shape)))
    new_ndims = tf.shape(new_shape)
    expanded_new_shape = tf.where(  # Assumes exactly one `-1`.
        implicit_dim, tf.fill(new_ndims, size_implicit_dim), new_shape)

    validations = []
    if validate:
      validations = [
          tf.assert_rank(
              original_shape, 1, message="Original shape must be a vector."),
          tf.assert_rank(new_shape, 1, message="New shape must be a vector."),
          tf.assert_less_equal(
              tf.count_nonzero(implicit_dim, dtype=tf.int32),
              1,
              message="At most one dimension can be unknown."),
          tf.assert_positive(
              expanded_new_shape, message="Shape elements must be >=-1."),
          tf.assert_equal(
              tf.reduce_prod(expanded_new_shape),
              original_size,
              message="Shape sizes do not match."),
      ]
    return expanded_new_shape, batch_shape_static, validations
コード例 #21
0
ファイル: utils.py プロジェクト: Peratham/models
def BatchClipByL2norm(t, upper_bound, name=None):
  """Clip an array of tensors by L2 norm.

  Shrink each dimension-0 slice of tensor (for matrix it is each row) such
  that the l2 norm is at most upper_bound. Here we clip each row as it
  corresponds to each example in the batch.

  Args:
    t: the input tensor.
    upper_bound: the upperbound of the L2 norm.
    name: optional name.
  Returns:
    the clipped tensor.
  """

  assert upper_bound > 0
  with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
    saved_shape = tf.shape(t)
    batch_size = tf.slice(saved_shape, [0], [1])
    # Flatten every example to a row: [batch_size, -1].
    t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
    upper_bound_inv = tf.fill(batch_size, tf.constant(1.0/upper_bound))
    # Add a small number to avoid divide by 0
    l2norm_inv = tf.rsqrt(tf.reduce_sum(t2 * t2, [1]) + 0.000001)
    # scale is min(1/norm, 1/bound) * bound, i.e. min(bound / norm, 1).
    scale = tf.minimum(l2norm_inv, upper_bound_inv) * upper_bound
    # Scale each row by its own factor through broadcasting. Multiplying by
    # diag(scale) would build a batch x batch matrix and would let a NaN or
    # Inf in one row leak into every other row via 0 * NaN.
    clipped_t = t2 * tf.expand_dims(scale, 1)
    clipped_t = tf.reshape(clipped_t, saved_shape, name=name)
  return clipped_t
コード例 #22
0
 def testLargeFetch(self):
   """Fetches a 10000 x 3000 float tensor through a local in-process server."""
   shape = [10000, 3000]
   server = tf.train.Server.create_local_server()
   with tf.Session(server.target) as sess:
     c = tf.fill(shape, 0.5)
     expected_val = np.full(shape, 0.5, dtype=np.float32)
     self.assertAllEqual(expected_val, sess.run(c))
コード例 #23
0
 def get_variable(constraint):
   """Returns the next input column, or a tensor filled with `constraint`.

   With no constraint the next position is taken from `index` and the
   matching single-column slice of `inputs` is returned; otherwise the
   constraint is broadcast to `constant_shape` in the dtype of `inputs`.
   """
   if constraint is not None:
     fixed_value = tf.constant(constraint, dtype=inputs.dtype)
     return tf.fill(constant_shape, fixed_value)
   column = next(index)
   return inputs[:, column:column+1]
コード例 #24
0
ファイル: lstm1d.py プロジェクト: brchiu/tensorflow
def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
  """Run a 1D LSTM over a sequence, either forward or backward.

  Built on dynamic_rnn and the TensorFlow BasicLSTMCell.

  Args:
    inputs: input sequence (length, batch_size, ninput)
    noutput: depth of output
    scope: optional scope name
    reverse: run LSTM in reverse

  Returns:
    Output sequence (length, batch_size, noutput)
  """
  # Reverse along the time axis only.
  time_axis_only = [True, False, False]
  with tf.variable_scope(scope, "SeqLstm", [inputs]):
    # TODO(tmb) make batch size, sequence_length dynamic
    # example: sequence_length = tf.shape(inputs)[0]
    _, batch_size, _ = _shape(inputs)
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
    initial_state = tf.zeros([batch_size, lstm_cell.state_size])
    # The static sequence length is used for every example in the batch.
    num_steps = int(inputs.get_shape()[0])
    sequence_lengths = tf.to_int64(tf.fill([batch_size], num_steps))
    if reverse:
      inputs = tf.reverse(inputs, time_axis_only)
    outputs, _ = tf.nn.dynamic_rnn(lstm_cell,
                                   inputs,
                                   sequence_lengths,
                                   initial_state,
                                   time_major=True)
    if reverse:
      # Flip back so outputs line up with the original time order.
      outputs = tf.reverse(outputs, time_axis_only)
    return outputs
コード例 #25
0
def init_memory(N, W, R):
    """
    Builds the initial memory state.

    Returns, in order: the memory matrix [N, W], usage vector [N],
    precedence vector [N], link matrix [N, N], read weightings [N, R],
    write weightings [N] and read vectors [W, R]. Usage, precedence and
    links start at zero; everything else starts at 1e-6.
    """
    epsilon = 1e-6

    memory_matrix = tf.fill([N, W], epsilon)
    usage = tf.zeros([N])
    precedence = tf.zeros([N])
    links = tf.zeros([N, N])
    read_weightings = tf.fill([N, R], epsilon)
    write_weightings = tf.fill([N], epsilon)
    read_vectors = tf.fill([W, R], epsilon)

    return (memory_matrix, usage, precedence, links,
            read_weightings, write_weightings, read_vectors)
コード例 #26
0
 def testDtype(self):
   """tf.ones honours default and explicit dtypes for static and dynamic shapes."""
   static_shape = [2, 3]

   def check_ones(z, expected_dtype):
     # Same three checks for every tensor produced below.
     self.assertEqual(z.dtype, expected_dtype)
     self.assertEqual(static_shape, z.get_shape())
     self.assertAllEqual(z.eval(), np.ones(static_shape))

   with self.test_session():
     d = tf.fill(static_shape, 12., name="fill")
     self.assertEqual(d.get_shape(), static_shape)
     # Test default type for both constant size and dynamic size
     check_ones(tf.ones(static_shape), tf.float32)
     check_ones(tf.ones(tf.shape(d)), tf.float32)
     # Test explicit type control
     explicit_dtypes = (tf.float32, tf.float64, tf.int32,
                        tf.uint8, tf.int16, tf.int8,
                        tf.complex64, tf.complex128, tf.int64, tf.bool)
     for dtype in explicit_dtypes:
       check_ones(tf.ones(static_shape, dtype=dtype), dtype)
       check_ones(tf.ones(tf.shape(d), dtype=dtype), dtype)
コード例 #27
0
ファイル: kernels.py プロジェクト: fujiisoup/GPflow
 def K(self, X, X2=None):
     """Kernel matrix: variance on the diagonal for K(X, X), zeros for K(X, X2).

     Without X2 the result is a diagonal matrix with one entry per row of X,
     each equal to the squeezed `self.variance`; with X2 it is a float64
     zero matrix of shape [rows(X), rows(X2)].
     """
     n_rows = tf.shape(X)[0]
     if X2 is not None:
         cross_shape = tf.pack([n_rows, tf.shape(X2)[0]])
         return tf.zeros(cross_shape, tf.float64)
     diagonal = tf.fill(tf.pack([n_rows]), tf.squeeze(self.variance))
     return tf.diag(diagonal)
コード例 #28
0
ファイル: utilities.py プロジェクト: tensorprob/tensorprob
def set_logp_to_neg_inf(X, logp, bounds):
    """Set `logp` to negative infinity when `X` is outside the allowed bounds.

    `X` counts as allowed when it lies strictly inside at least one of the
    regions in `bounds`. If any region spans the whole real line, or if
    `bounds` is empty, `logp` is returned unchanged.

    # Arguments
        X: tensorflow.Tensor
            The variable to apply the bounds to
        logp: tensorflow.Tensor
            The log probability corresponding to `X`
        bounds: list of `Region` objects
            The regions corresponding to allowed regions of `X`; each one
            unpacks into a (lower, upper) pair.

    # Returns
        logp: tensorflow.Tensor
            The newly bounded log probability
    """
    conditions = []
    for lower, upper in bounds:
        # Tensor-valued limits cannot be inspected here, so they are always
        # treated as finite.
        lower_is_neg_inf = (
            not isinstance(lower, tf.Tensor) and np.isneginf(lower))
        upper_is_pos_inf = (
            not isinstance(upper, tf.Tensor) and np.isposinf(upper))

        if lower_is_neg_inf and upper_is_pos_inf:
            # This region already allows every value, so the union of all
            # regions does too. Without this early exit the remaining
            # regions would wrongly restrict X.
            return logp

        if lower_is_neg_inf:
            conditions.append(tf.less(X, upper))
        elif upper_is_pos_inf:
            conditions.append(tf.greater(X, lower))
        else:
            conditions.append(
                tf.logical_and(tf.greater(X, lower), tf.less(X, upper)))

    if conditions:
        # X is allowed if it falls inside any one of the regions.
        is_inside_bounds = conditions[0]
        for condition in conditions[1:]:
            is_inside_bounds = tf.logical_or(is_inside_bounds, condition)

        logp = tf.select(is_inside_bounds, logp, tf.fill(tf.shape(X), config.dtype(-np.inf)))

    return logp
コード例 #29
0
	def conv_relu(self, policy_input, target_input, kernel_shape, stride, layer_num):
		''' Build a ReLU convolutional layer together with its target-network copy

		Args:
			policy_input: input to the policy network's convolutional layer - must be 4d
			target_input: input to layer of target network - must also be 4d
			kernel_shape: tuple for filter shape: (filter_height, filter_width, in_channels, out_channels)
			stride: tuple for stride: (1, vert_stride, horiz_stride, 1)
			layer_num: zero-based layer index; the scope is named 'conv<layer_num + 1>'

		Returns:
			[activation, target_activation]
		'''
		name = 'conv' + str(layer_num + 1)
		weights_name = name + "_weights"
		biases_name = name + "_biases"

		with tf.variable_scope(name):
			# policy network parameters: small random filters, biases at 0.1
			weights = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.01), name=weights_name)
			biases = tf.Variable(tf.fill([kernel_shape[-1]], 0.1), name=biases_name)

			policy_conv = tf.nn.conv2d(policy_input, weights, stride, 'VALID')
			activation = tf.nn.relu(policy_conv + biases)

			# the target network starts from the policy values and is not trainable
			target_weights = tf.Variable(weights.initialized_value(), trainable=False, name=("target_" + weights_name))
			target_biases = tf.Variable(biases.initialized_value(), trainable=False, name=("target_" + biases_name))

			target_conv = tf.nn.conv2d(target_input, target_weights, stride, 'VALID')
			target_activation = tf.nn.relu(target_conv + target_biases)

			# ops that copy the current policy parameters into the target network
			self.update_target.append(target_weights.assign(weights))
			self.update_target.append(target_biases.assign(biases))

			# register the trainable parameters and their names
			self.policy_network_params.append(weights)
			self.policy_network_params.append(biases)
			self.param_names.append(weights_name)
			self.param_names.append(biases_name)

			return [activation, target_activation]
コード例 #30
0
	def dense_linear(self, policy_input, target_input, shape):
		''' Build the fully-connected linear output layer and its target-network copy

		Args:
			policy_input: last hidden layer of the policy network
			target_input: last hidden layer of target network
			shape: tuple for weight shape (num_input_nodes, num_actions)

		Returns:
			[activation, target_activation]
		'''
		name = 'q_layer'
		weights_name = name + "_weights"
		biases_name = name + "_biases"

		with tf.variable_scope(name):
			# policy network parameters: small random weights, biases at 0.1
			weights = tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=weights_name)
			biases = tf.Variable(tf.fill([shape[-1]], 0.1), name=biases_name)

			# linear output, no non-linearity
			activation = tf.matmul(policy_input, weights) + biases

			# the target network starts from the policy values and is not trainable
			target_weights = tf.Variable(weights.initialized_value(), trainable=False, name=("target_" + weights_name))
			target_biases = tf.Variable(biases.initialized_value(), trainable=False, name=("target_" + biases_name))

			target_activation = tf.matmul(target_input, target_weights) + target_biases

			# ops that copy the current policy parameters into the target network
			self.update_target.append(target_weights.assign(weights))
			self.update_target.append(target_biases.assign(biases))

			# register the trainable parameters and their names
			self.policy_network_params.append(weights)
			self.policy_network_params.append(biases)
			self.param_names.append(weights_name)
			self.param_names.append(biases_name)

			return [activation, target_activation]
コード例 #31
0
def quadrature_scheme_lognormal_quantiles(loc,
                                          scale,
                                          quadrature_size,
                                          validate_args=False,
                                          name=None):
    """Form a quadrature rule on the positive reals from LogNormal quantiles.

    The grid points are midpoints between consecutive quantiles of a
    LogNormal(loc, scale) distribution, and every point gets the same weight.

    Args:
        loc: `float`-like (batch of) scalar `Tensor`; location parameter of
            the LogNormal prior.
        scale: `float`-like (batch of) scalar `Tensor`; scale parameter of
            the LogNormal prior.
        quadrature_size: Python `int` scalar; number of quadrature points.
        validate_args: Python `bool`, default `False`; forwarded to the
            distribution so that parameters are checked for validity, at a
            possible runtime cost.
        name: Python `str` name prefixed to the ops created here.

    Returns:
        grid: (Batch of) length-`quadrature_size` vectors of quadrature
            points.
        probs: length-`quadrature_size` vector holding the weight
            `1 / quadrature_size` for each `grid` value.
    """
    with tf.name_scope(name, "quadrature_scheme_lognormal_quantiles",
                       [loc, scale]):
        # A LogNormal is a Normal pushed through the Exp bijector.
        base_normal = tf.distributions.Normal(loc=loc, scale=scale)
        lognormal = transformed_lib.TransformedDistribution(
            distribution=base_normal,
            bijector=Exp(),
            validate_args=validate_args)

        # Use the static batch rank when known, else compute it dynamically.
        num_batch_dims = lognormal.batch_shape.ndims
        if num_batch_dims is None:
            num_batch_dims = tf.shape(lognormal.batch_shape_tensor())[0]

        # Evenly spaced probability levels on [0, 1]; the one point nearest
        # each end is dropped, which also removes {0, 1} since those might
        # lead to Inf/NaN.
        start = tf.zeros([], dtype=lognormal.dtype)
        levels = tf.linspace(start, 1., quadrature_size + 3)[1:-1]

        # Reshape to [-1, 1, ..., 1] so the levels broadcast over batch dims.
        trailing_ones = tf.ones([num_batch_dims], dtype=tf.int32)
        levels = tf.reshape(
            levels, shape=tf.concat([[-1], trailing_ones], axis=0))
        quantiles = lognormal.quantile(levels)

        # Rotate axes left by one so the quantile axis becomes the last one.
        batch_axes = tf.range(1, 1 + num_batch_dims)
        quantiles = tf.transpose(
            quantiles, tf.concat([batch_axes, [0]], axis=0))

        # Grid points are midpoints of neighbouring quantiles.
        lower = quantiles[..., :-1]
        upper = quantiles[..., 1:]
        grid = (lower + upper) / 2.
        # Attach the static shape hint.
        grid.set_shape(lognormal.batch_shape.concatenate([quadrature_size]))

        # Constant weights by construction; non-constant probs would lead to
        # non-reparameterizable samples.
        weight = 1. / tf.cast(quadrature_size, lognormal.dtype)
        probs = tf.fill(dims=[quadrature_size], value=weight)

        return grid, probs
コード例 #32
0
ファイル: model.py プロジェクト: HanzoZY/few-shot-gnn
    def __init__(self, hparams, input_tensor, label_tensor, is_train):
        """Build the model graph: inputs, stacked GCN blocks, loss and train op.

        Args:
            hparams: hyper-parameter object; the attributes read here are
                `n` (number of classes), `batch_size`, `seq_len`,
                `input_dim`, `num_gcn_blocks`, `lr`, `hop` and `label_cut`.
            input_tensor: input features, cast to float32. It is concatenated
                on axis 2 with the label tensor built below, so it is rank 3.
                (presumably [batch_size, seq_len, input_dim] -- TODO confirm)
            label_tensor: integer labels, split on axis 1 into the first
                `seq_len - 1` entries (fed to the model as one-hot vectors)
                and the last entry (the prediction target).
            is_train: when truthy a "global_step" variable is created and
                handed to the optimizer; otherwise `global_step` is None.
                Note that the train op is built in both cases.
        """
        self.num_classes = hparams.n
        self.batch_size = hparams.batch_size
        self.seq_len = hparams.seq_len
        self.input_dim = hparams.input_dim
        self.num_gcn_blocks = hparams.num_gcn_blocks
        self.lr = hparams.lr
        self.hop = hparams.hop
        self.label_cut = hparams.label_cut

        # Alternative kept from the original: L2-normalise the input first.
        # self.input_placeholder = tf.nn.l2_normalize(tf.cast(input_tensor, tf.float32),axis=-1)
        self.input_placeholder = tf.cast(input_tensor, tf.float32)
        self.label_placeholder = label_tensor
        self.is_train = is_train
        if self.is_train:
            self.global_step = tf.get_variable("global_step",
                                               initializer=0,
                                               trainable=False)
        else:
            self.global_step = None
        # Separate the labels that are shown to the model from the label of
        # the last position, which is the one to predict.
        feed_label, target_label = tf.split(self.label_placeholder,
                                            [self.seq_len - 1, 1],
                                            axis=1)
        # Flattened to rank 1, as required by the sparse cross-entropy below.
        self.target_label = tf.reshape(target_label, shape=[-1])
        # self.target_label = target_label
        # self.target_label=tf.one_hot(self.target_label,depth=self.num_classes,dtype=tf.float32)
        feed_label_one_hot_without_target = tf.one_hot(feed_label,
                                                       depth=self.num_classes,
                                                       dtype=tf.float32)
        # The unknown target position gets a uniform distribution over the
        # classes in place of a one-hot label.
        self.feed_label_one_hot_with_target = tf.concat([
            feed_label_one_hot_without_target,
            tf.fill([self.batch_size, 1, self.num_classes],
                    1.0 / self.num_classes)
        ],
                                                        axis=1)
        # Features and (partially known) labels side by side on the last axis.
        self.concated_input = tf.concat(
            [self.input_placeholder, self.feed_label_one_hot_with_target],
            axis=2)

        data_store = self.input_placeholder
        label_store = self.feed_label_one_hot_with_target
        '''for test only'''
        # name = 'GCN_Blocks'
        # with tf.variable_scope(name):
        #     data_store, _, self.diff,label_store, propagation_store ,self.Lap,self.simi,self.cmpr= self._gcn_block(input_data=data_store,input_label=label_store,add_dim=self.num_classes, drop=False)

        # Intermediate blocks: each one lives in its own variable scope and
        # feeds its 2nd and 3rd outputs to the next block.
        for i in range(self.num_gcn_blocks):
            # Open question from the original author: should the blocks share
            # the similarity function and the receptive-field ratio?
            # name='GCN_Blocks'
            name = f"GCN_Block_{i}"
            with tf.variable_scope(name):
                _, data_store, label_store, _ = self._gcn_block(
                    input_data=data_store,
                    input_label=label_store,
                    add_dim=int(self.input_dim / 2))

        # Final block: add_dim equals the number of classes, and here the
        # 1st, 3rd and 4th outputs are the ones kept.
        with tf.variable_scope('last_Block'):

            data_store, _, label_store, propagation_store = self._gcn_block(
                input_data=data_store,
                input_label=label_store,
                add_dim=self.num_classes)
        self.label_store = label_store
        # Logits for the last position, chosen by hparams.label_cut:
        #   'yes' -> taken from the label output,
        #   'no'  -> taken from the data output,
        #   other -> an extra NN block over both, concatenated.
        if self.label_cut == 'yes':
            print('use cut')
            self.predict_label = label_store[:, -1, :]
        elif self.label_cut == 'no':
            self.predict_label = data_store[:, -1, :]
        else:
            self.predict_label = self._add_nn_block(
                x=tf.concat([data_store[:, -1, :], label_store[:, -1, :]],
                            axis=-1),
                out_channel=self.num_classes)
        self.propagation = propagation_store
        # Mean sparse softmax cross-entropy between the target labels and
        # the last-position logits.
        ce_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.target_label, logits=self.predict_label))
        self.loss = ce_loss
        # NOTE(review): the Adam train op is created even when is_train is
        # falsy (global_step is None then) -- confirm this is intended.
        self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
            self.loss, global_step=self.global_step)
        self.accuracy = self._calc_accuracy()
コード例 #33
0
import numpy as np

# A: 2x2 square matrix of rank 2
A = tf.constant([[1., 2.], [3., 4.]])

# B: 2x2 square, diagonal (and therefore symmetric) matrix
B = tf.diag([5., 6.])

# C: 2x2 square matrix
C = tf.constant([[1., 2.], [2., 4.]])

# x: 2x1 vector with all elements equal to 1
x = tf.ones([2, 1])

# b: 2x1 vector with all elements equal to 2.0
b = tf.fill([2, 1], 2.)

# y: 2x1 vector
y = tf.constant([[-1.], [1.]])

# Evaluate the tensors inside a session and print them.
with tf.Session() as session:
    print("Tensorflow version: " + tf.__version__)
    session.run(tf.global_variables_initializer())

    print("A = ")
    print(session.run(A))

    print("B = ")
    print(session.run(B))
コード例 #34
0
 def testInitRequiredAssignAdd(self):
     """Running assign_add on a never-initialized variable must fail.

     The variable's initializer is deliberately not run, so executing the
     assign_add op is expected to raise an op error whose message
     contains "use uninitialized".
     """
     with self.test_session():
         # dtype has to be passed by keyword: the second positional
         # parameter of tf.Variable is `trainable`, not `dtype`.
         p = tf.Variable(tf.fill([1024, 1024], 1), dtype=tf.int32)
         a = tf.assign_add(p, tf.fill([1024, 1024], 0))
         with self.assertRaisesOpError("use uninitialized"):
             a.op.run()
コード例 #35
0
    def beam_loop(self, time, cell_output, cell_state, loop_state):
        """Advance the beam search by one step.

        Scores every (beam, symbol) continuation, keeps the best
        `self.beam_size` non-stop continuations per batch entry, updates the
        best finished candidate per batch entry, and evaluates the stopping
        criteria.

        Args:
            time: current step; compared against `self.max_len`.
            cell_output: cell output for this step. It is scored with
                `self.outputs_to_score_fn` and returned unchanged as the
                emit output.
            cell_state: (possibly nested) cell state; re-gathered so every
                surviving beam carries the state of its parent beam.
            loop_state: 4-tuple `(past_cand_symbols, past_cand_logprobs,
                past_beam_symbols, past_beam_logprobs)`.

        Returns:
            `(elements_finished, next_input, next_cell_state, emit_output,
            next_loop_state)`, where `next_loop_state` has the same layout
            as `loop_state`.

        Raises:
            AssertionError: if neither `self.max_len` nor
                `self.score_upper_bound` is set.
        """
        (
            past_cand_symbols,  # [batch_size, time-1]
            past_cand_logprobs,  # [batch_size]
            past_beam_symbols,  # [batch_size*beam_size, time-1], right-aligned
            past_beam_logprobs,  # [batch_size*beam_size]
        ) = loop_state

        # We don't actually use this, but emit_output is required to match the
        # cell output size specification. Otherwise we would leave this as None.
        emit_output = cell_output

        # 1. Get scores for all candidate sequences

        logprobs = self.outputs_to_score_fn(cell_output)

        try:
            num_classes = int(logprobs.get_shape()[-1])
        except (AttributeError, TypeError, ValueError):
            # The static class count is unavailable (shape inference failed),
            # so fall back to the dynamic shape. Only shape-lookup failures
            # are caught; a bare except would also swallow KeyboardInterrupt
            # and SystemExit.
            num_classes = tf.shape(logprobs)[-1]

        # Add each beam's accumulated log-probability to the scores of its
        # continuations, then flatten (beam, class) into one axis.
        logprobs_batched = tf.reshape(
            logprobs + tf.expand_dims(
                tf.reshape(past_beam_logprobs,
                           [self.batch_size, self.beam_size]), 2),
            [self.batch_size, self.beam_size * num_classes])

        # 2. Determine which states to pass to next iteration

        # The mask adds INVALID_SCORE at the stop token's index so that a
        # continuation ending in the stop token is not kept as a live beam.
        # TODO(nikita): consider using slice+fill+concat instead of adding a mask
        nondone_mask = tf.reshape(
            tf.cast(tf.equal(tf.range(num_classes), self.stop_token),
                    tf.float32) * self.INVALID_SCORE, [1, 1, num_classes])

        nondone_mask = tf.reshape(
            tf.tile(nondone_mask, [1, self.beam_size, 1]),
            [-1, self.beam_size * num_classes])

        beam_logprobs, indices = tf.nn.top_k(logprobs_batched + nondone_mask,
                                             self.beam_size)
        beam_logprobs = tf.reshape(beam_logprobs, [-1])

        # For continuing to the next symbols: a flat index decomposes into
        # the chosen symbol and the beam it extends.
        symbols = indices % num_classes  # [batch_size, self.beam_size]
        parent_refs = indices // num_classes  # [batch_size, self.beam_size]

        symbols_history = flat_batch_gather(past_beam_symbols,
                                            parent_refs,
                                            batch_size=self.batch_size,
                                            options_size=self.beam_size)
        beam_symbols = tf.concat(
            1, [symbols_history, tf.reshape(symbols, [-1, 1])])

        # Handle the output and the cell state shuffling
        next_cell_state = nest_map(
            lambda element: batch_gather(element,
                                         parent_refs,
                                         batch_size=self.batch_size,
                                         options_size=self.beam_size),
            cell_state)

        next_input = self.tokens_to_inputs_fn(
            tf.reshape(symbols, [-1, self.beam_size]))

        # 3. Update the candidate pool to include entries that just ended with a stop token
        logprobs_done = tf.reshape(
            logprobs_batched,
            [-1, self.beam_size, num_classes])[:, :, self.stop_token]
        done_parent_refs = tf.argmax(logprobs_done, 1)
        done_symbols = flat_batch_gather(past_beam_symbols,
                                         done_parent_refs,
                                         batch_size=self.batch_size,
                                         options_size=self.beam_size)

        logprobs_done_max = tf.reduce_max(logprobs_done, 1)

        # Keep whichever is better: the previous best candidate or the best
        # sequence that finished at this step.
        cand_symbols_unpadded = tf.select(
            logprobs_done_max > past_cand_logprobs, done_symbols,
            past_cand_symbols)
        cand_logprobs = tf.maximum(logprobs_done_max, past_cand_logprobs)

        # Pad with one stop token so the candidate grows by one column per
        # step, like the beam history.
        cand_symbols = tf.concat(1, [
            cand_symbols_unpadded,
            tf.fill([self.batch_size, 1], self.stop_token)
        ])

        # 4. Check the stopping criteria

        if self.max_len is not None:
            elements_finished_clip = (time >= self.max_len)

        if self.score_upper_bound is not None:
            elements_finished_bound = tf.reduce_max(
                tf.reshape(beam_logprobs, [-1, self.beam_size]),
                1) < (cand_logprobs - self.score_upper_bound)

        if self.max_len is not None and self.score_upper_bound is not None:
            elements_finished = elements_finished_clip | elements_finished_bound
        elif self.score_upper_bound is not None:
            elements_finished = elements_finished_bound
        elif self.max_len is not None:
            # this broadcasts elements_finished_clip to the correct shape
            elements_finished = tf.zeros(
                [self.batch_size], dtype=tf.bool) | elements_finished_clip
        else:
            # Raised explicitly (not via `assert`) so the guard survives -O.
            raise AssertionError(
                "Lack of stopping criterion should have been caught in constructor"
            )

        # 5. Prepare return values
        # While loops require strict shape invariants, so we manually set shapes
        # in case the automatic shape inference can't calculate these. Even when
        # this is redundant it has the benefit of helping catch shape bugs.

        for tensor in list(nest.flatten(next_input)) + list(
                nest.flatten(next_cell_state)):
            tensor.set_shape(
                tf.TensorShape(
                    (self.inferred_batch_size,
                     self.beam_size)).concatenate(tensor.get_shape()[2:]))

        for tensor in [cand_symbols, cand_logprobs, elements_finished]:
            tensor.set_shape(
                tf.TensorShape((self.inferred_batch_size, )).concatenate(
                    tensor.get_shape()[1:]))

        for tensor in [beam_symbols, beam_logprobs]:
            tensor.set_shape(
                tf.TensorShape(
                    (self.inferred_batch_size_times_beam_size, )).concatenate(
                        tensor.get_shape()[1:]))

        next_loop_state = (
            cand_symbols,
            cand_logprobs,
            beam_symbols,
            beam_logprobs,
        )

        return (elements_finished, next_input, next_cell_state, emit_output,
                next_loop_state)
コード例 #36
0
    def _build_model(self):
        """Assemble the CNN -> stacked LSTM -> linear projection graph.

        Side effects:
            self.seq_len: rank-1 tensor of length `_batch_size`, every entry
                equal to the width of the CNN feature map (the number of
                time steps of each example).
            self.logits: time-major logits, i.e. [time, batch, num_classes].
        """
        # Convolutional feature extractor.
        with tf.variable_scope('cnn'):
            net = self.inputs
            channels = [1, 64, 128, 128, self._out_channels]
            for idx in range(self._cnn_count):
                layer_no = idx + 1
                with tf.variable_scope('unit-%d' % layer_no):
                    net = self._conv2d(net,
                                       'cnn-%d' % layer_no,
                                       3,
                                       channels[idx],
                                       channels[layer_no],
                                       strides=1)
                    net = self._batch_norm(is_train=self._is_train,
                                           name='bn%d' % layer_no,
                                           x=net)
                    net = self._leaky_relu(net)
                    net = self._max_pool(net, 2, strides=2)
            _, feature_h, feature_w, _ = net.get_shape().as_list()
            print('\nfeature_h: {}, feature_w: {}'.format(
                feature_h, feature_w))

        # One-dimensional tensor of length batch_size whose entries all equal
        # feature_w: the time_step length of every example.
        self.seq_len = tf.fill([self._batch_size], feature_w)

        # Recurrent part.
        with tf.variable_scope('lstm'):
            # Swap axes 1 and 2 so the width axis comes second, then merge
            # height and channels into a single feature axis.
            net = tf.transpose(net, [0, 2, 1, 3])
            lstm_input_shape = [
                self._batch_size, feature_w, feature_h * self._out_channels
            ]
            net = tf.reshape(net, lstm_input_shape)
            print('lstm input shape: {}'.format(net.get_shape().as_list()))

            # Two LSTM layers; dropout on their outputs only when training.
            lstm_cells = [
                tf.nn.rnn_cell.LSTMCell(self._num_hidden, state_is_tuple=True)
                for _ in range(2)
            ]
            if self._is_train:
                lstm_cells = [
                    tf.nn.rnn_cell.DropoutWrapper(
                        cell=lstm_cell,
                        output_keep_prob=self._output_keep_prob)
                    for lstm_cell in lstm_cells
                ]

            stack = tf.nn.rnn_cell.MultiRNNCell(lstm_cells,
                                                state_is_tuple=True)
            initial_state = stack.zero_state(self._batch_size,
                                             dtype=tf.float32)

            # Only the per-step outputs are needed; the final state is dropped.
            rnn_outputs, _ = tf.nn.dynamic_rnn(cell=stack,
                                               inputs=net,
                                               sequence_length=self.seq_len,
                                               initial_state=initial_state,
                                               dtype=tf.float32,
                                               time_major=False)

        # Project every time step onto the classes with a shared affine map.
        flat_outputs = tf.reshape(rnn_outputs, [-1, self._num_hidden])
        w = tf.get_variable('W_out',
                            shape=[self._num_hidden, self._num_classes],
                            dtype=tf.float32,
                            initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable('b_out',
                            shape=[self._num_classes],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer())

        self.logits = tf.add(tf.matmul(flat_outputs, w), b)
        self.logits = tf.reshape(self.logits,
                                 [tf.shape(net)[0], -1, self._num_classes])
        # Switch to time-major layout.
        self.logits = tf.transpose(self.logits, (1, 0, 2))
コード例 #37
0
    def test_fill(self):
        """Run a tf.fill op of shape [2, 3] filled with 5 through self.run."""
        # Build the computation and hand it straight to the runner.
        self.run(tf.fill([2, 3], 5))
コード例 #38
0
ファイル: events_rnn_graph.py プロジェクト: yynst2/magenta
    def build():
        """Builds the Tensorflow graph.

        Takes no arguments; it reads `mode`, `hparams`, `encoder_decoder`,
        `no_event_label`, `num_classes`, `input_size` and
        `sequence_example_file_paths` from the enclosing scope (not visible
        in this block).

        Depending on `mode`:
          * 'train': adds loss/metric tensors, an Adam train op (stored in
            the 'train_op' collection) and scalar summaries.
          * 'eval': adds streaming metrics, stores their update ops in the
            'eval_ops' collection, and adds scalar summaries.
          * 'generate': adds an input placeholder, a temperature placeholder
            and the softmax output, and stores them together with the
            flattened initial/final RNN states in graph collections.

        Nothing is returned; results are exposed through graph collections.
        """
        inputs, labels, lengths = None, None, None

        if mode in ('train', 'eval'):
            # A numeric no-event label means one label per step; otherwise
            # each step carries a vector of len(no_event_label) labels.
            if isinstance(no_event_label, numbers.Number):
                label_shape = []
            else:
                label_shape = [len(no_event_label)]
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                label_shape=label_shape,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        # One-hot-index encoders supply an index in the last axis; expand it
        # to a one-hot vector of depth encoder_decoder.input_depth.
        if isinstance(encoder_decoder,
                      magenta.music.OneHotIndexEventSequenceEncoderDecoder):
            expanded_inputs = tf.one_hot(
                tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
                encoder_decoder.input_depth)
        else:
            expanded_inputs = inputs

        # No dropout while generating.
        dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

        if hparams.use_cudnn:
            outputs, initial_state, final_state = make_cudnn(
                expanded_inputs,
                hparams.rnn_layer_sizes,
                hparams.batch_size,
                mode,
                dropout_keep_prob=dropout_keep_prob,
                residual_connections=hparams.residual_connections)

        else:
            cell = make_rnn_cell(
                hparams.rnn_layer_sizes,
                dropout_keep_prob=dropout_keep_prob,
                attn_length=hparams.attn_length,
                residual_connections=hparams.residual_connections)

            initial_state = cell.zero_state(hparams.batch_size, tf.float32)

            # NOTE(review): this path feeds `inputs`, not `expanded_inputs`,
            # so the one-hot expansion above only takes effect on the cuDNN
            # path -- confirm that this is intended.
            outputs, final_state = tf.nn.dynamic_rnn(
                cell,
                inputs,
                sequence_length=lengths,
                initial_state=initial_state,
                swap_memory=True)

        # `lengths` is None in 'generate' mode.
        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        # With several label groups, one logit block per group is laid out
        # side by side in a single linear layer.
        if isinstance(num_classes, numbers.Number):
            num_logits = num_classes
        else:
            num_logits = sum(num_classes)
        logits_flat = contrib_layers.linear(outputs_flat, num_logits)

        if mode in ('train', 'eval'):
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            if isinstance(num_classes, numbers.Number):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
                predictions_flat = tf.argmax(logits_flat, axis=1)
            else:
                # logits_offsets[i]:logits_offsets[i + 1] is the slice of
                # logits that belongs to label group i.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax_cross_entropy = []
                predictions = []
                for i in range(len(num_classes)):
                    softmax_cross_entropy.append(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=labels_flat[:, i],
                            logits=logits_flat[:, logits_offsets[i]:
                                               logits_offsets[i + 1]]))
                    predictions.append(
                        tf.argmax(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            axis=1))
                predictions_flat = tf.stack(predictions, 1)

            # 0/1 float masks: correct predictions, positions holding a real
            # event, and positions holding the no-event label.
            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in the
            # batch. For some models this will be different from the number of RNN
            # steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                # Executed through tf.py_func below; sums the step count of
                # every sequence, truncated to its own length.
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)

            num_steps = tf.py_func(batch_labels_to_num_steps,
                                   [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                # Accuracy restricted to event / no-event positions.
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                # Loss normalised by time steps instead of RNN steps.
                loss_per_step = tf.reduce_sum(
                    softmax_cross_entropy) / num_steps
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = contrib_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                # Streaming metrics: the event / no-event accuracies are
                # expressed as recall of correct predictions over the
                # corresponding position masks.
                vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                        'metrics/loss_per_step':
                        tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) /
                                        num_steps,
                                        weights=num_steps),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            # Every metric gets a scalar summary and a collection of its own.
            for var_name, var_value in six.iteritems(vars_to_summarize):
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            # Logits are divided by the temperature before the softmax.
            temperature = tf.placeholder(tf.float32, [])
            if isinstance(num_classes, numbers.Number):
                softmax_flat = tf.nn.softmax(
                    tf.div(logits_flat, tf.fill([num_classes], temperature)))
                softmax = tf.reshape(softmax_flat,
                                     [hparams.batch_size, -1, num_classes])
            else:
                # One softmax per label group; `softmax` is then a Python
                # list of tensors and is stored in the collection as such.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax = []
                for i in range(len(num_classes)):
                    sm = tf.nn.softmax(
                        tf.div(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            tf.fill([num_classes[i]], temperature)))
                    sm = tf.reshape(sm,
                                    [hparams.batch_size, -1, num_classes[i]])
                    softmax.append(sm)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
コード例 #39
0
ファイル: test_utils.py プロジェクト: lbstroud/agents
 def _get_initial_state(self, batch_size):
     """Return a rank-1 tensor holding `batch_size` zeros."""
     initial_state = tf.fill([batch_size], 0)
     return initial_state
コード例 #40
0
ファイル: model.py プロジェクト: Andy23-yang/Code2seq
    def decode_outputs(self,
                       target_words_vocab,
                       target_input,
                       batch_size,
                       batched_contexts,
                       valid_mask,
                       is_evaluating=False):
        """Build the attention decoder and run it with dynamic_decode.

        Args:
            target_words_vocab: embedding matrix of the target vocabulary;
                used for embedding lookup and by the inference decoders.
            target_input: target token ids, only used when training; an SOS
                column is prepended before the embedding lookup.
            batch_size: number of examples in the batch.
            batched_contexts: encoded contexts that the decoder attends over;
                the inline comment below gives the reduced shape as
                (batch_size, dim * 2 + rnn_size).
            valid_mask: mask over contexts; non-zero entries mark contexts
                that count towards the average and the context count.
            is_evaluating: when True an inference decoder is built (beam
                search if config.BEAM_WIDTH > 0, greedy otherwise); when
                False a teacher-forced training decoder with dropout is built.

        Returns:
            (outputs, final_states) from tf.contrib.seq2seq.dynamic_decode.
            The final sequence lengths are computed but not returned.
        """
        num_contexts_per_example = tf.count_nonzero(valid_mask, axis=-1)

        # Every sequence starts from the SOS token.
        start_fill = tf.fill([batch_size],
                             self.target_to_index[Common.SOS])  # (batch, )
        decoder_cell = tf.nn.rnn_cell.MultiRNNCell([
            tf.nn.rnn_cell.LSTMCell(self.config.DECODER_SIZE)
            for _ in range(self.config.NUM_DECODER_LAYERS)
        ])
        # Average of the valid contexts; used as both c and h of the initial
        # state of every decoder layer.
        # NOTE(review): an example without any valid context would make the
        # division below a division by zero -- confirm callers rule it out.
        contexts_sum = tf.reduce_sum(
            batched_contexts * tf.expand_dims(valid_mask, -1),
            axis=1)  # (batch_size, dim * 2 + rnn_size)
        contexts_average = tf.divide(
            contexts_sum,
            tf.to_float(tf.expand_dims(num_contexts_per_example, -1)))
        fake_encoder_state = tuple(
            tf.nn.rnn_cell.LSTMStateTuple(contexts_average, contexts_average)
            for _ in range(self.config.NUM_DECODER_LAYERS))
        projection_layer = tf.layers.Dense(self.target_vocab_size,
                                           use_bias=False)
        # Beam search works on batch_size * BEAM_WIDTH rows, so the attention
        # memory has to be tiled accordingly.
        if is_evaluating and self.config.BEAM_WIDTH > 0:
            batched_contexts = tf.contrib.seq2seq.tile_batch(
                batched_contexts, multiplier=self.config.BEAM_WIDTH)
            num_contexts_per_example = tf.contrib.seq2seq.tile_batch(
                num_contexts_per_example, multiplier=self.config.BEAM_WIDTH)
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            num_units=self.config.DECODER_SIZE, memory=batched_contexts)
        # TF doesn't support beam search with alignment history
        should_save_alignment_history = is_evaluating and self.config.BEAM_WIDTH == 0
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell,
            attention_mechanism,
            attention_layer_size=self.config.DECODER_SIZE,
            alignment_history=should_save_alignment_history)
        if is_evaluating:
            if self.config.BEAM_WIDTH > 0:
                # Beam search: zero state for the tiled batch, with the cell
                # state replaced by the tiled context average.
                decoder_initial_state = decoder_cell.zero_state(
                    dtype=tf.float32,
                    batch_size=batch_size * self.config.BEAM_WIDTH)
                decoder_initial_state = decoder_initial_state.clone(
                    cell_state=tf.contrib.seq2seq.tile_batch(
                        fake_encoder_state, multiplier=self.config.BEAM_WIDTH))
                # NOTE(review): the end token here is the PAD index while the
                # greedy helper below uses the literal 0 -- presumably PAD is
                # index 0; verify against the vocabulary.
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=target_words_vocab,
                    start_tokens=start_fill,
                    end_token=self.target_to_index[Common.PAD],
                    initial_state=decoder_initial_state,
                    beam_width=self.config.BEAM_WIDTH,
                    output_layer=projection_layer,
                    length_penalty_weight=0.0)
            else:
                # Greedy decoding: feed back the arg-max token at each step.
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    target_words_vocab, start_fill, 0)
                initial_state = decoder_cell.zero_state(
                    batch_size,
                    tf.float32).clone(cell_state=fake_encoder_state)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=helper,
                    initial_state=initial_state,
                    output_layer=projection_layer)

        else:
            # Training: dropout on the decoder outputs and teacher forcing
            # with the ground-truth target tokens.
            decoder_cell = tf.nn.rnn_cell.DropoutWrapper(
                decoder_cell,
                output_keep_prob=self.config.RNN_DROPOUT_KEEP_PROB)
            target_words_embedding = tf.nn.embedding_lookup(
                target_words_vocab,
                tf.concat(
                    [tf.expand_dims(start_fill, -1), target_input],
                    axis=-1))  # (batch, max_target_parts, dim * 2 + rnn_size)
            # Every sequence is decoded for MAX_TARGET_PARTS + 1 steps
            # (the extra step accounts for the prepended SOS token).
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=target_words_embedding,
                sequence_length=tf.ones([batch_size], dtype=tf.int32) *
                (self.config.MAX_TARGET_PARTS + 1))

            initial_state = decoder_cell.zero_state(
                batch_size, tf.float32).clone(cell_state=fake_encoder_state)

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=helper,
                initial_state=initial_state,
                output_layer=projection_layer)
        # final_sequence_lengths is unpacked but not used afterwards.
        outputs, final_states, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=self.config.MAX_TARGET_PARTS + 1)
        return outputs, final_states
コード例 #41
0
    def decoder_with_caching(self, encoded, len_encoded):
        """
        Greedy search decoding, used for self-learning training or inference.

        Args:
            encoded: encoder output; axis 0 is read as the batch axis and
                axis 1 as the encoder time axis.
            len_encoded: per-example encoder lengths, used to build the
                padding mask for encoder attention.

        Returns:
            (logits, preds, len_decoded): `logits` and `preds` have the
            initial start-token step stripped, and `preds` is zeroed at
            positions beyond `len_decoded`.
        """
        n_batch = tf.shape(encoded)[0]

        # Initial loop state: one start token per example, a dummy logits
        # step (removed again after the loop) and an empty decoder cache.
        tokens_0 = tf.fill([n_batch, 1], self.start_token)
        logits_0 = tf.zeros([n_batch, 1, self.dim_output], dtype=tf.float32)
        done_0 = tf.zeros([n_batch], dtype=tf.bool)
        lengths_0 = tf.ones([n_batch], dtype=tf.int32)
        cache_0 = tf.zeros(
            [n_batch, 0, self.num_blocks, self.num_cell_units])

        # True where an encoder frame is padding.
        valid_frames = tf.sequence_mask(len_encoded,
                                        maxlen=tf.shape(encoded)[1])
        encoder_padding = tf.equal(valid_frames, False)
        encoder_attention_bias = common_attention.attention_bias_ignore_padding(
            encoder_padding)

        def advance(step_idx, tokens, cache, all_logits, lengths, done):
            """Decode one more token for every example in the batch."""
            token_emb = self.embedding(tokens)
            decoder_output, cache = self.decoder_with_caching_impl(
                token_emb, cache, encoded, encoder_attention_bias)

            # Only the newest position is projected to the vocabulary.
            newest_output = decoder_output[:, -1, :]
            step_logit = tf.layers.dense(inputs=newest_output,
                                         units=self.dim_output,
                                         activation=None,
                                         use_bias=False,
                                         name='decoder_fc')

            step_ids = tf.to_int32(tf.argmax(step_logit, -1))
            tokens = tf.concat([tokens, step_ids[:, None]], axis=1)
            all_logits = tf.concat([all_logits, step_logit[:, None]], 1)

            # A sequence stays finished once it has emitted the end token;
            # only unfinished sequences keep growing.
            emitted_eos = tf.equal(step_ids, self.end_token)
            done = tf.logical_or(done, emitted_eos)
            lengths = lengths + (1 - tf.to_int32(done))

            return step_idx + 1, tokens, cache, all_logits, lengths, done

        def keep_going(step_idx, tokens, cache, all_logits, lengths, done):
            """Continue while some sequence is open and the step cap holds."""
            any_open = tf.reduce_any(tf.logical_not(done))
            # Cap on steps: the smaller of the encoder time length and the
            # configured maximum length.
            step_cap = tf.reduce_min(
                [tf.shape(encoded)[1], self.args.max_len])
            return tf.logical_and(any_open, tf.less(step_idx, step_cap))

        loop_state = [
            0, tokens_0, cache_0, logits_0, lengths_0, done_0
        ]
        loop_shapes = [
            tf.TensorShape([]),
            tf.TensorShape([None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, self.dim_output]),
            tf.TensorShape([None]),
            tf.TensorShape([None])
        ]
        _, preds, _, logits, len_decoded, finished = tf.while_loop(
            cond=keep_going,
            body=advance,
            loop_vars=loop_state,
            shape_invariants=loop_shapes)

        # Sequences cut off by the step cap (never finished) drop the extra
        # count carried over from the initial length of one.
        len_decoded = len_decoded - (1 - tf.to_int32(finished))

        # Strip the initial dummy step / start token.
        logits = logits[:, 1:, :]
        preds = preds[:, 1:]

        # Zero out everything past each sequence's decoded length.
        not_padding = tf.sequence_mask(len_decoded, dtype=tf.int32)
        preds = tf.multiply(tf.to_int32(preds), not_padding)

        return logits, preds, len_decoded
コード例 #42
0
    def beam_decode_rerank(self, encoded, len_encoded):
        """
        Beam search with optional language-model fusion and final reranking.

        Every batch entry is expanded into `self.beam_size` hypotheses that
        sit next to each other along axis 0 (`[batch_size * beam_size, ...]`).
        At every step each hypothesis proposes its `beam_size` best next
        tokens, and the best `beam_size` of the resulting
        `beam_size * beam_size` candidates per batch entry survive. When
        `self.args.model.shallow_fusion` is set, LM log-probabilities scaled
        by `self.lambda_lm` are added at every step. When
        `self.args.model.rerank` is set, the finished hypotheses are rescored
        with `self.lm.decoder.score` scaled by `self.args.lambda_rerank`.

        Author's note carried over: the input to the LM score is
        <sos> + tokens. `preds` is passed here with the start token already
        stripped, so presumably `score` prepends it itself — TODO confirm.

        Args:
            encoded: encoder output, indexed as [batch, time, hidden].
            len_encoded: encoder lengths, one per batch entry.

        Returns:
            A 3-tuple:
            * `[logits_sorted, preds_sorted, len_decoded_sorted,
              scores_am_sorted, scores_lm_sorted]`, each reshaped to
              `[batch_size, beam_size, ...]` and ordered best-first;
            * the best hypothesis tokens, `preds_sorted[:, 0, :]`;
            * the best hypothesis lengths, `len_decoded_sorted[:, 0]`.
        """
        beam_size = self.beam_size
        batch_size = tf.shape(len_encoded)[0]

        # beam search Initialize
        # repeat each sample in batch along the batch axis [1,2,3,4] -> [1,1,2,2,3,3,4,4]
        encoded = tf.tile(encoded[:, None, :, :],
                          multiples=[
                              1, beam_size, 1, 1
                          ])  # [batch_size, beam_size, *, hidden_units]
        encoded = tf.reshape(
            encoded,
            [batch_size * beam_size, -1,
             encoded.get_shape()[-1].value])
        len_encoded = tf.reshape(
            tf.tile(len_encoded[:, None], multiples=[1, beam_size]),
            [-1])  # [batch_size * beam_size]

        # [[<S>, <S>, ..., <S>]], shape: [batch_size * beam_size, 1]
        token_init = tf.fill([batch_size * beam_size, 1], self.args.sos_idx)
        logits_init = tf.zeros([batch_size * beam_size, 0, self.dim_output],
                               dtype=tf.float32)
        len_decoded_init = tf.ones_like(len_encoded, dtype=tf.int32)
        # The scores must be [0, -inf, -inf, ...] at init: all hypotheses in
        # a beam start identical, so only the first one may be expanded.
        scores_init = tf.constant([0.0] + [-inf] * (beam_size - 1),
                                  dtype=tf.float32)  # [beam_size]
        scores_init = tf.tile(scores_init,
                              multiples=[batch_size
                                         ])  # [batch_size * beam_size]
        finished_init = tf.zeros_like(scores_init, dtype=tf.bool)

        cache_decoder_init = tf.zeros(
            [batch_size * beam_size, 0, self.num_blocks, self.num_cell_units])
        if self.lm:
            cache_lm_init = tf.zeros([
                batch_size * beam_size, 0,
                self.lm.args.model.decoder.num_blocks,
                self.lm.args.model.decoder.num_cell_units
            ])
        else:
            # Placeholder so the loop signature is the same without an LM.
            cache_lm_init = tf.zeros([0, 0, 0, 0])

        # Batch index of every hypothesis: [0,0,...,1,1,...]; used to turn
        # per-batch top-k positions into flat indices.
        base_indices = tf.reshape(tf.tile(tf.range(batch_size)[:, None],
                                          multiples=[1, beam_size]),
                                  shape=[-1])

        encoder_padding = tf.equal(
            tf.sequence_mask(len_encoded, maxlen=tf.shape(encoded)[1]),
            False)  # bool tensor
        encoder_attention_bias = common_attention.attention_bias_ignore_padding(
            encoder_padding)

        def step(i, preds, scores, cache_decoder, cache_lm, logits,
                 len_decoded, finished):
            """
            One beam-search step. The caches have no fixed shape, so they are
            carried as fully-unknown-shape loop variables.
            """
            preds_emb = self.embedding(preds)
            decoder_input = preds_emb

            decoder_output, cache_decoder = self.decoder_with_caching_impl(
                decoder_input, cache_decoder, encoded, encoder_attention_bias)

            cur_logit = tf.layers.dense(inputs=decoder_output[:, -1, :],
                                        units=self.dim_output,
                                        activation=None,
                                        use_bias=False,
                                        name='decoder_fc')

            logits = tf.concat([logits, cur_logit[:, None]], 1)
            z = tf.nn.log_softmax(cur_logit)  # [batch*beam, size_output]

            # the language model inference
            if self.args.model.shallow_fusion:
                assert self.lm
                preds_emb = self.lm.decoder.embedding(preds)

                with tf.variable_scope(self.args.top_scope, reuse=True):
                    with tf.variable_scope(self.args.lm_scope):
                        lm_output, cache_lm = self.lm.decoder.decoder_with_caching_impl(
                            preds_emb, cache_lm)
                        logit_lm = dense(inputs=lm_output[:, -1, :],
                                         units=self.dim_output,
                                         kernel=tf.transpose(
                                             self.lm.decoder.fully_connected),
                                         use_bias=False)
                z_lm = self.lambda_lm * tf.nn.log_softmax(
                    logit_lm)  # [batch*beam, size_output]
            else:
                z_lm = tf.zeros_like(z)

            # rank the combined scores
            next_scores, next_preds = tf.nn.top_k(z + z_lm,
                                                  k=beam_size,
                                                  sorted=True)
            next_preds = tf.to_int32(next_preds)

            # beamed scores & Pruning
            scores = scores[:,
                            None] + next_scores  # [batch_size * beam_size, beam_size]
            scores = tf.reshape(scores,
                                shape=[batch_size, beam_size * beam_size])

            _, k_indices = tf.nn.top_k(scores, k=beam_size)
            k_indices = base_indices * beam_size * beam_size + tf.reshape(
                k_indices, shape=[-1])  # [batch_size * beam_size]
            # Update scores.
            scores = tf.reshape(scores, [-1])
            scores = tf.gather(scores, k_indices)
            # Update predictions.
            next_preds = tf.reshape(next_preds, shape=[-1])
            next_preds = tf.gather(next_preds, indices=k_indices)

            # k_indices index the flattened [batch*beam*beam] candidates;
            # integer division by beam_size gives the parent hypothesis in
            # [batch*beam] each survivor was expanded from.
            parent_indices = k_indices // beam_size

            # Everything that describes a hypothesis must follow it to its
            # new slot. Besides the token history and caches this includes
            # the logits history, the decoded length and the finished flag;
            # otherwise a survivor would inherit the state of whichever
            # hypothesis previously occupied its slot.
            preds = tf.gather(preds, indices=parent_indices)
            cache_lm = tf.gather(cache_lm, indices=parent_indices)
            cache_decoder = tf.gather(cache_decoder, indices=parent_indices)
            logits = tf.gather(logits, indices=parent_indices)
            len_decoded = tf.gather(len_decoded, indices=parent_indices)
            finished = tf.gather(finished, indices=parent_indices)

            preds = tf.concat([preds, next_preds[:, None]],
                              axis=1)  # [batch_size * beam_size, i]

            has_eos = tf.equal(next_preds, self.end_token)
            finished = tf.logical_or(finished, has_eos)
            len_decoded += 1 - tf.to_int32(finished)

            return i + 1, preds, scores, cache_decoder, cache_lm, logits, len_decoded, finished

        def not_finished(i, preds, scores, cache_decoder, cache_lm, logit,
                         len_decoded, finished):
            """Continue while any hypothesis is open and the step cap holds."""
            return tf.logical_and(
                tf.reduce_any(tf.logical_not(finished)),
                tf.less(
                    i,
                    tf.reduce_min([tf.shape(encoded)[1],
                                   self.args.max_len])  # maxlen = 100
                ))

        _, preds, scores_am, _, _, logits, len_decoded, finished = tf.while_loop(
            cond=not_finished,
            body=step,
            loop_vars=[
                0, token_init, scores_init, cache_decoder_init, cache_lm_init,
                logits_init, len_decoded_init, finished_init
            ],
            shape_invariants=[
                tf.TensorShape([]),
                tf.TensorShape([None, None]),
                tf.TensorShape([None]),
                tf.TensorShape([None, None, None, None]),
                tf.TensorShape([None, None, None, None]),
                tf.TensorShape([None, None, self.dim_output]),
                tf.TensorShape([None]),
                tf.TensorShape([None])
            ])

        # [batch_size * beam_size, ...]
        len_decoded -= 1 - tf.to_int32(
            finished)  # for decoded length cut by encoded length
        preds = preds[:, 1:]
        # Pin the mask width to the token axis: once lengths follow their
        # hypotheses, the longest recorded length may be shorter than the
        # number of decoding steps taken.
        not_padding = tf.sequence_mask(len_decoded,
                                       maxlen=tf.shape(preds)[1],
                                       dtype=tf.int32)
        preds *= not_padding

        # [batch_size , beam_size, ...]
        if self.args.model.rerank:
            assert self.lm
            with tf.variable_scope(self.args.top_scope, reuse=True):
                with tf.variable_scope(self.args.lm_scope):
                    scores_lm, distribution = self.lm.decoder.score(
                        preds, len_decoded)

            scores_lm = self.args.lambda_rerank * scores_lm
        else:
            scores_lm = tf.zeros_like(scores_am)

        scores = scores_am + scores_lm

        # tf.nn.top_k with k=beam_size is used to sort `scores` per batch
        # entry; only the resulting order is needed.
        _, sorted_indices = tf.nn.top_k(tf.reshape(
            scores, [batch_size, beam_size]),
                                        k=beam_size,
                                        sorted=True)

        # Convert per-batch positions to flat [batch_size * beam_size] ones.
        sorted_indices = base_indices * beam_size + tf.reshape(
            sorted_indices, shape=[-1])

        # [batch_size * beam_size, ...]
        logits_sorted = tf.gather(logits, sorted_indices)
        preds_sorted = tf.gather(preds, sorted_indices)
        len_decoded_sorted = tf.gather(len_decoded, sorted_indices)
        scores_lm_sorted = tf.gather(scores_lm, sorted_indices)
        scores_am_sorted = tf.gather(scores_am, sorted_indices)

        # [batch_size, beam_size, ...]
        scores_lm_sorted = tf.reshape(scores_lm_sorted,
                                      shape=[batch_size, beam_size])
        scores_am_sorted = tf.reshape(scores_am_sorted,
                                      shape=[batch_size, beam_size])
        preds_sorted = tf.reshape(
            preds_sorted, shape=[batch_size, beam_size,
                                 -1])  # [batch_size, beam_size, max_length]
        logits_sorted = tf.reshape(
            logits_sorted, [batch_size, beam_size, -1, self.dim_output])
        len_decoded_sorted = tf.reshape(len_decoded_sorted,
                                        [batch_size, beam_size])

        return [
            logits_sorted, preds_sorted, len_decoded_sorted, scores_am_sorted,
            scores_lm_sorted
        ], preds_sorted[:, 0, :], len_decoded_sorted[:, 0]
コード例 #43
0
    def __init__(self,
                 source_vocab_size,
                 target_vocab_size,
                 buckets,
                 text_hidden_size,
                 speech_hidden_size,
                 parse_hidden_size,
                 text_num_layers,
                 speech_num_layers,
                 parse_num_layers,
                 embedding_size,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 learning_rate_decay_factor,
                 optimizer,
                 use_lstm=True,
                 output_keep_prob=0.8,
                 num_samples=512,
                 forward_only=False):
        """Create the model.

        Builds the input placeholders, the bucketed many-to-one seq2seq
        graph (text and speech encoders feeding one decoder), and, unless
        `forward_only`, one clipped-gradient update op per bucket.

        Args:
          source_vocab_size: passed on as `num_encoder_symbols`.
          target_vocab_size: passed on as `num_decoder_symbols`; also the
            width of the sampled-softmax projection.
          buckets: list of (input size, output size) pairs. The last bucket
            is treated as the largest and sizes the placeholder lists.
          text_hidden_size: units per layer of the text cell.
          speech_hidden_size: units per layer of the speech cell.
          parse_hidden_size: units per layer of the parse cell.
          text_num_layers: number of layers in the text cell.
          speech_num_layers: number of layers in the speech cell.
          parse_num_layers: number of layers in the parse cell.
          embedding_size: forwarded to the seq2seq function.
          max_gradient_norm: global norm that gradients are clipped to.
          batch_size: stored on `self.batch_size`; not otherwise used here.
          learning_rate: initial value of the learning-rate variable.
          learning_rate_decay_factor: factor `learning_rate_decay_op`
            multiplies the learning rate by.
          optimizer: "momentum", "grad_descent" or "adagrad"; any other
            value selects Adam.
          use_lstm: use BasicLSTMCell when true, GRUCell otherwise.
          output_keep_prob: dropout keep probability on cell outputs,
            applied only when not `forward_only`.
          num_samples: sampled-softmax sample count; sampled softmax is used
            only when 0 < num_samples < target_vocab_size.
          forward_only: if true, build a decode-only graph (previous output
            fed back, no dropout, no gradient ops).
        """
        self.source_vocab_size = source_vocab_size
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.batch_size = batch_size
        self.epoch = 0

        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            # NOTE(review): `hidden_size` is not a parameter of __init__;
            # presumably a module-level name — confirm it exists, otherwise
            # this branch raises NameError.
            w = tf.get_variable("proj_w",
                                [hidden_size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.target_vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        def create_cell(hidden_size, num_layers):
            single_cell = tf.nn.rnn_cell.GRUCell(hidden_size)
            if use_lstm:
                single_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size,
                                                           state_is_tuple=True)
            if not forward_only:
                # always use dropout; set keep_prob=1 if not dropout
                print("Training mode; dropout used!")
                single_cell = tf.nn.rnn_cell.DropoutWrapper(
                    single_cell, output_keep_prob=output_keep_prob)
            cell = single_cell
            if num_layers > 1:
                # The same cell object is repeated for every layer.
                cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers,
                                                   state_is_tuple=True)
            return cell

        text_cell = create_cell(text_hidden_size, text_num_layers)
        speech_cell = create_cell(speech_hidden_size, speech_num_layers)
        parse_cell = create_cell(parse_hidden_size, parse_num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs_list, decoder_inputs, text_len, do_decode,
                      attn_vec_size):
            return many2one_seq2seq.many2one_attention_seq2seq(
                encoder_inputs_list,
                decoder_inputs,
                text_len,
                text_cell,
                speech_cell,
                parse_cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=embedding_size,
                output_projection=output_projection,
                feed_previous=do_decode,
                attention_vec_size=attn_vec_size)

        # Feeds for inputs.
        # NOTE(review): `spscale`, `mfcc_num` and `attn_vec_size` (used
        # below) are not defined in this method; presumably module-level
        # names — confirm.
        self.text_encoder_inputs = []
        self.speech_encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
            self.text_encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="text_encoder{0}".format(i)))
        # The speech side gets `spscale` placeholders per text position.
        for i in xrange(buckets[-1][0] * spscale):
            self.speech_encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=[None, mfcc_num],
                               name="speech_encoder{0}".format(i)))
        # One extra decoder step because the targets are the decoder inputs
        # shifted by one.
        for i in xrange(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))
        self.encoder_inputs_list = [
            self.text_encoder_inputs, self.speech_encoder_inputs
        ]

        # seq_len stuff: a vector with one entry per batch example, every
        # entry the int64 constant 2.
        _batch_size = tf.shape(self.text_encoder_inputs[0])[0]
        self.seq_len = tf.fill(tf.expand_dims(_batch_size, 0),
                               tf.constant(2, dtype=tf.int64))

        # Our targets are decoder inputs shifted by one.
        targets = self.decoder_inputs[1:]

        # Outputs and losses. The training and decoding graphs differ only
        # in whether the decoder is fed its own previous output.
        do_decode = bool(forward_only)
        self.outputs, self.losses = many2one_seq2seq.many2one_model_with_buckets(
            self.encoder_inputs_list,
            self.decoder_inputs,
            targets,
            self.target_weights,
            self.seq_len,
            buckets,
            lambda x, y, z: seq2seq_f(x, y, z, do_decode, attn_vec_size),
            softmax_loss_function=softmax_loss_function,
            spscale=spscale)
        # If we use output projection, we need to project outputs for decoding.
        if forward_only and output_projection is not None:
            # The loop variable is deliberately not called `b`: that name is
            # the projection bias captured by `sampled_loss` above.
            for bucket_id in xrange(len(buckets)):
                self.outputs[bucket_id] = [
                    tf.matmul(output, output_projection[0]) +
                    output_projection[1]
                    for output in self.outputs[bucket_id]
                ]

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            ## Make optimizer a hyperparameter
            if optimizer == "momentum":
                opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)
            elif optimizer == "grad_descent":
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            elif optimizer == "adagrad":
                print("Using adagrad optimizer")
                opt = tf.train.AdagradOptimizer(self.learning_rate)
            else:
                print("Using Adam optimizer")
                opt = tf.train.AdamOptimizer(self.learning_rate)

            # One update op (and gradient norm) per bucket.
            for bucket_id in xrange(len(buckets)):
                gradients = tf.gradients(self.losses[bucket_id], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables())
コード例 #44
0
 def _build(self, obs_input, act_input, name=None):
     """Return a tensor filled with one scalar variable.

     The output has shape `(batch, self.output_dim)`, where `batch` is the
     leading dimension of `obs_input`. Every entry is the scalar variable
     'return_var', which is initialised to 0.5. `act_input` and `name` are
     accepted but not used.
     """
     scalar_value = tf.compat.v1.get_variable(
         name='return_var',
         shape=(),
         initializer=tf.constant_initializer(0.5))
     batch_dim = tf.shape(obs_input)[0]
     output_shape = (batch_dim, self.output_dim)
     return tf.fill(output_shape, scalar_value)
コード例 #45
0
def build_graph(mode, config, sequence_example_file_paths=None):
  """Builds the TensorFlow graph.

  All tensors and ops a caller needs are published through graph
  collections ('loss', 'train_op', 'softmax', 'init_op', ...).

  When `config.learn_initial_state` is set and the mode is not 'generate',
  the RNN initial state is a variable initialised from a placeholder
  (collection 'initial_state_in'). In 'train' mode the Adam `m`/`v` slots of
  that variable are additionally exposed for reading and assignment.

  Args:
    mode: 'train', 'eval', or 'generate'. Only mode related ops are added to
        the graph.
    config: An EventSequenceRnnConfig containing the encoder/decoder and HParams
        to use.
    sequence_example_file_paths: A list of paths to TFRecord files containing
        tf.train.SequenceExample protos. Only needed for training and
        evaluation.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate'.
  """
  if mode not in ('train', 'eval', 'generate'):
    raise ValueError("The mode parameter must be 'train', 'eval', "
                     "or 'generate'. The mode parameter was: %s" % mode)

  hparams = config.hparams
  encoder_decoder = config.encoder_decoder

  tf.logging.info('hparams = %s', hparams.values())

  input_size = encoder_decoder.input_size
  num_classes = encoder_decoder.num_classes
  no_event_label = encoder_decoder.default_event_label

  with tf.Graph().as_default() as graph:
    inputs, labels, lengths = None, None, None
    state_is_tuple = True

    if mode == 'train' or mode == 'eval':
      inputs, labels, lengths, ids = magenta.common.get_padded_batch(
          sequence_example_file_paths, hparams.batch_size, input_size)
      tf.add_to_collection('ids', ids)

    elif mode == 'generate':
      inputs = tf.placeholder(tf.float32, [hparams.batch_size, None,
                                           input_size])
      # If state_is_tuple is True, the output RNN cell state will be a tuple
      # instead of a tensor. During training and evaluation this improves
      # performance. However, during generation, the RNN cell state is fed
      # back into the graph with a feed dict. Feed dicts require passed in
      # values to be tensors and not tuples, so state_is_tuple is set to False.
      state_is_tuple = False

    # A learned initial state is stored in a single variable, so the cell
    # state has to be one tensor rather than a tuple.
    if config.learn_initial_state:
      state_is_tuple = False

    cell = make_rnn_cell(hparams.rnn_layer_sizes,
                         dropout_keep_prob=hparams.dropout_keep_prob,
                         attn_length=hparams.attn_length,
                         state_is_tuple=state_is_tuple)

    # Old: use zero
    if not config.learn_initial_state or mode == 'generate':
      initial_state = cell.zero_state(hparams.batch_size, tf.float32)
    # Learn initial state, complex variable/placeholder construction
    else:
      initial_state_size = cell.zero_state(hparams.batch_size, tf.float32).get_shape()
      initial_state_in = tf.placeholder(tf.float32, shape=initial_state_size)
      # `dtype` must be passed by keyword: the second positional parameter
      # of tf.Variable is `trainable`, not the dtype.
      initial_state = tf.Variable(initial_state_in, dtype=tf.float32)
      tf.add_to_collection('initial_state_size', initial_state_size.as_list())
      tf.add_to_collection('initial_state_in', initial_state_in)
      tf.add_to_collection('initial_state', initial_state)
      tf.add_to_collection('initial_state_init', tf.variables_initializer([initial_state]))

    outputs, final_state = tf.nn.dynamic_rnn(
        cell, inputs, lengths, initial_state, parallel_iterations=1,
        swap_memory=True)

    outputs_flat = tf.reshape(outputs, [-1, hparams.rnn_layer_sizes[-1]])
    logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

    if mode == 'train' or mode == 'eval':
      if hparams.skip_first_n_losses:
        # Drop the first n time steps from both logits and labels.
        logits = tf.reshape(logits_flat, [hparams.batch_size, -1, num_classes])
        logits = logits[:, hparams.skip_first_n_losses:, :]
        logits_flat = tf.reshape(logits, [-1, num_classes])
        labels = labels[:, hparams.skip_first_n_losses:]

      labels_flat = tf.reshape(labels, [-1])
      softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=labels_flat, logits=logits_flat)
      loss = tf.reduce_mean(softmax_cross_entropy)
      perplexity = tf.reduce_mean(tf.exp(softmax_cross_entropy))

      correct_predictions = tf.to_float(
          tf.nn.in_top_k(logits_flat, labels_flat, 1))
      accuracy = tf.reduce_mean(correct_predictions) * 100

      # Accuracy restricted to positions whose label is a real event.
      event_positions = tf.to_float(tf.not_equal(labels_flat, no_event_label))
      event_accuracy = tf.truediv(
          tf.reduce_sum(tf.multiply(correct_predictions, event_positions)),
          tf.reduce_sum(event_positions)) * 100

      # Accuracy restricted to positions labelled with the no-event label.
      no_event_positions = tf.to_float(tf.equal(labels_flat, no_event_label))
      no_event_accuracy = tf.truediv(
          tf.reduce_sum(tf.multiply(correct_predictions, no_event_positions)),
          tf.reduce_sum(no_event_positions)) * 100

      global_step = tf.Variable(0, trainable=False, name='global_step')

      tf.add_to_collection('loss', loss)
      tf.add_to_collection('perplexity', perplexity)
      tf.add_to_collection('accuracy', accuracy)
      tf.add_to_collection('global_step', global_step)

      summaries = [
          tf.summary.scalar('loss', loss),
          tf.summary.scalar('perplexity', perplexity),
          tf.summary.scalar('accuracy', accuracy),
          tf.summary.scalar(
              'event_accuracy', event_accuracy),
          tf.summary.scalar(
              'no_event_accuracy', no_event_accuracy),
      ]

      if mode == 'train':
        learning_rate = tf.train.exponential_decay(
            hparams.initial_learning_rate, global_step, hparams.decay_steps,
            hparams.decay_rate, staircase=True, name='learning_rate')

        opt = tf.train.AdamOptimizer(learning_rate)
        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients,
                                                      hparams.clip_norm)
        train_op = opt.apply_gradients(zip(clipped_gradients, params),
                                       global_step)
        tf.add_to_collection('learning_rate', learning_rate)
        tf.add_to_collection('train_op', train_op)

        # Adam keeps `m`/`v` slots only for variables it optimises. The
        # initial state is such a variable only when it is learned; with a
        # zero initial state there is no slot to read or assign.
        if config.learn_initial_state:
          m = tf.placeholder(tf.float32)
          v = tf.placeholder(tf.float32)
          assign_m = opt.get_slot(initial_state, 'm').assign(m)
          assign_v = opt.get_slot(initial_state, 'v').assign(v)
          read_m = opt.get_slot(initial_state, 'm')
          read_v = opt.get_slot(initial_state, 'v')

          tf.add_to_collection('m', m)
          tf.add_to_collection('v', v)
          tf.add_to_collection('assign_m', assign_m)
          tf.add_to_collection('assign_v', assign_v)
          tf.add_to_collection('read_m', read_m)
          tf.add_to_collection('read_v', read_v)

        summaries.append(tf.summary.scalar(
            'learning_rate', learning_rate))

      if mode == 'eval':
        summary_op = tf.summary.merge(summaries)
        tf.add_to_collection('summary_op', summary_op)

    elif mode == 'generate':
      # Logits are divided by a fed-in scalar temperature before softmax.
      temperature = tf.placeholder(tf.float32, [])
      softmax_flat = tf.nn.softmax(
          tf.div(logits_flat, tf.fill([num_classes], temperature)))
      softmax = tf.reshape(softmax_flat, [hparams.batch_size, -1, num_classes])

      tf.add_to_collection('inputs', inputs)
      tf.add_to_collection('initial_state', initial_state)
      tf.add_to_collection('final_state', final_state)
      tf.add_to_collection('temperature', temperature)
      tf.add_to_collection('softmax', softmax)

    init_op = tf.global_variables_initializer()
    tf.add_to_collection('init_op', init_op)

  return graph
コード例 #46
0
    def _decode(self, input_dict):
        """Decodes representation into data.

    Args:
      input_dict (dict): Python dictionary with inputs to decoder.


    Config parameters:

    * **src_inputs** --- Decoder input Tensor of shape [batch_size, time, dim]
      or [time, batch_size, dim].
    * **src_lengths** --- Decoder input lengths Tensor of shape [batch_size]
    * **tgt_inputs** --- Only during training. labels Tensor of the
      shape [batch_size, time] or [time, batch_size].
    * **tgt_lengths** --- Only during training. labels lengths
      Tensor of the shape [batch_size].

    Returns:
      dict: Python dictionary with:
      * outputs - [predictions, alignments, enc_src_lengths].
        predictions are the final predictions of the model. tensor of shape [batch_size, time].
        alignments are the attention probabilities if attention is used. None if 'plot_attention' in attention_params is set to False.
        enc_src_lengths are the lengths of the input. tensor of shape [batch_size].
      * logits - logits with the shape=[batch_size, output_dim].
      * tgt_length - tensor of shape [batch_size] indicating the predicted sequence lengths.
    """
        encoder_outputs = input_dict['encoder_output']['outputs']
        enc_src_lengths = input_dict['encoder_output']['src_length']

        self._batch_size = int(encoder_outputs.get_shape()[0])
        self._beam_width = self.params.get("beam_width", 1)

        tgt_inputs = None
        tgt_lengths = None
        if 'target_tensors' in input_dict:
            tgt_inputs = input_dict['target_tensors'][0]
            tgt_lengths = input_dict['target_tensors'][1]
            tgt_inputs = tf.concat([
                tf.fill([self._batch_size, 1], self.GO_SYMBOL),
                tgt_inputs[:, :-1]
            ], -1)

        layer_type = self.params['rnn_type']
        num_layers = self.params['num_layers']
        attention_params = self.params['attention_params']
        hidden_dim = self.params['hidden_dim']
        dropout_keep_prob = self.params.get(
            'dropout_keep_prob', 1.0) if self._mode == "train" else 1.0

        # To-Do Seperate encoder and decoder position embeddings
        use_positional_embedding = self.params.get("pos_embedding", False)
        use_language_model = self.params.get("use_language_model", False)
        use_beam_search_decoder = (self._beam_width != 1) and (self._mode
                                                               == "infer")

        self._target_emb_layer = tf.get_variable(
            name='TargetEmbeddingMatrix',
            shape=[self._tgt_vocab_size, self._tgt_emb_size],
            dtype=tf.float32,
        )

        if use_positional_embedding:
            self.enc_pos_emb_size = int(encoder_outputs.get_shape()[-1])
            self.enc_pos_emb_layer = tf.get_variable(
                name='EncoderPositionEmbeddingMatrix',
                shape=[1024, self.enc_pos_emb_size],
                dtype=tf.float32,
            )
            encoder_output_positions = tf.range(0,
                                                tf.shape(encoder_outputs)[1],
                                                delta=1,
                                                dtype=tf.int32,
                                                name='positional_inputs')
            encoder_position_embeddings = tf.cast(tf.nn.embedding_lookup(
                self.enc_pos_emb_layer, encoder_output_positions),
                                                  dtype=encoder_outputs.dtype)
            encoder_outputs += encoder_position_embeddings

            self.dec_pos_emb_size = self._tgt_emb_size
            self.dec_pos_emb_layer = tf.get_variable(
                name='DecoderPositionEmbeddingMatrix',
                shape=[1024, self.dec_pos_emb_size],
                dtype=tf.float32,
            )

        output_projection_layer = FullyConnected(
            [self._tgt_vocab_size],
            dropout_keep_prob=dropout_keep_prob,
            mode=self._mode,
        )

        rnn_cell = cells_dict[layer_type]

        dropout = tf.nn.rnn_cell.DropoutWrapper

        multirnn_cell = tf.nn.rnn_cell.MultiRNNCell([
            dropout(rnn_cell(hidden_dim), output_keep_prob=dropout_keep_prob)
            for _ in range(num_layers)
        ])

        if use_beam_search_decoder:
            encoder_outputs = tf.contrib.seq2seq.tile_batch(
                encoder_outputs,
                multiplier=self._beam_width,
            )
            enc_src_lengths = tf.contrib.seq2seq.tile_batch(
                enc_src_lengths,
                multiplier=self._beam_width,
            )

        attention_dim = attention_params["attention_dim"]
        attention_type = attention_params["attention_type"]
        num_heads = attention_params["num_heads"]
        plot_attention = attention_params["plot_attention"]
        if plot_attention:
            if use_beam_search_decoder:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Beam Search Decoding")
            if num_heads != 1:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Multi Head Attention")
            if self.params['dtype'] != tf.float32:
                plot_attention = False
                print(
                    "Plotting Attention is disabled for Mixed Precision Mode")

        attention_params_dict = {}
        if attention_type == "bahadanu":
            AttentionMechanism = BahdanauAttention
            attention_params_dict["normalize"] = False,
        elif attention_type == "chorowski":
            AttentionMechanism = LocationSensitiveAttention
            attention_params_dict["use_coverage"] = attention_params[
                "use_coverage"]
            attention_params_dict["location_attn_type"] = attention_type
            attention_params_dict["location_attention_params"] = {
                'filters': 10,
                'kernel_size': 101
            }
        elif attention_type == "zhaopeng":
            AttentionMechanism = LocationSensitiveAttention
            attention_params_dict["use_coverage"] = attention_params[
                "use_coverage"]
            attention_params_dict["query_dim"] = hidden_dim
            attention_params_dict["location_attn_type"] = attention_type

        attention_mechanism = []

        for head in range(num_heads):
            attention_mechanism.append(
                AttentionMechanism(num_units=attention_dim,
                                   memory=encoder_outputs,
                                   memory_sequence_length=enc_src_lengths,
                                   probability_fn=tf.nn.softmax,
                                   dtype=tf.get_variable_scope().dtype,
                                   **attention_params_dict))

        multirnn_cell_with_attention = AttentionWrapper(
            cell=multirnn_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=[hidden_dim for i in range(num_heads)],
            output_attention=True,
            alignment_history=plot_attention,
        )

        if self._mode == "train":
            decoder_output_positions = tf.range(0,
                                                tf.shape(tgt_inputs)[1],
                                                delta=1,
                                                dtype=tf.int32,
                                                name='positional_inputs')
            tgt_input_vectors = tf.nn.embedding_lookup(self._target_emb_layer,
                                                       tgt_inputs)
            if use_positional_embedding:
                tgt_input_vectors += tf.nn.embedding_lookup(
                    self.dec_pos_emb_layer, decoder_output_positions)
            tgt_input_vectors = tf.cast(
                tgt_input_vectors,
                dtype=self.params['dtype'],
            )
            # helper = tf.contrib.seq2seq.TrainingHelper(
            helper = TrainingHelper(
                inputs=tgt_input_vectors,
                sequence_length=tgt_lengths,
            )
        elif self._mode == "infer" or self._mode == "eval":
            embedding_fn = lambda ids: tf.cast(
                tf.nn.embedding_lookup(self._target_emb_layer, ids),
                dtype=self.params['dtype'],
            )
            pos_embedding_fn = None
            if use_positional_embedding:
                pos_embedding_fn = lambda ids: tf.cast(
                    tf.nn.embedding_lookup(self.dec_pos_emb_layer, ids),
                    dtype=self.params['dtype'],
                )

            # helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            helper = GreedyEmbeddingHelper(
                embedding=embedding_fn,
                start_tokens=tf.fill([self._batch_size], self.GO_SYMBOL),
                end_token=self.END_SYMBOL,
                positional_embedding=pos_embedding_fn)

        if self._mode != "infer":
            maximum_iterations = tf.reduce_max(tgt_lengths)
        else:
            maximum_iterations = tf.reduce_max(enc_src_lengths)

        if not use_beam_search_decoder:
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=multirnn_cell_with_attention,
                helper=helper,
                initial_state=multirnn_cell_with_attention.zero_state(
                    batch_size=self._batch_size,
                    dtype=encoder_outputs.dtype,
                ),
                output_layer=output_projection_layer,
            )
        else:
            batch_size_tensor = tf.constant(self._batch_size)
            decoder = BeamSearchDecoder(
                cell=multirnn_cell_with_attention,
                embedding=embedding_fn,
                start_tokens=tf.tile([self.GO_SYMBOL], [self._batch_size]),
                end_token=self.END_SYMBOL,
                initial_state=multirnn_cell_with_attention.zero_state(
                    dtype=encoder_outputs.dtype,
                    batch_size=batch_size_tensor * self._beam_width,
                ),
                beam_width=self._beam_width,
                output_layer=output_projection_layer,
                length_penalty_weight=0.0,
            )

        final_outputs, final_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            impute_finished=self.mode != "infer",
            maximum_iterations=maximum_iterations,
        )

        if plot_attention:
            alignments = tf.transpose(final_state.alignment_history[0].stack(),
                                      [1, 0, 2])
        else:
            alignments = None

        if not use_beam_search_decoder:
            outputs = tf.argmax(final_outputs.rnn_output, axis=-1)
            logits = final_outputs.rnn_output
            return_outputs = [outputs, alignments, enc_src_lengths]
        else:
            outputs = final_outputs.predicted_ids[:, :, 0]
            logits = final_outputs.predicted_ids[:, :, 0]
            return_outputs = [outputs, enc_src_lengths]

        if self.mode == "eval":
            max_len = tf.reduce_max(tgt_lengths)
            logits = tf.while_loop(
                lambda logits: max_len > tf.shape(logits)[1],
                lambda logits: tf.concat([
                    logits,
                    tf.fill([tf.shape(logits)[0], 1,
                             tf.shape(logits)[2]],
                            tf.cast(1.0, self.params['dtype']))
                ], 1),
                loop_vars=[logits],
                back_prop=False,
            )

        return {
            'outputs': return_outputs,
            'logits': logits,
            'tgt_length': final_sequence_lengths,
        }
コード例 #47
0
    def _build_decoder(self, encoder_outputs, encoder_state, hparams):
        """Construct the RNN decoder and run dynamic decoding.

        When ``self.mode`` is not INFER, the decoder is driven by
        ``iterator.target_input`` reshaped to
        ``[max_time, hparams.batch_size, hparams.num_units]`` and
        ``self.output_layer`` is applied once to the outputs of all
        timesteps. In INFER mode a beam-search decoder is used when
        ``hparams.beam_width > 0``; otherwise a greedy decoder is used and
        ``self.output_layer`` is applied at every step.

        Args:
          encoder_outputs: The outputs of the encoder for every time step.
          encoder_state: The final state of the encoder.
          hparams: The hyperparameter configuration.

        Returns:
          A tuple ``(logits, sample_id, final_context_state)``. With beam
          search, ``logits`` is a ``tf.no_op()`` and ``sample_id`` holds the
          decoder's ``predicted_ids``.
        """
        vocab_table = self.tgt_vocab_table
        sos_id = tf.cast(vocab_table.lookup(tf.constant(hparams.sos)),
                         tf.int32)
        eos_id = tf.cast(vocab_table.lookup(tf.constant(hparams.eos)),
                         tf.int32)

        layer_count = hparams.num_layers
        gpu_count = hparams.num_gpus

        batch_iter = self.iterator

        # Upper bound on decoding steps; only passed to dynamic_decode in
        # inference mode below.
        max_decode_steps = self._get_infer_maximum_iterations(
            hparams, batch_iter.source_sequence_length)

        with tf.variable_scope("decoder") as decoder_scope:
            cell, initial_state = self._build_decoder_cell(
                hparams, encoder_outputs, encoder_state,
                batch_iter.source_sequence_length)

            if self.mode == tf.contrib.learn.ModeKeys.INFER:
                # ---- Inference: beam search or greedy decoding ----
                beam_width = hparams.beam_width
                length_penalty = hparams.length_penalty_weight
                start_tokens = tf.fill([self.batch_size], sos_id)
                use_beam_search = beam_width > 0

                if use_beam_search:
                    decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=cell,
                        embedding=self.embedding_decoder,
                        start_tokens=start_tokens,
                        end_token=eos_id,
                        initial_state=initial_state,
                        beam_width=beam_width,
                        output_layer=self.output_layer,
                        length_penalty_weight=length_penalty)
                else:
                    greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        self.embedding_decoder, start_tokens, eos_id)
                    # The projection is applied at every timestep here.
                    decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell,
                        greedy_helper,
                        initial_state,
                        output_layer=self.output_layer)

                decode_result = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    maximum_iterations=max_decode_steps,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)
                outputs, final_context_state, _ = decode_result

                if use_beam_search:
                    logits = tf.no_op()
                    sample_id = outputs.predicted_ids
                else:
                    logits = outputs.rnn_output
                    sample_id = outputs.sample_id
            else:
                # ---- Train / eval: teacher forcing on the target input ----
                target_input = batch_iter.target_input
                if self.time_major:
                    target_input = tf.transpose(target_input)
                # The target input is fed directly (reshaped) rather than
                # looked up in an embedding matrix.
                decoder_input_shape = [
                    self.get_max_time(target_input),
                    hparams.batch_size,
                    hparams.num_units,
                ]
                decoder_inputs = tf.reshape(target_input, decoder_input_shape)

                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    decoder_inputs,
                    batch_iter.target_sequence_length,
                    time_major=self.time_major)

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell,
                    training_helper,
                    initial_state,
                )

                decode_result = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    output_time_major=self.time_major,
                    swap_memory=True,
                    scope=decoder_scope)
                outputs, final_context_state, _ = decode_result

                sample_id = outputs.sample_id

                # Unlike inference, the output layer is applied to all
                # timesteps in one call after decoding instead of being
                # handed to the decoder.
                if layer_count < gpu_count:
                    device_index = layer_count
                else:
                    device_index = layer_count - 1
                output_device = model_helper.get_device_str(
                    device_index, gpu_count)
                with tf.device(output_device):
                    logits = self.output_layer(outputs.rnn_output)

        return logits, sample_id, final_context_state
コード例 #48
0
ファイル: start_anew2.py プロジェクト: CMFell/cnn_experiments
def custom_loss(y_true, y_pred):
    """Compute a scalar detection loss from grid-cell box predictions.

    The last axis of both tensors is read as: ``[..., 0:2]`` box centre xy,
    ``[..., 2:4]`` box width/height, ``[..., 4]`` confidence / object
    indicator, and (for ``y_pred`` only) ``[..., 5]`` a single class score.

    The loss is the square of the sum of five terms (xy, wh, object
    confidence, no-object confidence, class), each the square root of a sum
    of squared, masked and scaled differences.

    Relies on module-level names not defined here: ``GRID_W``, ``GRID_H``,
    ``BATCH_SIZE``, ``BOX``, ``ANCHORS``, ``true_boxes``,
    ``NO_OBJ_THRESHOLD``, ``WARM_UP_BATCHES``, ``COORD_SCALE``,
    ``OBJECT_SCALE``, ``NO_OBJECT_SCALE`` and ``CLASS_SCALE``.

    Args:
        y_true: ground-truth tensor; presumably
            [batch, GRID_H, GRID_W, BOX, channels] -- TODO confirm.
        y_pred: raw network output, indexed the same way as ``y_true``.

    Returns:
        The loss tensor, wrapped in ``tf.Print`` ops that log the individual
        loss terms whenever it is evaluated.
    """
    # Dynamic shape of y_true (at most its first five dimensions); used to
    # build the constant warm-up tensors further down.
    mask_shape = tf.shape(y_true)[:5]

    # Column index of every grid cell, shaped (1, GRID_H, GRID_W, 1, 1).
    cell_x = tf.to_float(
        tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]),
                   (1, GRID_H, GRID_W, 1, 1)))
    # Row index of every grid cell: each value of range(GRID_H) repeated
    # GRID_W times (a tile/reshape equivalent of np.repeat).
    cell_y = tf.reshape(
        tf.tile(tf.reshape(tf.range(GRID_H), [-1, 1]), [1, GRID_W]), [-1])
    # Reshape to the same layout as cell_x.
    cell_y = tf.to_float(tf.reshape(cell_y, (1, GRID_H, GRID_W, 1, 1)))
    # Combine into an (x, y) offset grid and tile over batch and boxes.
    # NOTE(review): the box axis is tiled by a hard-coded 5 while BOX is used
    # elsewhere in this function -- confirm BOX == 5.
    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1),
                        [BATCH_SIZE, 1, 1, 5, 1])

    # Counter incremented on every evaluation; drives the warm-up switch.
    seen = tf.Variable(0.)
    """
    Adjust Predictions
    """
    # Squash predicted xy into (0, 1), i.e. an offset within the cell.
    pred_box_xy = tf.sigmoid(y_pred[..., :2])
    # Convert to whole-image coordinates: add the cell offset, then divide
    # by the grid size.
    pred_box_xy_wi = pred_box_xy + cell_grid
    pred_box_xy_wi = tf.divide(pred_box_xy_wi, [GRID_W, GRID_H])

    # Raw predicted w and h (compared against log-space truth below).
    pred_box_wh = y_pred[..., 2:4]
    # Whole-image width/height: exp(raw) scaled by the anchors, then divided
    # by the grid size.
    pred_box_wh_wi = tf.exp(y_pred[..., 2:4]) * tf.reshape(
        ANCHORS, [1, 1, 1, BOX, 2])
    pred_box_wh_wi = tf.divide(pred_box_wh_wi, [GRID_W, GRID_H])

    # Predicted confidence in (0, 1).
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    # Predicted class score in (0, 1); only channel 5 is read.
    pred_box_class = tf.sigmoid(y_pred[..., 5])
    """
    Adjust ground truth for just cells with a centre of a ground truth
    """
    # Ground-truth xy, relative position to the containing cell.
    true_box_xy = y_true[..., 0:2]  # relative position to the containing cell
    # Same conversion to whole-image coordinates as for the predictions.
    true_box_xy_wi = tf.divide(tf.add(true_box_xy, cell_grid),
                               [GRID_W, GRID_H])

    # Ground-truth w and h; presumably already relative to the whole image
    # (as the _wi suffix suggests) -- TODO confirm.
    true_box_wh_wi = y_true[..., 2:4]
    # Map w and h into the same log/anchor space as the raw predictions.
    true_box_wh = tf.multiply(true_box_wh_wi, [GRID_W, GRID_H])
    true_box_wh = true_box_wh / tf.reshape(ANCHORS, [1, 1, 1, BOX, 2])
    true_box_wh = tf.log(true_box_wh + 0.00001)
    # The + 0.00001 avoids log(0); such entries are expected to be multiplied
    # by a zero mask again later.

    # IOU between each predicted box and the ground-truth box of the same
    # cell/anchor slot, computed from corner coordinates.
    true_wh_half = true_box_wh_wi / 2.
    true_mins = true_box_xy_wi - true_wh_half
    true_maxes = true_box_xy_wi + true_wh_half

    pred_wh_half = pred_box_wh_wi / 2.
    pred_mins = pred_box_xy_wi - pred_wh_half
    pred_maxes = pred_box_xy_wi + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh_wi[..., 0] * true_box_wh_wi[..., 1]
    pred_areas = pred_box_wh_wi[..., 0] * pred_box_wh_wi[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    # NOTE(review): no epsilon is added to the denominator -- confirm that
    # union_areas cannot be zero.
    iou_scores = tf.truediv(intersect_areas, union_areas)
    """
    Calculate IOU with any truth
    """

    # Confidence mask: penalize the confidence of predicted boxes whose best
    # IOU with any ground-truth box is below NO_OBJ_THRESHOLD.
    # NOTE(review): `true_boxes` is not defined in this function; presumably
    # a module-level tensor holding all ground-truth boxes -- verify.
    true_xy = true_boxes[..., 0:2]
    true_xy_wi = tf.divide(tf.add(true_xy, tf.expand_dims(cell_grid, axis=4)),
                           [GRID_W, GRID_H])
    true_wh_wi = true_boxes[..., 2:4]

    true_wh_half2 = true_wh_wi / 2.
    true_mins2 = true_xy_wi - true_wh_half2
    true_maxes2 = true_xy_wi + true_wh_half2

    # Insert an axis at position 4 so predictions broadcast against the
    # boxes in true_boxes.
    pred_xy_wi = tf.expand_dims(pred_box_xy_wi, 4)
    pred_wh_wi = tf.expand_dims(pred_box_wh_wi, 4)

    pred_wh_half2 = pred_wh_wi / 2.
    pred_mins2 = pred_xy_wi - pred_wh_half2
    pred_maxes2 = pred_xy_wi + pred_wh_half2

    intersect_mins2 = tf.maximum(pred_mins2, true_mins2)
    intersect_maxes2 = tf.minimum(pred_maxes2, true_maxes2)
    intersect_wh2 = tf.maximum(intersect_maxes2 - intersect_mins2, 0.)
    intersect_areas2 = intersect_wh2[..., 0] * intersect_wh2[..., 1]

    true_areas2 = true_wh_wi[..., 0] * true_wh_wi[..., 1]
    pred_areas2 = pred_wh_wi[..., 0] * pred_wh_wi[..., 1]

    union_areas2 = pred_areas2 + true_areas2 - intersect_areas2
    iou_scores_all = tf.truediv(intersect_areas2, union_areas2)
    # Best IOU of each predicted box over the axis inserted above.
    best_ious = tf.reduce_max(iou_scores_all, axis=4)

    # Masks: `noones` is 1.0 where the best IOU is below the threshold;
    # `ones` is the object indicator read from channel 4 of y_true.
    noones = tf.to_float(best_ious < NO_OBJ_THRESHOLD)
    ones = y_true[..., 4]
    """
    Warm-up training
    """

    # Count this evaluation, then build the constant warm-up targets:
    # xy = 0.5, wh = 0, and a coordinate mask of all ones.
    seen = tf.assign_add(seen, 1.)
    warm_xy = tf.fill(mask_shape, 0.5)
    warm_xy = warm_xy[..., 0:2]
    warm_wh = tf.fill(mask_shape, 0.)
    warm_wh = warm_wh[..., 2:4]
    warm_no = tf.fill(mask_shape[0:4], 1.)

    # While seen < WARM_UP_BATCHES use the warm-up targets and mask;
    # afterwards use the real targets and the object mask.
    # NOTE(review): `coord_scale` returned here is never used -- the xy/wh
    # terms below multiply by COORD_SCALE in both phases. Confirm whether the
    # 0.01 warm-up scale was meant to apply.
    true_box_xy, true_box_wh, coord_scale, coord_mask = tf.cond(
        tf.less(seen,
                WARM_UP_BATCHES), lambda: [warm_xy, warm_wh, 0.01, warm_no],
        lambda: [true_box_xy, true_box_wh, COORD_SCALE, ones])
    """
    Finalize the loss
    """

    # Each term is sqrt(sum(square(masked, scaled difference))).
    loss_conf = tf.sqrt(
        tf.reduce_sum(
            tf.square((iou_scores - pred_box_conf) * ones * OBJECT_SCALE)))
    loss_noconf = tf.sqrt(
        tf.reduce_sum(
            tf.square((0. - pred_box_conf) * noones * NO_OBJECT_SCALE)))
    # The class target is the constant 1 wherever an object is present.
    loss_class = tf.sqrt(
        tf.reduce_sum(tf.square((1. - pred_box_class) * ones * CLASS_SCALE)))
    # Add a trailing axis so the mask broadcasts over the two coordinates.
    coord_mask = tf.expand_dims(coord_mask, axis=-1)
    loss_xy = tf.sqrt(
        tf.reduce_sum(
            tf.square((true_box_xy - pred_box_xy) * coord_mask * COORD_SCALE)))
    loss_wh = tf.sqrt(
        tf.reduce_sum(
            tf.square((true_box_wh - pred_box_wh) * coord_mask * COORD_SCALE)))

    loss_all = loss_xy + loss_wh + loss_conf + loss_class + loss_noconf

    # The final loss is the square of the summed terms.
    loss = tf.square(loss_all)
    """
    Debugging code
    """

    # test1 = pred_box_conf
    test2 = tf.reduce_max(pred_box_xy)
    test3 = tf.reduce_max(true_box_xy)

    # Chain tf.Print ops onto the loss so the individual terms are logged
    # each time the loss is evaluated.
    loss = tf.Print(loss, [test2, test3], message='\t')
    loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
    loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
    loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
    loss = tf.Print(loss, [loss_noconf],
                    message='Loss No Conf \t',
                    summarize=1000)
    loss = tf.Print(loss, [loss_class],
                    message='Loss Class \t',
                    summarize=1000)
    loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)

    return loss
コード例 #49
0
    def __init__(self):
        """Build the training and greedy-inference graphs of the model.

        The output of ``model_GPT2.model`` on ``self.X`` is averaged over
        axis 1 and projected to ``params.n_embd`` units; the result is used
        as both ``c`` and ``h`` of the initial LSTM decoder state. One LSTM
        cell and one vocabulary projection are shared by the teacher-forced
        training decoder and the greedy inference decoder.

        Attributes created:
            X, Y: int32 placeholders of shape ``[None, None]``.
            X_seq_len, Y_seq_len: per-row count of non-zero entries.
            training_logits: ``rnn_output`` of the training decoder.
            predicting_ids: ``sample_id`` of the greedy decoder.
            logits: ``sample_id`` of the training decoder (ids, despite the
                attribute name).
            cost: sequence loss of ``training_logits`` against ``Y``.
            optimizer: Adam minimisation op for ``cost``.
            prediction: masked argmax of ``training_logits``.
            accuracy: mean equality of ``prediction`` and the masked ``Y``.
        """
        # 1. Define the inputs
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])

        # Sequence length = number of non-zero ids in each row.
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)

        batch_size = tf.shape(self.X)[0]
        # Drop the last column of Y ...
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        # ... and prepend the start token to form the decoder input.
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        # initializer = tf.initializers.random_normal(stddev=0.1)
        logits = tf.reduce_mean(model_GPT2.model(params, self.X)['logits'], axis=1)

        state_proj = tf.layers.Dense(params.n_embd)
        init_state = state_proj(logits)  # build the initial decoder state

        # Word embedding for the target vocabulary
        embedding = tf.Variable(tf.random_uniform([len(id2vocab_to), params.n_embd], -1, 1))

        cell = tf.nn.rnn_cell.LSTMCell(params.n_embd)
        vocab_proj = tf.layers.Dense(len(id2vocab_to))

        # Decoding (training, teacher forcing).
        # Y_seq_len is already int32, so no cast is needed here.
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=tf.nn.embedding_lookup(embedding, decoder_input),
            sequence_length=self.Y_seq_len
        )

        encoder_state = tf.nn.rnn_cell.LSTMStateTuple(c=init_state, h=init_state)

        decoder = tf.contrib.seq2seq.BasicDecoder(cell=cell,
                                                  helper=helper,
                                                  initial_state=encoder_state,
                                                  output_layer=vocab_proj
                                                  )

        decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            maximum_iterations=tf.reduce_max(self.Y_seq_len)
        )
        # Inference
        # Greedy search
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding=embedding,
                                                          start_tokens=tf.tile(
                                                              tf.constant([GO], dtype=tf.int32),
                                                              [tf.shape(init_state)[0]]
                                                          ),
                                                          end_token=EOS
                                                          )

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=cell,
            helper=helper,
            initial_state=encoder_state,
            output_layer=vocab_proj
        )

        # Greedy decoding is capped at twice the longest source sequence.
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            maximum_iterations=2*tf.reduce_max(self.X_seq_len)
        )

        self.training_logits = decoder_output.rnn_output
        self.predicting_ids = predicting_decoder_output.sample_id
        self.logits = decoder_output.sample_id
        # Weight 1.0 for real target positions, 0.0 for padding.
        masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(
            logits=self.training_logits,
            targets=self.Y,
            weights=masks
        )

        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)

        y_t = tf.argmax(self.training_logits, axis=2)
        y_t = tf.cast(y_t, tf.int32)

        # Accuracy over the positions selected by the mask.
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
コード例 #50
0
    def call(self,
             inputs,
             attention_mask=None,
             token_type_ids=None,
             position_ids=None,
             head_mask=None,
             inputs_embeds=None,
             training=False):
        """Run the embeddings, encoder and pooler on the given inputs.

        ``inputs`` may be a tuple/list (positional order: input_ids,
        attention_mask, token_type_ids, position_ids, head_mask,
        inputs_embeds), a dict keyed by those names, or the input ids
        themselves. Entries present in ``inputs`` take precedence over the
        corresponding keyword arguments.

        Returns:
            ``(sequence_output, pooled_output) + encoder_outputs[1:]``.

        Raises:
            ValueError: if both or neither of input_ids / inputs_embeds are
                provided.
            NotImplementedError: if a head mask is provided.
        """
        # ---- Unpack the flexible `inputs` argument ----
        if isinstance(inputs, (tuple, list)):
            n_given = len(inputs)
            input_ids = inputs[0]
            if n_given > 1:
                attention_mask = inputs[1]
            if n_given > 2:
                token_type_ids = inputs[2]
            if n_given > 3:
                position_ids = inputs[3]
            if n_given > 4:
                head_mask = inputs[4]
            if n_given > 5:
                inputs_embeds = inputs[5]
            assert n_given <= 6, "Too many inputs."
        elif isinstance(inputs, dict):
            lookup = inputs.get
            input_ids = lookup('input_ids')
            attention_mask = lookup('attention_mask', attention_mask)
            token_type_ids = lookup('token_type_ids', token_type_ids)
            position_ids = lookup('position_ids', position_ids)
            head_mask = lookup('head_mask', head_mask)
            inputs_embeds = lookup('inputs_embeds', inputs_embeds)
            assert len(inputs) <= 6, "Too many inputs."
        else:
            input_ids = inputs

        # ---- Exactly one of ids / embeddings must be supplied ----
        has_ids = input_ids is not None
        has_embeds = inputs_embeds is not None
        if has_ids and has_embeds:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        if not (has_ids or has_embeds):
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")
        if has_ids:
            input_shape = shape_list(input_ids)
        else:
            # Embeddings carry one extra trailing axis; drop it.
            input_shape = shape_list(inputs_embeds)[:-1]

        # Defaults: attend everywhere, a single token type.
        if attention_mask is None:
            attention_mask = tf.fill(input_shape, 1)
        if token_type_ids is None:
            token_type_ids = tf.fill(input_shape, 0)

        # Insert two broadcast axes into the 2D mask, then turn it into an
        # additive bias: 0.0 where the mask is 1 and -10000.0 where it is 0.
        # It is handed to the encoder below.
        broadcast_mask = attention_mask[:, tf.newaxis, tf.newaxis, :]
        extended_attention_mask = (
            1.0 - tf.cast(broadcast_mask, tf.float32)) * -10000.0

        # Head masking is not supported; every layer receives None.
        if head_mask is not None:
            raise NotImplementedError
        head_mask = [None] * self.num_hidden_layers

        embedding_output = self.embeddings(
            [input_ids, position_ids, token_type_ids, inputs_embeds],
            training=training)
        encoder_outputs = self.encoder(
            [embedding_output, extended_attention_mask, head_mask],
            training=training)

        sequence_output = encoder_outputs[0]
        # The pooler is fed the slice at index 0 along axis 1.
        pooled_output = self.pooler(sequence_output[:, 0])

        # Whatever else the encoder returned (encoder_outputs[1:]) is
        # appended after the two main outputs.
        return (sequence_output, pooled_output) + encoder_outputs[1:]
コード例 #51
0
                                                  hparams.max_gradient_norm)

    # Optimization
    optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
    train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                         global_step=global_step)

    #optimizer = tf.train.GradientDescentOptimizer(hparams.learning_rate)
    #train_op = optimizer.minimize(loss, global_step=global_step)
else:
    #    source_sequence_length = hparams.encoder_length
    #    maximum_iterations = tf.round(tf.reduce_max(source_sequence_length) * 2)
    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        cell=decoder_cell,
        embedding=embedding_decoder,
        start_tokens=tf.fill([hparams.batch_size], tgt_sos_id),
        end_token=tgt_eos_id,
        initial_state=initial_state,
        beam_width=hparams.beam_width,
        output_layer=projection_layer,
        length_penalty_weight=0.0)
    # Dynamic decoding
    outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                      maximum_iterations=10)
    translations = outputs.predicted_ids

#%%
# Tweet
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
コード例 #52
0
import matplotlib.pyplot as plt
import tensorflow as tf

# Session used to evaluate each loss tensor defined below.
sess = tf.Session()

# 500 evenly spaced prediction values from -3 to 5.
x_vals = tf.linspace(-3., 5, 500)
# Scalar target, plus the same target value repeated 500 times for the
# losses that take a labels tensor.
target = tf.constant(1.)
targets = tf.fill([
    500,
], 1.)

# Hinge Loss: max(0, 1 - target * x)
hinge_y_vals = tf.maximum(0., 1. - tf.multiply(target, x_vals))
hing_y_out = sess.run(hinge_y_vals)

# Cross-entropy Loss: -target * log(x) - (1 - target) * log(1 - x)
# NOTE(review): x_vals spans [-3, 5], so tf.log receives non-positive
# arguments over part of the range -- confirm the resulting non-finite
# values are acceptable.
xentropy_y_vals = -tf.multiply(target, tf.log(x_vals)) - tf.multiply(
    (1. - target), tf.log(1. - x_vals))
xentropy_y_out = sess.run(xentropy_y_vals)

# Sigmoid cross entropy, treating x_vals as logits.
xentropy_sigmoid_y_vals = tf.nn.sigmoid_cross_entropy_with_logits(
    logits=x_vals, labels=targets)
xentropy_sigmoid_y_out = sess.run(xentropy_sigmoid_y_vals)

# Weighted cross entropy with a weight of 0.5.
# NOTE(review): arguments are positional; presumably
# (targets, logits, pos_weight) -- confirm against the installed
# TensorFlow version.
weight = tf.constant(0.5)
xentropy_weighted_y_vals = tf.nn.weighted_cross_entropy_with_logits(
    targets, x_vals, weight)
xentropy_weighted_y_out = sess.run(xentropy_weighted_y_vals)
コード例 #53
0
def process_decoding_input(targets, word_to_int, batch_size):
    """Build decoder inputs: drop the last column of `targets`, prepend `<GO>`.

    Args:
        targets: 2-D tensor of target token ids; rows `0..batch_size` are used.
        word_to_int: mapping that holds the id of the `'<GO>'` token.
        batch_size: number of rows in the batch.

    Returns:
        A tensor made of one `<GO>` column followed by `targets` without its
        last column.
    """
    # Every row, every column except the final one.
    without_last = tf.strided_slice(
        targets, begin=[0, 0], end=[batch_size, -1], strides=[1, 1])
    go_column = tf.fill([batch_size, 1], word_to_int['<GO>'])
    return tf.concat([go_column, without_last], axis=1)
コード例 #54
0
ファイル: DEN.py プロジェクト: nanarosebp/PhDProject
    def optimization(self, prev_W, selective = False, splitting = False, expansion = None):
        """Build the training op and store it in `self.opt`.

        The optimised loss is `self.loss` plus an L2 penalty on the trained
        variables (weighted by `self.l2_lambda`) plus an L2 penalty on the
        distance to the stored weights in `prev_W` (weighted by
        `self.regular_lambda`). After the Adam update, two thresholding steps
        are applied and `self.opt` is a no-op that depends on all of them.

        Args:
            prev_W: mapping from variable name to its previously stored value.
                Variables whose name is not a key are not regularised towards
                a previous value.
            selective: when true, only variables whose name contains
                `'layer%d' % self.n_layers` are trained; otherwise every
                trainable variable is.
            splitting: when true (or when `expansion` is given), each variable
                is sliced to the shape of its stored value before the
                distance penalty is computed.
            expansion: see `splitting`; only tested against `None` here.
        """
        if selective:
            all_var = [ var for var in tf.trainable_variables() if 'layer%d'%self.n_layers in var.name ]
        else:
            all_var = list(tf.trainable_variables())

        l2_losses = [tf.nn.l2_loss(var) for var in all_var]

        # Distance-to-previous-weights penalty. When splitting/expanding, the
        # variable is first cut down to the stored value's shape (1-D or 2-D).
        slice_to_prev = splitting or expansion is not None
        regular_terms = []
        for var in all_var:
            if var.name not in prev_W:
                continue
            prev_w = prev_W[var.name]
            if not slice_to_prev:
                current = var
            elif len(prev_w.shape) == 1:
                current = var[:prev_w.shape[0]]
            else:
                current = var[:prev_w.shape[0], :prev_w.shape[1]]
            regular_terms.append(tf.nn.l2_loss(current - prev_w))

        losses = self.loss + self.l2_lambda * tf.reduce_sum(l2_losses) + \
                    self.regular_lambda * tf.reduce_sum(regular_terms)

        opt = tf.train.AdamOptimizer(self.lr)
        grads = opt.compute_gradients(losses, all_var)
        apply_grads = opt.apply_gradients(grads, global_step = self.g_step)

        # Element-wise soft-thresholding with self.l1_lambda, run after the
        # gradient step. Note it covers ALL trainable variables, even when
        # `selective` restricted the gradient update.
        l1_var = list(tf.trainable_variables())
        l1_op_list = []
        with tf.control_dependencies([apply_grads]):
            for var in l1_var:
                th_t = tf.fill(tf.shape(var), tf.convert_to_tensor(self.l1_lambda))
                zero_t = tf.zeros(tf.shape(var))
                var_temp = var - (th_t * tf.sign(var))
                # Entries with |var| < threshold become 0, the rest shrink by it.
                l1_op = var.assign(tf.where(tf.less(tf.abs(var), th_t), zero_t, var_temp))
                l1_op_list.append(l1_op)

        # Column-wise thresholding with self.gl_lambda on variables whose name
        # contains 'new' and either 'bw' or 'tw'.
        GL_var = [var for var in tf.trainable_variables() if 'new' in var.name and ('bw' in var.name or 'tw' in var.name)]
        gl_op_list = []
        with tf.control_dependencies([apply_grads]):
            for var in GL_var:
                # L2 norm of every column.
                g_sum = tf.sqrt(tf.reduce_sum(tf.square(var), 0))
                th_t = self.gl_lambda
                gw = []
                for i in range(var.get_shape()[1]):
                    temp_gw = var[:, i] - (th_t * var[:, i] / g_sum[i])
                    # A column whose norm is below the threshold is zeroed entirely.
                    gw_gl = tf.where(tf.less(g_sum[i], th_t), tf.zeros(tf.shape(var[:, i])), temp_gw)
                    gw.append(gw_gl)
                gl_op = var.assign(tf.stack(gw, 1))
                gl_op_list.append(gl_op)

        # Running self.opt triggers the gradient step and both thresholding passes.
        with tf.control_dependencies(l1_op_list + gl_op_list):
            self.opt = tf.no_op()
コード例 #55
0
    def _dynamic_decode(
        self,
        features,
        encoder_outputs,
        encoder_state,
        encoder_sequence_length,
        tflite_run=False,
    ):
        """Decode target sequences from the encoder results and build the predictions.

        Args:
            features: Source features; ``features["tokens"]`` is read only when
                ``replace_unknown_target`` is enabled.
            encoder_outputs: Encoder outputs, passed as the decoder memory. The
                batch size is taken from the first dimension of the first
                flattened element.
            encoder_state: Encoder state used to initialise the decoder
                (``None`` entries are kept as ``None`` when tiling).
            encoder_sequence_length: Passed as ``memory_sequence_length``.
            tflite_run: When true, ``tflite_output_size`` is forwarded to the
                decoder and the sampled ids are returned directly.

        Returns:
            ``sampled_ids`` when ``tflite_run`` is true; otherwise a dict with
            ``"log_probs"`` and either ``"text"`` (in-graph tokenizer) or
            ``"tokens"``, ``"length"`` and, when available, ``"alignment"``.

        Raises:
            TypeError: If ``replace_unknown_target`` is set but no alignment was
                returned or the source inputter is not a ``WordEmbedder``.
            ValueError: If ``num_hypotheses`` is greater than the beam width.
        """
        params = self.params
        batch_size = tf.shape(tf.nest.flatten(encoder_outputs)[0])[0]
        start_ids = tf.fill([batch_size], constants.START_OF_SENTENCE_ID)
        beam_size = params.get("beam_width", 1)

        if beam_size > 1:
            # Tile encoder outputs to prepare for beam search.
            encoder_outputs = tfa.seq2seq.tile_batch(encoder_outputs, beam_size)
            encoder_sequence_length = tfa.seq2seq.tile_batch(
                encoder_sequence_length, beam_size
            )
            encoder_state = tf.nest.map_structure(
                lambda state: tfa.seq2seq.tile_batch(state, beam_size)
                if state is not None
                else None,
                encoder_state,
            )

        # Dynamically decodes from the encoder outputs.
        initial_state = self.decoder.initial_state(
            memory=encoder_outputs,
            memory_sequence_length=encoder_sequence_length,
            initial_state=encoder_state,
        )
        (
            sampled_ids,
            sampled_length,
            log_probs,
            alignment,
            _,
        ) = self.decoder.dynamic_decode(
            self.labels_inputter,
            start_ids,
            initial_state=initial_state,
            decoding_strategy=decoding.DecodingStrategy.from_params(params),
            sampler=decoding.Sampler.from_params(params),
            maximum_iterations=params.get("maximum_decoding_length", 250),
            minimum_iterations=params.get("minimum_decoding_length", 0),
            tflite_output_size=params.get("tflite_output_size", 250)
            if tflite_run
            else None,
        )

        # The TFLite path stops here: no token lookup or post-processing.
        if tflite_run:
            return sampled_ids

        target_tokens = self.labels_inputter.ids_to_tokens.lookup(
            tf.cast(sampled_ids, tf.int64)
        )

        # Maybe replace unknown targets by the source tokens with the highest attention weight.
        if params.get("replace_unknown_target", False):
            if alignment is None:
                raise TypeError(
                    "replace_unknown_target is not compatible with decoders "
                    "that don't return alignment history"
                )
            if not isinstance(self.features_inputter, inputters.WordEmbedder):
                raise TypeError(
                    "replace_unknown_target is only defined when the source "
                    "inputter is a WordEmbedder"
                )
            source_tokens = features["tokens"]
            if beam_size > 1:
                source_tokens = tfa.seq2seq.tile_batch(source_tokens, beam_size)
            # Merge batch and beam dimensions.
            original_shape = tf.shape(target_tokens)
            target_tokens = tf.reshape(target_tokens, [-1, original_shape[-1]])
            align_shape = misc.shape_list(alignment)
            attention = tf.reshape(
                alignment,
                [align_shape[0] * align_shape[1], align_shape[2], align_shape[3]],
            )
            # We don't have attention for </s> but ensure that the attention time dimension matches
            # the tokens time dimension.
            attention = reducer.align_in_time(attention, tf.shape(target_tokens)[1])
            replaced_target_tokens = replace_unknown_target(
                target_tokens, source_tokens, attention
            )
            # Restore the shape the tokens had before batch and beam were merged.
            target_tokens = tf.reshape(replaced_target_tokens, original_shape)

        # Maybe add noise to the predictions.
        decoding_noise = params.get("decoding_noise")
        if decoding_noise:
            target_tokens, sampled_length = _add_noise(
                target_tokens,
                sampled_length,
                decoding_noise,
                params.get("decoding_subword_token", "■"),
                params.get("decoding_subword_token_is_spacer"),
            )
            alignment = None  # Invalidate alignments.

        predictions = {"log_probs": log_probs}
        if self.labels_inputter.tokenizer.in_graph:
            # Detokenize on a flat [batch * beam] layout, then restore [batch, beam].
            detokenized_text = self.labels_inputter.tokenizer.detokenize(
                tf.reshape(target_tokens, [batch_size * beam_size, -1]),
                sequence_length=tf.reshape(sampled_length, [batch_size * beam_size]),
            )
            predictions["text"] = tf.reshape(detokenized_text, [batch_size, beam_size])
        else:
            predictions["tokens"] = target_tokens
            predictions["length"] = sampled_length
            if alignment is not None:
                predictions["alignment"] = alignment

        # Maybe restrict the number of returned hypotheses based on the user parameter.
        num_hypotheses = params.get("num_hypotheses", 1)
        if num_hypotheses > 0:
            if num_hypotheses > beam_size:
                raise ValueError("n_best cannot be greater than beam_width")
            # Keep only the first num_hypotheses entries along axis 1 of every prediction.
            for key, value in predictions.items():
                predictions[key] = value[:, :num_hypotheses]
        return predictions
コード例 #56
0
def create_sampling_graph(model_fns, features, params, training = False):
    """Build the graph that samples `params.mrt_sample` target sequences per source.

    `features["source"]` and `features["source_length"]` are temporarily
    repeated `mrt_sample` times per example while the sampler is built, then
    restored so the caller sees the original tensors again.

    Args:
        model_fns: a model function or a list/tuple of them; each is called as
            `model_fn(features, params)` with its matching parameter set.
        features: dict with `"source"`, `"source_length"` and `"target"`
            entries (`"target"` is read to derive the decode length even when
            `training` is false); `"target_length"` is also read when
            `training` is true. The dict is mutated in place.
        params: a parameter object or a list/tuple of them (one per model
            function); the first one supplies the sampling settings.
        training: when true, `"target"` and `"target_length"` are restored
            after the sampler has been built.

    Returns:
        The ids returned by `sampler`.
    """
    if isinstance(params, (list, tuple)):
        params_list = params
        params = params_list[0]
    else:
        params_list = [params]

    if not isinstance(model_fns, (list, tuple)):
        model_fns = [model_fns]

    sample_num = params.mrt_sample

    # [batch, decoded_ids] => [batch, vocab_size]
    def symbols_to_logits_fn(decoded_ids):
        # Drop the leading <bos> column and pad one position on the right so
        # the models see the decoded prefix as their target.
        features["target"] = tf.pad(decoded_ids[:, 1:], [[0, 0], [0, 1]])
        features["target_length"] = tf.fill([tf.shape(features["target"])[0]],
                                            tf.shape(features["target"])[1])

        return [model_fn(features, params_list[i])
                for i, model_fn in enumerate(model_fns)]

    batch_size = tf.shape(features["source"])[0]
    # append <bos> symbol
    bos_id = params.mapping["target"][params.bos]
    initial_ids = tf.fill([batch_size], tf.constant(bos_id, dtype=tf.int32))

    # Remember the caller's tensors so they can be restored below.
    inputs_old = features["source"]
    inputs_length_old = features["source_length"]
    if training:
        outputs_old = features["target"]
        outputs_length_old = features["target_length"]

    # Expand the inputs in to the number of samples
    # [batch, length] => [batch, sample_num, length]
    features["source"] = tf.expand_dims(features["source"], 1)
    features["source"] = tf.tile(features["source"], [1, sample_num, 1])
    shape = tf.shape(features["source"])

    # [batch, sample_num, length] => [batch * sample_num, length]
    features["source"] = tf.reshape(features["source"],
                                    [shape[0] * shape[1], shape[2]])

    # For source sequence length
    # [batch] => [batch, sample_num]
    features["source_length"] = tf.expand_dims(features["source_length"], 1)
    features["source_length"] = tf.tile(features["source_length"],
                                        [1, sample_num])
    shape = tf.shape(features["source_length"])

    # [batch, sample_num] => [batch * sample_num]
    features["source_length"] = tf.reshape(features["source_length"],
                                    [shape[0] * shape[1]])

    vocab_size = len(params.vocabulary["target"])
    # Decode length = target length * mrt_length_ratio, truncated to int32
    decode_length = tf.to_float(tf.shape(features["target"])[1]) \
                        * tf.constant(params.mrt_length_ratio)
    decode_length = tf.to_int32(decode_length)

    ids = sampler(symbols_to_logits_fn, initial_ids, params.mrt_sample,
                  decode_length, vocab_size,
                  eos_id=params.mapping["target"][params.eos],
                  features=features)

    # Set inputs back to the unexpanded inputs to not to confuse the Estimator
    features["source"] = inputs_old
    features["source_length"] = inputs_length_old
    if training:
        features["target"] = outputs_old
        features["target_length"] = outputs_length_old

    return ids
コード例 #57
0
    def __init__(self, data, args, embed):
        """Build the placeholders, encoder/context/decoder graph, training op and savers.

        Args:
            data: Object providing ``vocab_size``, ``go_id`` and ``eos_id``.
            args: Hyper-parameter object; the attributes read here are
                ``ch_size``, ``eh_size``, ``dh_size``, ``embedding_size``,
                ``lr``, ``lr_decay``, ``softmax_samples``, ``max_sent_length``,
                ``grad_clip``, ``name`` and ``checkpoint_max_to_keep`` (it is
                also passed to ``create_summary``).
            embed: Initial value for the embedding table, or ``None`` to create
                a ``[data.vocab_size, args.embedding_size]`` table with the
                default initializer.
        """
        self.init_states = tf.placeholder(tf.float32, (None, args.ch_size),
                                          'ctx_inps')  # batch*ch_size
        self.posts = tf.placeholder(tf.int32, (None, None),
                                    'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None, ),
                                           'enc_lens')  # batch
        self.origin_responses = tf.placeholder(tf.int32, (None, None),
                                               'dec_inps')  # batch*len
        self.origin_responses_length = tf.placeholder(tf.int32, (None, ),
                                                      'dec_lens')  # batch

        # deal with original data to adapt encoder and decoder
        batch_size, decoder_len = tf.shape(self.origin_responses)[0], tf.shape(
            self.origin_responses)[1]
        self.responses = tf.split(self.origin_responses, [1, decoder_len - 1],
                                  1)[1]  # no go_id
        self.responses_length = self.origin_responses_length - 1
        self.responses_input = tf.split(self.origin_responses,
                                        [decoder_len - 1, 1],
                                        1)[0]  # no eos_id
        self.responses_target = self.responses
        decoder_len = decoder_len - 1
        self.posts_input = self.posts  # batch*len
        # Reverse cumulative sum of a one-hot at index responses_length - 1:
        # 1.0 for every position up to and including that index, 0.0 after it.
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        # initialize the training process
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build the embedding table and embedding input
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable(
                'embed', [data.vocab_size, args.embedding_size], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.posts_input)  #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        # build rnn_cell
        cell_enc = tf.nn.rnn_cell.GRUCell(args.eh_size)
        cell_ctx = tf.nn.rnn_cell.GRUCell(args.ch_size)
        cell_dec = tf.nn.rnn_cell.GRUCell(args.dh_size)

        # build encoder
        with tf.variable_scope('encoder'):
            encoder_output, encoder_state = dynamic_rnn(cell_enc,
                                                        self.encoder_input,
                                                        self.posts_length,
                                                        dtype=tf.float32,
                                                        scope="encoder_rnn")

        # One step of the context cell: input is the encoder's final state,
        # previous state is the fed-in init_states placeholder.
        with tf.variable_scope('context'):
            _, self.context_state = cell_ctx(encoder_state, self.init_states)

        # get output projection function
        output_fn = MyDense(data.vocab_size, use_bias=True)
        sampled_sequence_loss = output_projection_layer(
            args.dh_size, data.vocab_size, args.softmax_samples)

        # construct helper and attention
        train_helper = tf.contrib.seq2seq.TrainingHelper(
            self.decoder_input, tf.maximum(self.responses_length, 1))
        infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            self.embed, tf.fill([batch_size], data.go_id), data.eos_id)
        attn_mechanism = tf.contrib.seq2seq.LuongAttention(
            args.dh_size,
            encoder_output,
            memory_sequence_length=tf.maximum(self.posts_length, 1))
        cell_dec_attn = tf.contrib.seq2seq.AttentionWrapper(
            cell_dec, attn_mechanism, attention_layer_size=args.dh_size)
        # Project the context state to the decoder size and use it as the
        # decoder cell's initial state.
        ctx_state_shaping = tf.layers.dense(self.context_state,
                                            args.dh_size,
                                            activation=None)
        dec_start = cell_dec_attn.zero_state(
            batch_size, dtype=tf.float32).clone(cell_state=ctx_state_shaping)

        # build decoder (train)
        with tf.variable_scope('decoder'):
            decoder_train = tf.contrib.seq2seq.BasicDecoder(
                cell_dec_attn, train_helper, dec_start)
            train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder_train, impute_finished=True, scope="decoder_rnn")
            self.decoder_output = train_outputs.rnn_output
            self.decoder_distribution_teacher, self.decoder_loss = sampled_sequence_loss(
                self.decoder_output, self.responses_target, self.decoder_mask)

        # build decoder (test)
        # Same 'decoder' scope with reuse=True, so the decoder variables are shared.
        with tf.variable_scope('decoder', reuse=True):
            decoder_infer = tf.contrib.seq2seq.BasicDecoder(
                cell_dec_attn, infer_helper, dec_start, output_layer=output_fn)
            infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder_infer,
                impute_finished=True,
                maximum_iterations=args.max_sent_length,
                scope="decoder_rnn")
            self.decoder_distribution = infer_outputs.rnn_output
            # Argmax over the vocabulary without its first two ids; + 2 maps the
            # result back to the full vocabulary index.
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, data.vocab_size - 2],
                         2)[1], 2) + 2  # for removing UNK

        # calculate the gradient of parameters and update
        # Only trainable variables whose name contains args.name are updated.
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)
コード例 #58
0
    def __init__(self, size_layer, num_layers, embedded_size, from_dict_size,
                 to_dict_size):
        """Build an LSTM encoder-decoder graph with training and greedy decoding paths.

        Args:
            size_layer: number of units in every LSTM cell.
            num_layers: number of stacked LSTM layers in encoder and decoder.
            embedded_size: width of the source and target embedding tables.
            from_dict_size: number of rows in the source embedding table.
            to_dict_size: number of rows in the target embedding table and
                width of the output projection.

        Attributes set: `X`, `Y` (int32 placeholders), `X_seq_len`,
        `Y_seq_len`, `encoder_out`, `encoder_state`, `training_logits`,
        `predicting_ids`, `cost`, `optimizer`, `prediction`, `accuracy`.
        """
        def cells(reuse=False):
            return tf.nn.rnn_cell.LSTMCell(
                size_layer,
                initializer=tf.orthogonal_initializer(),
                reuse=reuse)

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])

        # Sequence length = number of non-zero ids per row
        # (assumes id 0 is padding - TODO confirm).
        self.X_seq_len = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        print(self.X_seq_len)

        self.Y_seq_len = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        print(self.Y_seq_len)

        batch_size = tf.shape(self.X)[0]

        # Word embeddings
        encoder_embedding = tf.Variable(
            tf.random_uniform([from_dict_size, embedded_size], -1, 1))
        decoder_embedding = tf.Variable(
            tf.random_uniform([to_dict_size, embedded_size], -1, 1))
        encoder_embedded = tf.nn.embedding_lookup(encoder_embedding, self.X)

        # Encoder
        encoder_cells = tf.nn.rnn_cell.MultiRNNCell(
            [cells() for _ in range(num_layers)])
        self.encoder_out, self.encoder_state = tf.nn.dynamic_rnn(
            cell=encoder_cells,
            inputs=encoder_embedded,
            sequence_length=self.X_seq_len,
            dtype=tf.float32)

        # Repeat the final state of the LAST encoder layer once per decoder layer.
        encoder_state = tuple(self.encoder_state[-1]
                              for _ in range(num_layers))

        # Decoder input: drop the last column of Y and prepend the GO marker.
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)

        # Output projection onto the target vocabulary
        dense = tf.layers.Dense(to_dict_size)
        # Decoder network
        decoder_cells = tf.nn.rnn_cell.MultiRNNCell(
            [cells() for _ in range(num_layers)])

        training_helper = tf.contrib.seq2seq.TrainingHelper(
            # 1. embedded decoder inputs
            inputs=tf.nn.embedding_lookup(decoder_embedding, decoder_input),
            # 2. target sequence lengths
            sequence_length=self.Y_seq_len,
            # 3. inputs are batch-major, not time-major
            time_major=False)
        # NOTE(review): training starts from self.encoder_state (per-layer
        # states) while prediction below starts from the replicated top-layer
        # state `encoder_state` - confirm this asymmetry is intended.
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cells,
            helper=training_helper,
            initial_state=self.encoder_state,
            output_layer=dense)
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=training_decoder,
            impute_finished=True,
            maximum_iterations=tf.reduce_max(self.Y_seq_len))
        self.training_logits = training_decoder_output.rnn_output

        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=decoder_embedding,
            start_tokens=tf.tile(tf.constant([GO], dtype=tf.int32),
                                 [batch_size]),
            end_token=EOS)

        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cells,
            helper=predicting_helper,
            initial_state=encoder_state,
            output_layer=dense)

        # Greedy decoding is capped at the longest SOURCE length in the batch.
        predicting_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=predicting_decoder,
            impute_finished=True,
            maximum_iterations=tf.reduce_max(self.X_seq_len))

        self.predicting_ids = predicting_decoder_output.sample_id

        # Loss is averaged over the unpadded target positions only.
        masks = tf.sequence_mask(self.Y_seq_len,
                                 tf.reduce_max(self.Y_seq_len),
                                 dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(
            logits=self.training_logits, targets=self.Y, weights=masks)
        self.optimizer = tf.train.AdamOptimizer().minimize(self.cost)

        y_t = tf.argmax(self.training_logits, axis=2)
        y_t = tf.cast(y_t, tf.int32)

        # Token-level accuracy over the masked (unpadded) positions.
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
コード例 #59
0
        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Run one decoder step; used as the while_loop body.

            Args:
                time: scalar int32 tensor.
                outputs_ta: structure of TensorArray.
                state: (structure of) state tensors and TensorArrays.
                inputs: (structure of) input tensors.
                finished: bool tensor (keeping track of what's finished).
                sequence_lengths: int32 tensor (keeping track of time of
                    finish).

            Returns:
                `(time + 1, outputs_ta, next_state, next_inputs,
                next_finished, next_sequence_lengths)`.
            """
            nest = tf.contrib.framework.nest

            step_outputs, step_state, next_inputs, step_finished = decoder.step(
                time, inputs, state)

            # A decoder that tracks its own finished flags is trusted as-is;
            # otherwise a sequence stays finished once it has been flagged.
            next_finished = (
                step_finished if decoder.tracks_own_finished
                else tf.logical_or(step_finished, finished))

            # Sequences not yet finished when this step began now have
            # length time + 1; the others keep their recorded length.
            still_running = tf.logical_not(finished)
            next_sequence_lengths = tf.where(
                still_running,
                tf.fill(tf.shape(sequence_lengths), time + 1),
                sequence_lengths)

            nest.assert_same_structure(state, step_state)
            nest.assert_same_structure(outputs_ta, step_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            def _zero_if_finished(out, zero):
                # Zero out output values past finish.
                return tf.where(finished, zero, out)

            def _carry_state(new, cur):
                # Copy through states past finish; TensorArrays and scalar
                # states get passed through unchanged.
                if isinstance(cur, tf.TensorArray):
                    return new
                new.set_shape(cur.shape)
                if new.shape.ndims == 0:
                    return new
                return tf.where(finished, cur, new)

            if impute_finished:
                emit = nest.map_structure(
                    _zero_if_finished, step_outputs, zero_outputs)
                next_state = nest.map_structure(
                    _carry_state, step_state, state)
            else:
                emit = step_outputs
                next_state = step_state

            def _write_step(ta, out):
                return ta.write(time, out)

            outputs_ta = nest.map_structure(_write_step, outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths)
コード例 #60
0
ファイル: chatbot.py プロジェクト: Manjunath-Murdi/chat-bot
def preprocess_targets(targets, word2int, batch_size):
    """Prepend an `<SOS>` column to `targets` and drop its last column.

    Args:
        targets: 2-D tensor of target token ids; rows `0..batch_size` are used.
        word2int: mapping that holds the id of the `'<SOS>'` token.
        batch_size: number of rows in the batch.

    Returns:
        A tensor made of one `<SOS>` column followed by `targets` without its
        last column.
    """
    sos_column = tf.fill([batch_size, 1], word2int['<SOS>'])
    # Every row, every column except the final one.
    shifted = tf.strided_slice(
        targets, begin=[0, 0], end=[batch_size, -1], strides=[1, 1])
    return tf.concat([sos_column, shifted], axis=1)