Exemple #1
0
def _sample_conditional(Xnew, feat, kern, f, *, full_cov=False, full_output_cov=False, q_sqrt=None, white=False, num_samples=None):
    """
    Sample from the conditional distribution of a mixed-output GP.

    The latent processes g are conditioned independently, a sample of g is
    drawn from the diagonal Gaussian N(g_mu, g_var), and the sample and both
    moments are then mixed into the output space through `kern.W`:
    f = W g, f_mu = W g_mu and f_var = (W ** 2) g_var (diagonal covariance).

    The mixing contracts only the last (latent) axis, so any leading axes that
    `_sample_mvn` adds when `num_samples` is given are carried through.

    :param Xnew: inputs at which to sample; forwarded to the independent conditional
    :param feat: inducing features; forwarded to the independent conditional
    :param kern: kernel exposing the mixing matrix `W` (P x L)
    :param f: forwarded to the independent conditional
    :param full_cov: must be False, otherwise NotImplementedError is raised
    :param full_output_cov: must be False, otherwise NotImplementedError is raised
    :param q_sqrt: forwarded to the independent conditional
    :param white: forwarded to the independent conditional
    :param num_samples: forwarded to `_sample_mvn`
    :return: tuple (f_sample, f_mu, f_var); f_mu and f_var are N x P, and
        f_sample is N x P plus whatever leading axes `_sample_mvn` produced
    """
    logger.debug("sample conditional: (MixedKernelSharedMof, MixedKernelSeparateMof), SeparateMixedMok")
    if full_cov:
        raise NotImplementedError("full_cov not yet implemented")
    if full_output_cov:
        raise NotImplementedError("full_output_cov not yet implemented")
    independent_cond = conditional.dispatch(object, SeparateIndependentMof, SeparateIndependentMok, object)
    g_mu, g_var = independent_cond(Xnew, feat, kern, f, white=white, q_sqrt=q_sqrt,
                                   full_output_cov=False, full_cov=False)  # N x L, N x L
    g_sample = _sample_mvn(g_mu, g_var, "diag", num_samples=num_samples)  # [..., N, L]
    with params_as_tensors_for(kern):
        # Contract the trailing latent axis L of the left operand with the
        # L axis of W (P x L); leading axes of the left operand are preserved.
        latent_axes = [[-1], [-1]]
        f_sample = tf.tensordot(g_sample, kern.W, latent_axes)  # [..., N, P]
        f_mu = tf.tensordot(g_mu, kern.W, latent_axes)  # N x P
        # g_var is diagonal, so W diag(g_var) W^T has diagonal
        # sum_l W_pl^2 g_var_nl.
        f_var = tf.tensordot(g_var, tf.square(kern.W), latent_axes)  # N x P
    return f_sample, f_mu, f_var
  def _variance(self):
    """Builds the variance of the observation at every step.

    At each step the observation is treated as a mixture over the hidden
    states, weighted by the marginal hidden-state probabilities, and the
    variance of that mixture is computed for every event coordinate.

    Returns:
      A tensor reshaped to `batch_shape + [num_steps] + observation_event_shape`.
    """
    with tf.control_dependencies(self._runtime_assertions):
      state_probs = self._marginal_hidden_probs()
      # state_probs :: num_steps batch_shape num_states
      component_means = self._observation_distribution.mean()
      # component_means :: observation_batch_shape[:-1] num_states
      #                    observation_event_shape
      full_shape = tf.concat(
          [self.batch_shape_tensor(),
           [self._num_states],
           self._observation_distribution.event_shape_tensor()],
          axis=0)
      component_means = tf.broadcast_to(component_means, full_shape)
      # component_means :: batch_shape num_states observation_event_shape

      event_shape = self._observation_distribution.event_shape_tensor()
      num_batch = tf.reduce_prod(self.batch_shape_tensor())
      num_event = tf.reduce_prod(event_shape)

      # Collapse batch and event dimensions so a fixed-rank einsum applies.
      probs_flat = tf.reshape(
          state_probs, [self._num_steps, num_batch, self._num_states])
      # probs_flat :: num_steps batch_size num_states
      per_state_shape = [num_batch, 1, self._num_states, num_event]
      means_flat = tf.reshape(component_means, per_state_shape)
      # means_flat :: batch_size 1 num_states observation_event_size
      mixture_mean = tf.einsum("ijk,jmkl->jiml", probs_flat, means_flat)
      # mixture_mean :: batch_size num_steps 1 observation_event_size

      component_vars = tf.broadcast_to(
          self._observation_distribution.variance(), full_shape)
      # component_vars :: batch_shape num_states observation_event_shape
      vars_flat = tf.reshape(component_vars, per_state_shape)
      # vars_flat :: batch_size 1 num_states observation_event_size

      # Variance of a mixture with weights p[i], component means mean[i] and
      # component variances var[i], where `mean` is the mixture mean:
      #
      #   var = sum_i p[i] * ((mean[i] - mean)**2 + var[i])
      spread = (means_flat - mixture_mean)**2 + vars_flat
      mixture_var = tf.einsum("ijk,jikl->jil", probs_flat, spread)
      # mixture_var :: batch_size num_steps observation_event_size

      result_shape = tf.concat(
          [self.batch_shape_tensor(),
           [self._num_steps],
           event_shape],
          axis=0)

      # returns :: batch_shape num_steps observation_event_shape
      return tf.reshape(mixture_var, result_shape)
 def _build_clp_multiplication(self, clp_kernel):
   """
   Multiplies the interleaved complex input with the complex kernel and
   returns the log of the magnitude of the product.

   The last axis of the batch-major input is split into even positions (used
   as real parts) and odd positions (used as imaginary parts). The kernel's
   first axis selects the real (index 0) and imaginary (index 1) parts.

   :param clp_kernel: kernel tensor whose axis 1 must equal half of the input
     feature dimension and whose axis 2 must equal the number of filters
   :return: safe_log(|input * kernel|), from einsum 'btf,fp->btp'
   """
   from TFUtil import safe_log
   input_placeholder = self.input_data.get_placeholder_as_batch_major()
   # In graph mode an assert op only runs if something depends on it, so the
   # checks are attached to the slicing ops below via control dependencies.
   shape_checks = [
     tf.assert_equal(tf.shape(clp_kernel)[1], tf.shape(input_placeholder)[2] // 2),
     tf.assert_equal(tf.shape(clp_kernel)[2], self._nr_of_filters),
   ]
   with tf.control_dependencies(shape_checks):
     input_real = tf.strided_slice(input_placeholder, [0, 0, 0], tf.shape(input_placeholder), [1, 1, 2])
     input_imag = tf.strided_slice(input_placeholder, [0, 0, 1], tf.shape(input_placeholder), [1, 1, 2])
     # NOTE(review): the checks validate the `clp_kernel` argument while the
     # values are read from `self._clp_kernel` - presumably the same tensor;
     # confirm against the caller.
     kernel_real = self._clp_kernel[0, :, :]
     kernel_imag = self._clp_kernel[1, :, :]
   # Complex product (a + ib)(c + id) = (ac - bd) + i(bc + ad).
   output_real = tf.einsum('btf,fp->btp', input_real, kernel_real) - tf.einsum('btf,fp->btp', input_imag, kernel_imag)
   output_imag = tf.einsum('btf,fp->btp', input_imag, kernel_real) + tf.einsum('btf,fp->btp', input_real, kernel_imag)
   output_uncompressed = tf.sqrt(tf.pow(output_real, 2) + tf.pow(output_imag, 2))
   output_compressed = safe_log(output_uncompressed)
   return output_compressed
def time_distributed_dense_layer(inputs, output_units, bias=True, activation=None, dropout=None,
                                 scope='time-distributed-dense-layer', reuse=False):
    """
    Shared dense projection applied independently at every timestep.

    Maps a tensor of shape [batch_size, max_seq_len, input_units] to
    [batch_size, max_seq_len, output_units] using one weight matrix.

    Args:
        inputs: Tensor of shape [batch size, max sequence length, ...].
        output_units: Number of output units.
        bias: Whether to add a learned bias vector (zero-initialised).
        activation: Optional callable applied after the affine map.
        dropout: Optional value passed to tf.nn.dropout as its second
            argument (keep probability); skipped when falsy.
        scope: Variable scope name.
        reuse: Whether to reuse variables already in the scope.

    Returns:
        Tensor of shape [batch size, max sequence length, output_units].

    """
    with tf.variable_scope(scope, reuse=reuse):
        kernel = tf.get_variable(
            name='weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(inputs, -1), output_units]
        )
        outputs = tf.einsum('ijk,kl->ijl', inputs, kernel)

        if bias:
            offset = tf.get_variable(
                name='biases',
                initializer=tf.constant_initializer(),
                shape=[output_units]
            )
            outputs = outputs + offset

        if activation:
            outputs = activation(outputs)
        if dropout:
            outputs = tf.nn.dropout(outputs, dropout)
        return outputs
def dense_word_embedding_from_chars(chars, embed_dim, bias=True, scope='dense-word-embed', reuse=False):
    """
    Builds word embeddings from characters: a dense projection of every
    character vector followed by max-pooling over the character axis.

    Args:
        chars: Tensor of shape [batch_size, word sequence length, char sequence length, alphabet size].
        embed_dim: Dimension of word embeddings.  Integer.
        bias: Whether to add a learned bias vector before pooling.
        scope: Variable scope name.
        reuse: Whether to reuse variables already in the scope.

    Returns:
        Sequence of embedding vectors.  Tensor of shape [batch_size, word sequence length, embed_dim].

    """
    with tf.variable_scope(scope, reuse=reuse):
        char_vectors = tf.cast(chars, tf.float32)
        projection = tf.get_variable(
            name='weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(char_vectors, -1), embed_dim]
        )
        projected = tf.einsum('ijkl,lm->ijkm', char_vectors, projection)

        if bias:
            offset = tf.get_variable(
                name='biases',
                initializer=tf.constant_initializer(),
                shape=[embed_dim]
            )
            projected = projected + offset

        # Pool over axis 2, the character-sequence axis.
        return tf.reduce_max(projected, 2)
def maxpool_attentive_matching(a, b, a_lengths, b_lengths, max_seq_len, attention_func=dot_attention,
                               attention_func_kwargs=None):
    """
    Matches each vector in a with a vector created by maxpooling over the weighted vectors in b.
    The weightings are determined by the attention matrix.  The attention matrix is
    computed using attention_func.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        attention_func: Function used to calculate attention matrix.  Can be one of the following:
            multiplicative_attention, additive_attention, concat_attention, dot_attention,
            or cosine_attention.
        attention_func_kwargs: Keyword arguments to pass to attention_func.  None (the default)
            means no extra keyword arguments.

    Returns:
        Tensor of shape [batch_size, max_seq_len, input_size] consisting of the matching vectors for
        each timestep in a.

    """
    # A None sentinel avoids sharing one dict object between calls.
    if attention_func_kwargs is None:
        attention_func_kwargs = {}
    attn = attention_func(a, b, a_lengths, b_lengths, max_seq_len, **attention_func_kwargs)
    # weighted[i, j, k, l] = attn[i, j, k] * b[i, k, l]; pool over k, the
    # position in b.
    weighted = tf.einsum('ijk,ikl->ijkl', attn, b)
    return tf.reduce_max(weighted, axis=2)
Exemple #7
0
def _expectation(p, mean1, none1, mean2, none2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <m1(x_n)^T m2(x_n)>_p(x_n)
        - m1(.), m2(.) :: Linear mean functions

    The product expands into four terms (quadratic, two cross terms and a
    constant), each evaluated with the first and second moments of p.

    :return: NxQ1xQ2
    """
    with params_as_tensors_for(mean1), params_as_tensors_for(mean2):
        mu = p.mu
        # Second moment E[x x^T] = cov + mu mu^T.
        second_moment = p.cov + (mu[:, :, None] * mu[:, None, :])  # NxDxD

        quadratic = tf.einsum("iq,nij,jz->nqz", mean1.A, second_moment, mean2.A)  # NxQ1xQ2
        slope1_offset2 = tf.einsum("iq,ni,z->nqz", mean1.A, mu, mean2.b)  # NxQ1xQ2
        offset1_slope2 = tf.einsum("q,ni,iz->nqz", mean1.b, mu, mean2.A)  # NxQ1xQ2
        offsets = mean1.b[:, None] * mean2.b[None, :]  # Q1xQ2, broadcast over N

        return quadratic + slope1_offset2 + offset1_slope2 + offsets
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)).  v is a learnable vector and W is a learnable
    matrix shared by both sequences. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Variable scope name.
        reuse: Whether to reuse variables already in the scope.

    Returns:
        Attention matrices, one per batch element.  Tensor of shape
        [batch_size, max_seq_len, max_seq_len].

    """
    with tf.variable_scope(scope, reuse=reuse):
        # The second call reuses the 'dense' weights created by the first.
        proj_a = time_distributed_dense_layer(a, hidden_units, bias=False, scope='dense', reuse=False)
        proj_b = time_distributed_dense_layer(b, hidden_units, bias=False, scope='dense', reuse=True)
        # Broadcasting the two expanded tensors forms every (a_i, b_j) pair.
        pairwise = tf.expand_dims(proj_a, 2) + tf.expand_dims(proj_b, 1)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        scores = tf.einsum('ijkl,l->ijk', tf.nn.tanh(pairwise), v)
        # Subtract the row maximum before exponentiating for stability.
        row_max = tf.reduce_max(scores, axis=2)
        scores = scores - tf.expand_dims(row_max, 2)
        weights = mask_attention_weights(tf.exp(scores), a_lengths, b_lengths, max_seq_len)
        # The epsilon keeps fully masked rows from dividing by zero.
        row_sum = tf.reduce_sum(weights, axis=2) + 1e-10
        return weights / tf.expand_dims(row_sum, 2)
 def test_invalid(self):
   """Every equation in `invalid_cases` must make tf.einsum raise ValueError."""
   for axes in self.invalid_cases:
     # Two fresh float32 placeholders of shape (3, 4) for each equation.
     inputs = [tf.placeholder(tf.float32, shape=(3,4)) for _ in range(2)]
     with self.assertRaises(ValueError):
       tf.einsum(axes, *inputs)
 def test_dim_mismatch(self):
   """Every (equation, shapes) pair in `dim_mismatch_cases` must raise ValueError."""
   for axes, input_shapes in self.dim_mismatch_cases:
     inputs = []
     for shape in input_shapes:
       inputs.append(tf.placeholder(tf.float32, shape=shape))
     with self.assertRaises(ValueError):
       tf.einsum(axes, *inputs)
Exemple #11
0
  def lookahead(self, t, z_prev):
    """Compute the 'lookahead' distribution, p(x_{t:T} | z_{t-1}).

    Conditions the observations from time t onwards on the previous latent
    using the stored covariances sigma_z, sigma_zx and sigma_x.

    Args:
      t: A scalar Tensor int, the current timestep. Must be at least 1.
      z_prev: The latent state at time t-1. A Tensor of shape [batch_size].
    Returns:
      p(x_{t:T} | z_{t-1}) as a multivariate normal distribution.
    """
    z_prev = tf.convert_to_tensor(z_prev)
    prev = t - 1
    # Covariance between z_{t-1} and each of x_t, ..., x_T.
    cross_cov = self.sigma_zx[prev, t:]
    prev_var = self.sigma_z[prev, prev]

    # Conditional mean: one row per batch element.
    cond_mean = tf.einsum("i,j->ij", z_prev, cross_cov) / prev_var
    # Conditional covariance: marginal covariance minus the explained part.
    explained = tf.einsum("i,j->ij", cross_cov, cross_cov) / prev_var
    cond_cov = self.sigma_x[t:, t:] - explained
    return tfd.MultivariateNormalFullCovariance(
        loc=cond_mean, covariance_matrix=cond_cov)
def not_fully_connected_layer(inputs, segment_count, segment_dim, num_kernels, nonlinearity=tf.nn.relu):
    """
    Applies one shared affine map to every segment of the input.

    The input is viewed as `segment_count` segments of `segment_dim` values,
    each segment is multiplied by the same [segment_dim, num_kernels] weight
    matrix and offset by the same bias, and the per-segment results are
    concatenated again before the nonlinearity.

    Args:
        inputs: Tensor reshapeable to [batch, segment_count, segment_dim].
        segment_count: Number of segments per example.
        segment_dim: Number of values in each segment.
        num_kernels: Number of outputs produced for each segment.
        nonlinearity: Callable applied to the flattened output.

    Returns:
        Tuple (outputs, weights): outputs has shape
        [batch, segment_count * num_kernels]; weights is the shared matrix.
    """
    # The name is passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, not `name`.
    # NOTE(review): the variables were previously created unnamed, so
    # checkpoints saved with the old graph use different variable names.
    weights = tf.Variable(
        tf.truncated_normal(
            [segment_dim, num_kernels], stddev=2. / (num_kernels + segment_dim) ** 0.5),
        name='weights')
    biases = tf.Variable(tf.zeros([num_kernels]), name='biases')
    # -1 lets the batch size be inferred instead of being fixed to 50.
    segmented = tf.reshape(inputs, [-1, segment_count, segment_dim])
    output = tf.einsum('ijk,kl->ijl', segmented, weights) + biases
    flattened = tf.reshape(output, [-1, segment_count * num_kernels])
    outputs = nonlinearity(flattened)
    return outputs, weights
 def test_dim_mismatch(self):
   """tf.einsum must raise AssertionError for every mismatched-shape case."""
   for axes, input_shapes in self.dim_mismatch_cases:
     inputs = []
     for shape in input_shapes:
       inputs.append(tf.placeholder(tf.float32, shape=shape))
     try:
       result = tf.einsum(axes, *inputs)
     except AssertionError:
       # Expected path: no tensor was produced.
       result = None
     assert result is None, "An exception should have been thrown."
 def test_input_is_placeholder(self):
   """einsum works when the contracted dimension is only known at run time."""
   with tf.Graph().as_default():
     lhs = tf.placeholder(tf.int32, shape=(1, None))
     rhs = tf.placeholder(tf.int32, shape=(None, 1))
     product = tf.einsum('ij,jk->ik', lhs, rhs)
     feeds = {
         lhs: [[1, 2, 3]],
         rhs: [[2], [1], [1]],
     }
     with tf.Session() as sess:
       actual = sess.run(product, feed_dict=feeds)
       # 1*2 + 2*1 + 3*1 == 7
       np.testing.assert_almost_equal([[7]], actual)
Exemple #15
0
    def __call__(self, inputs, state, scope=None):
        """Run one decoding step that combines generation and copy scores.

        Args:
            inputs: Step input; concatenated along axis 1 with the selective
                read before being fed to the wrapped cell.
            state: A CopyNetWrapperState holding `cell_state`, `last_ids` and
                `prob_c` from the previous step.
            scope: Forwarded to the wrapped cell.

        Returns:
            Tuple (outputs, state): outputs is the sum of the padded
            generation scores and the copy scores scattered over the
            vocabulary; state is the updated CopyNetWrapperState.

        Raises:
            TypeError: If `state` is not a CopyNetWrapperState.
        """
        if not isinstance(state, CopyNetWrapperState):
            raise TypeError("Expected state to be instance of CopyNetWrapperState. "
                      "Received type %s instead."  % type(state))
        last_ids = state.last_ids
        prob_c = state.prob_c
        cell_state = state.cell_state

        # Mark the encoder positions whose input id equals the previously
        # emitted id, then normalise each row to sum to 1. Rows with no match
        # (sum below 1e-7) are left as all zeros.
        mask = tf.cast(tf.equal(tf.expand_dims(last_ids, 1),  self._encoder_input_ids), tf.float32)
        mask_sum = tf.reduce_sum(mask, axis=1)
        mask = tf.where(tf.less(mask_sum, 1e-7), mask, mask / tf.expand_dims(mask_sum, 1))
        # Selective read: encoder states weighted by the masked previous copy
        # scores and summed over positions (index j).
        rou = mask * prob_c
        selective_read = tf.einsum("ijk,ij->ik", self._encoder_states, rou)
        inputs = tf.concat([inputs, selective_read], 1)

        outputs, cell_state = self._cell(inputs, cell_state, scope)
        generate_score = self._projection(outputs)

        # Copy score per encoder position: tanh(encoder_state . copy_weight)
        # dotted with the cell output.
        copy_score = tf.einsum("ijk,km->ijm", self._encoder_states, self._copy_weight)
        copy_score = tf.nn.tanh(copy_score)

        copy_score = tf.einsum("ijm,im->ij", copy_score, outputs)
        encoder_input_mask = tf.one_hot(self._encoder_input_ids, self._vocab_size)
        # Multiplies each copy score by the sum of its one-hot row over the
        # vocabulary axis n.
        expanded_copy_score = tf.einsum("ijn,ij->ij", encoder_input_mask, copy_score)

        # NOTE(review): the softmax below is commented out, so despite their
        # names prob_g and prob_c hold the raw scores.
        prob_g = generate_score
        prob_c = expanded_copy_score
#        mixed_score = tf.concat([generate_score, expanded_copy_score], 1)
#        probs = tf.nn.softmax(mixed_score)
#        prob_g = probs[:, :self._gen_vocab_size]
#        prob_c = probs[:, self._gen_vocab_size:]

        # Scatter per-position copy scores onto vocabulary ids (summing
        # positions that share an id) and pad the generation scores up to the
        # full vocabulary size so the two can be added.
        prob_c_one_hot = tf.einsum("ijn,ij->in", encoder_input_mask, prob_c)
        prob_g_total = tf.pad(prob_g, [[0, 0], [0, self._vocab_size - self._gen_vocab_size]])
        outputs = prob_c_one_hot + prob_g_total
        last_ids = tf.argmax(outputs, axis=-1, output_type=tf.int32)
        #prob_c.set_shape([None, self._encoder_state_size])
        state = CopyNetWrapperState(cell_state=cell_state, last_ids=last_ids, prob_c=prob_c)
        return outputs, state
 def cl_loss_from_embedding(self,embedded,return_intermediate=False):
   with tf.device('/gpu:1'):
     output,_ = self.layers['BiLSTM'](embedded)
     output = tf.concat([tf.reshape(output,[-1,2*self.args.rnn_size]),tf.constant(np.zeros((1,2*self.args.rnn_size),dtype=np.float32))],0)
     
   input_f1 =tf.nn.l2_normalize(tf.reduce_sum(tf.nn.embedding_lookup(output,self.entMentIndex),1),1)
   
   #input_f2 =tf.nn.l2_normalize(tf.reduce_sum(tf.nn.embedding_lookup(output,self.entCtxLeftIndex),1),1)
   
   #input_f3 =tf.nn.l2_normalize(tf.reduce_sum(tf.nn.embedding_lookup(output,self.entCtxRightIndex),1),1)
   
   f2_temp = tf.nn.embedding_lookup(output,self.entCtxLeftIndex)
   f3_temp = tf.nn.embedding_lookup(output,self.entCtxRightIndex)
   
   f2_atten = tf.nn.softmax(tf.einsum('aij,ajk->aik', f2_temp, tf.expand_dims(input_f1,-1)),-1)  #Batch matrix multiplication
   f3_atten = tf.nn.softmax(tf.einsum('aij,ajk->aik', f3_temp, tf.expand_dims(input_f1,-1)),-1) 
   
   input_f2 = tf.einsum('aij,ajk->aik',tf.transpose(f2_temp,[0,2,1]),f2_atten)[:,:,0]
   input_f3 = tf.einsum('aij,ajk->aik',tf.transpose(f3_temp,[0,2,1]),f3_atten)[:,:,0]
   
   print 'f2_input:',input_f2
   print 'f3_input:',input_f3
   
   input_ctx = tf.concat([input_f2,input_f3],1)
   
   if self.args.dropout:  #dropout position is here!
     input_f1 =  tf.nn.dropout(input_f1,self.keep_prob)
     input_ctx =  tf.nn.dropout(input_ctx,self.keep_prob)
       
   prediction_l1_ment = self.layers['fullyConnect_ment'](input_f1,activation_fn=None)
   prediction_ment = tf.matmul(prediction_l1_ment,self.hier)
   
   print 'ment:',prediction_ment
   prediction_ctx = self.layers['fullyConnect_ctx'](input_ctx,activation_fn=None)
   print 'ctx:',prediction_ctx
   prediction = tf.nn.sigmoid(prediction_ment + prediction_ctx)
   
   loss = tf.reduce_mean(layers_lib.classification_loss('figer',self.dense_outputdata,prediction))
   return prediction,loss
def concat_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                     scope='concat-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*[a_i; b_j])).  v is a learnable vector and W is a learnable
    matrix.  The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Variable scope name.
        reuse: Whether to reuse variables already in the scope.

    Returns:
        Attention matrices, one per batch element.  Tensor of shape
        [batch_size, max_seq_len, max_seq_len].

    """
    with tf.variable_scope(scope, reuse=reuse):
        # Insert singleton axes so the concatenation pairs every a_i with b_j.
        a_rows = tf.expand_dims(a, 2)
        b_cols = tf.expand_dims(b, 1)
        pairs = tf.concat([a_rows, b_cols], axis=3)
        W = tf.get_variable(
            name='matmul_weights',
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            shape=[shape(pairs, -1), hidden_units]
        )
        hidden = tf.einsum('ijkl,lm->ijkm', pairs, W)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.ones_initializer(),
            shape=[hidden_units]
        )
        scores = tf.einsum('ijkl,l->ijk', tf.nn.tanh(hidden), v)
        # Subtract the row maximum before exponentiating for stability.
        row_max = tf.reduce_max(scores, axis=2)
        scores = scores - tf.expand_dims(row_max, 2)
        weights = mask_attention_weights(tf.exp(scores), a_lengths, b_lengths, max_seq_len)
        # The epsilon keeps fully masked rows from dividing by zero.
        row_sum = tf.reduce_sum(weights, axis=2) + 1e-10
        return weights / tf.expand_dims(row_sum, 2)
Exemple #18
0
    def __call__(self, x, stop_params_gradient=False, is_eval=True, ensemble_idxs=None, pre_expanded=None, reduce_mode="none"):
        """Evaluate a subset of the ensemble's networks on `x`.

        Args:
            x: Input tensor whose last dimension is `self.in_size`. When
                `pre_expanded` is true it is reshaped to
                [-1, ensemble_sample_n, in_size]; otherwise every row is tiled
                across the selected ensemble members.
            stop_params_gradient: If true, weights and biases are wrapped in
                tf.stop_gradient so gradients only flow to the input.
            is_eval: Selects `eval_sample_count` (true) or
                `train_sample_count` (false) as the number of members sampled
                when `ensemble_idxs` is not given.
            ensemble_idxs: Optional indices of the members to use. When None,
                a random subset is drawn via a shuffle of all member indices.
            pre_expanded: Whether `x` already carries an ensemble axis.
                Defaults to True exactly when `ensemble_idxs` is given.
            reduce_mode: "none" keeps the ensemble axis, "random" picks one
                member per leading row via a one-hot mask, "mean" averages.

        Returns:
            The network output, reshaped from the original leading shape of
            `x` plus (unless pre-expanded) an ensemble axis and
            `self.out_shape`, then reduced according to `reduce_mode`.

        Raises:
            Exception: If `reduce_mode` is not one of the three valid names.
        """
        if pre_expanded is None: pre_expanded = ensemble_idxs is not None
        if ensemble_idxs is None:
            ensemble_idxs = tf.random_shuffle(tf.range(self.ensemble_size))
            ensemble_sample_n = self.eval_sample_count if is_eval else self.train_sample_count
            ensemble_idxs = ensemble_idxs[:ensemble_sample_n]
        else:
            # Here the sample count is a tensor, not a Python int.
            ensemble_sample_n = tf.shape(ensemble_idxs)[0]

        # Select the parameters of the chosen members; biases get a leading
        # broadcast axis of size 1.
        weights = [tf.gather(w, ensemble_idxs, axis=0) for w in self.weights]
        biases = [tf.expand_dims(tf.gather(b, ensemble_idxs, axis=0),0) for b in self.biases]

        original_shape = tf.shape(x)
        if pre_expanded: h = tf.reshape(x, [-1, ensemble_sample_n, self.in_size])
        else:            h = tf.tile(tf.reshape(x, [-1, 1, self.in_size]), [1, ensemble_sample_n, 1])
        # Batched per-member dense layers: b = row, r = ensemble member.
        # ReLU is used on every layer except the last.
        for layer_i in range(self.layers):
            nonlinearity = tf.nn.relu if layer_i + 1 < self.layers else self.final_nonlinearity
            if stop_params_gradient: h = nonlinearity(tf.einsum('bri,rij->brj', h, tf.stop_gradient(weights[layer_i])) + tf.stop_gradient(biases[layer_i]))
            else:                    h = nonlinearity(tf.einsum('bri,rij->brj', h, weights[layer_i]) + biases[layer_i])

        # Restore the caller's leading dimensions.
        if pre_expanded:
            if len(self.out_shape) > 0: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant(self.out_shape)], -1))
            else:                       h = tf.reshape(h, original_shape[:-1])
        else:
            # NOTE(review): if ensemble_idxs is supplied together with
            # pre_expanded=False, ensemble_sample_n is a tensor and
            # tf.constant([ensemble_sample_n]) presumably fails - confirm.
            if len(self.out_shape) > 0: h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant([ensemble_sample_n]), tf.constant(self.out_shape)], -1))
            else:                       h = tf.reshape(h, tf.concat([original_shape[:-1], tf.constant([ensemble_sample_n])], -1))

        if reduce_mode == "none":
            pass
        elif reduce_mode == "random":
            # Multiply by a one-hot mask over the ensemble axis (one random
            # member per entry of the first axis) and sum that axis away.
            if len(self.out_shape) > 0: h = tf.reduce_sum(h * tf.reshape(tf.one_hot(tf.random_uniform([tf.shape(h)[0]], 0, ensemble_sample_n, dtype=tf.int64), ensemble_sample_n), tf.concat([tf.shape(h)[:1], tf.ones_like(tf.shape(h)[1:-2]), tf.constant([ensemble_sample_n]), tf.constant([1])], 0)), -2)
            else:                       h = tf.reduce_sum(h * tf.reshape(tf.one_hot(tf.random_uniform([tf.shape(h)[0]], 0, ensemble_sample_n, dtype=tf.int64), ensemble_sample_n), tf.concat([tf.shape(h)[:1], tf.ones_like(tf.shape(h)[1:-1]), tf.constant([ensemble_sample_n])], 0)), -1)
        elif reduce_mode == "mean":
            if len(self.out_shape) > 0: h = tf.reduce_mean(h, -2)
            else:                       h = tf.reduce_mean(h, -1)
        else: raise Exception("use a valid reduce mode: none, random, or mean")

        return h
  def test_invalid(self):
    """tf.einsum must raise AssertionError for every invalid equation."""
    for axes in self.invalid_cases:
      # Two fresh float32 placeholders of shape (3, 4) for each equation.
      inputs = [tf.placeholder(tf.float32, shape=(3,4)) for _ in range(2)]
      try:
        result = tf.einsum(axes, *inputs)
      except AssertionError as err:
        # Expected path: report the message, no tensor was produced.
        print(err)
        result = None
      assert result is None, \
        "An exception should have been thrown."
Exemple #20
0
 def fit(self, x=None, y=None):
   """Computes the Gaussian posterior over the coefficients.

   p(coeffs | x, y) = Normal(coeffs |
     mean = (1/noise_variance) (1/noise_variance x^T x + I)^{-1} x^T y,
     covariance = (1/noise_variance x^T x + I)^{-1})

   Stores the Cholesky factor of the posterior precision as a lower-triangular
   operator in `coeffs_precision_tril_op` and the posterior mean in
   `coeffs_mean`. Returns None.
   """
   # TODO(trandustin): We newly fit the data at each call. Extend to do
   # Bayesian updating.
   scaled_gram = tf.matmul(x, x, transpose_a=True) / self.noise_variance
   # Adding 1 to the diagonal is the "+ I" term of the precision.
   precision = tf.matrix_set_diag(
       scaled_gram, tf.matrix_diag_part(scaled_gram) + 1.)
   self.coeffs_precision_tril_op = tf.linalg.LinearOperatorLowerTriangular(
       tf.linalg.cholesky(precision))
   tril_op = self.coeffs_precision_tril_op
   # x^T y, then two triangular solves: L^{-T} (L^{-1} x^T y).
   xty = tf.einsum('nm,n->m', x, y)
   half_solved = tril_op.solvevec(xty)
   self.coeffs_mean = tril_op.solvevec(
       half_solved, adjoint=True) / self.noise_variance
   # TODO(trandustin): To be fully Keras-compatible, return History object.
   return None
  def __init__(self, sess, config, name, is_train):
    """Builds the graph, summaries and saver, then initialises or restores variables.

    Args:
      sess: TensorFlow session used for initialisation, restore and the
        summary writer.
      config: Object providing batch_size, im_size, ClusterNo, lr and
        logs_dir.
      name: Stored on the instance as `self.name`.
      is_train: If true, an Adam optimiser and train op are created.
    """
    self.sess = sess
    self.name = name
    self.is_train = is_train


    # Input image; channel 0 is split off as D and fed to the E-step network,
    # the remaining two channels are unused here.
    self.X_hsd = tf.placeholder(tf.float32, shape=[config.batch_size, config.im_size, config.im_size, 3], name="original_color_image")
    self.D, h_s = tf.split(self.X_hsd,[1,2], axis=3)

    self.E_Step = CNN("E_Step", config, is_train=self.is_train)
    self.Gama = self.E_Step(self.D)
    self.loss, self.Mu, self.Std = GMM_M_Step(self.X_hsd, self.Gama, config.ClusterNo, name='GMM_Statistics')
    
    if self.is_train:

      # Only the E-step network's parameters are optimised.
      self.optim = tf.train.AdamOptimizer(config.lr)
      self.train = self.optim.minimize(self.loss, var_list=self.E_Step.Param)

    # Most likely cluster per pixel (argmax over axis 3 of Gama).
    ClsLbl = tf.arg_max(self.Gama, 3)
    ClsLbl = tf.cast(ClsLbl, tf.float32)
    
    # One display colour per cluster index; the table has six entries, so
    # config.ClusterNo above 6 would index past its end.
    ColorTable = [[255,0,0],[0,255,0],[0,0,255],[255,255,0], [0,255,255], [255,0,255]]
    colors = tf.cast(tf.constant(ColorTable), tf.float32)
    # Start from the label repeated over 3 channels, then replace every entry
    # equal to k with the corresponding channel of colour k.
    Msk = tf.tile(tf.expand_dims(ClsLbl, axis=3),[1,1,1,3])
    for k in range(0, config.ClusterNo):
        ClrTmpl = tf.einsum('anmd,df->anmf', tf.expand_dims(tf.ones_like(ClsLbl), axis=3), tf.reshape(colors[k,...],[1,3]))
        Msk = tf.where(tf.equal(Msk,k), ClrTmpl, Msk)
    
    
    self.X_rgb = utils.HSD2RGB(self.X_hsd)
    tf.summary.image("1.Input_image", self.X_rgb*255.0, max_outputs=2)
    tf.summary.image("2.Gamma_image",  Msk, max_outputs=2)
    tf.summary.image("3.Density_image", self.D*255.0, max_outputs=2)
    tf.summary.scalar("loss", self.loss)

    self.summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver()
    self.summary_writer = tf.summary.FileWriter(config.logs_dir, self.sess.graph)

    # Initialise first; a checkpoint found in logs_dir then overrides the
    # fresh values.
    self.sess.run(tf.global_variables_initializer())
    
    ckpt = tf.train.get_checkpoint_state(config.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        print("Model restored...")
Exemple #22
0
 def call(self, inputs):
   """Returns the predictive distribution of the mean response at `inputs`.

   Before fitting (both `coeffs_mean` and `coeffs_precision_tril_op` are
   None) the prior predictive is used; afterwards the posterior predictive.
   Only the per-point variance is needed, so it is computed directly and the
   full N x N predictive covariance is never formed.

   Args:
     inputs: Tensor of shape [n, m] (the einsum 'nm,m->n' fixes the rank in
       the fitted branch).

   Returns:
     An ed.Normal with one mean and one scale per input row.
   """
   if self.coeffs_mean is None and self.coeffs_precision_tril_op is None:
     # p(mean(ynew) | xnew) = Normal(ynew | mean = 0, variance = xnew xnew^T)
     predictive_mean = 0.
     predictive_variance = tf.reduce_sum(tf.square(inputs), -1)
   else:
     # p(mean(ynew) | xnew, x, y) = Normal(ynew |
     #   mean = xnew (1/noise_variance) (1/noise_variance x^T x + I)^{-1}x^T y,
     #   variance = xnew (1/noise_variance x^T x + I)^{-1} xnew^T)
     predictive_mean = tf.einsum('nm,m->n', inputs, self.coeffs_mean)
     # With precision = L L^T, the diagonal of xnew precision^{-1} xnew^T
     # is the squared column norm of L^{-1} xnew^T. A sum of squares is
     # never negative, so the sqrt below cannot produce NaN from round-off.
     whitened = self.coeffs_precision_tril_op.solve(inputs, adjoint_arg=True)
     predictive_variance = tf.reduce_sum(tf.square(whitened), -2)
   return ed.Normal(loc=predictive_mean, scale=tf.sqrt(predictive_variance))
Exemple #23
0
def n_dimensional_weightmul(L, W, L_shape, Lout_shape, first_dim_of_l_is_batch=True):
  """ Equivalent to matmul(L, W)
      but works for L (and the output) with more than one non-batch dimension.
      L_shape and Lout_shape are excluding the batch dimension (0).

      L    : ?xN1xN2xN3x...
      Lout : ?xM1xM2xM3x...
      W    : N1xN2x...xM1xM2x...

      Raises NotImplementedError if first_dim_of_l_is_batch is False and
      ValueError if L_shape and Lout_shape together need more index letters
      than are available."""
  if not first_dim_of_l_is_batch:
    raise NotImplementedError
  n_in = len(L_shape)
  n_out = len(Lout_shape)
  if n_in == 1 and n_out == 1:
    return tf.matmul(L, W)
  # Einstein notation: letter b denotes the batch dimension and is therefore
  # excluded from the pool.
  # Lout_blmn... = L_bijk... * Wijk...lmn...
  letters = 'ijklmnopqrstuvwxyzacdefgh'
  if n_in + n_out > len(letters):
    raise ValueError(
        'n_dimensional_weightmul supports at most %d non-batch dimensions in '
        'total, got %d' % (len(letters), n_in + n_out))
  l_subscripts = letters[:n_in]
  lout_subscripts = letters[n_in:n_in + n_out]
  einsum_string = 'b'+l_subscripts+','+l_subscripts+lout_subscripts+'->'+'b'+lout_subscripts
  return tf.einsum(einsum_string,L,W)
Exemple #24
0
  def _compute_covariances(self, emission_weights, emission_variances):
    """Compute all covariance matrices.

    Computes the covariance matrix for the latent variables, the covariance
    matrix for the observations, and the covariance between the latents and
    the observations. The results are stored in `sigma_z`, `sigma_x` and
    `sigma_zx`; `obs_dist` is set to the zero-mean joint distribution of the
    observations.

    Args:
      emission_weights: A Tensor of shape [num_timesteps] containing
        the emission distribution weights at each timestep.
      emission_variances: A Tensor of shape [num_timesteps] containing
        the emission distribution variances at each timestep.
    """
    num_steps = self.num_timesteps

    # Marginal variance of each latent, built up step by step.
    latent_vars = [self.transition_variances.read(0)]
    for step in range(1, num_steps):
      carried = latent_vars[step - 1] * tf.square(
          self.transition_weights.read(step - 1))
      latent_vars.append(carried + self.transition_variances.read(step))

    def latent_cov(row, col):
      # Diagonal entries are the marginal variances; off-diagonal entries are
      # the earlier latent's variance scaled by the product of the transition
      # weights between the two timesteps.
      if row == col:
        return latent_vars[row]
      earlier, later = min(row, col), max(row, col)
      path_weight = tf.reduce_prod(
          self.transition_weights.gather(tf.range(earlier, later)))
      return latent_vars[earlier] * path_weight

    # Latent covariance matrix, assembled row by row.
    self.sigma_z = tf.stack([
        tf.stack([latent_cov(row, col) for col in range(num_steps)])
        for row in range(num_steps)])

    # Observation covariance matrix.
    weights_outer = tf.einsum("i,j->ij", emission_weights, emission_weights)
    self.sigma_x = weights_outer * self.sigma_z + tf.diag(emission_variances)

    # Latent - observation covariance matrix.
    # The first axis indexes latents, the second axis indexes observations.
    self.sigma_zx = emission_weights[tf.newaxis, :] * self.sigma_z
    self.obs_dist = tfd.MultivariateNormalFullCovariance(
        loc=tf.zeros([num_steps], dtype=tf.float32),
        covariance_matrix=self.sigma_x)
Exemple #25
0
def log_blend(inputs, weights):
  """Blends state in the log space.

  Computes log(sum_k weights[i, j, k] * exp(inputs[k, j])) with the maximum
  over the particle axis factored out before exponentiating.

  Args:
    inputs: A set of scalar states, one for each particle in each particle filter.
      Should be [num_samples, batch_size].
    weights: A set of weights used to blend the state. Each set of weights
      should be of dimension [num_samples] (one weight for each previous particle).
      There should be one set of weights for each new particle in each particle filter.
      Thus the shape should be [num_samples, batch_size, num_samples] where
      the first axis indexes new particle and the last axis indexes old particles.
  Returns:
    blended: The blended states, a tensor of shape [num_samples, batch_size].
  """
  raw_max = tf.reduce_max(inputs, axis=0, keepdims=True)
  # Non-finite maxima are replaced by zero, and no gradient flows through
  # the offset that is added back.
  finite_max = tf.where(
      tf.is_finite(raw_max), raw_max, tf.zeros_like(raw_max))
  my_max = tf.stop_gradient(finite_max)
  # The shift deliberately uses the raw maximum while the sanitised,
  # gradient-stopped one is added back, exactly as in the original code.
  shifted = tf.exp(inputs - raw_max)
  mixed = tf.einsum("ijk,kj->ij", weights, shifted)
  return tf.log(mixed) + my_max
  def run_test(self, axes):
    """Checks tf.einsum against np.einsum for `axes` on random inputs.

    Every letter in the equation gets a random size in [4, 12), the operands
    are filled with uniform random values, and the largest absolute
    difference between the two results must stay below 1e-8.
    """
    dim_sizes = {}
    for ch in axes:
      if ch.isalpha():
        dim_sizes[ch] = np.random.randint(4, 12)

    # Only the part before '->' describes the operands.
    operand_spec = axes.partition('->')[0]
    input_vals = [
        np.random.random([dim_sizes[ch] for ch in term])
        for term in operand_spec.split(',')
    ]

    output_tensor = tf.einsum(axes, *[tf.constant(val) for val in input_vals])
    with self.test_session():
      output_value = output_tensor.eval()

    expected = np.einsum(axes, *input_vals)

    err = np.abs(expected - output_value).max()
    print(axes, err)
    assert err < 1e-8
def inference(x, q, n_batch,
              vocab_size=None,
              embedding_dim=None,
              story_maxlen=None,
              question_maxlen=None):
    """Build the answer-prediction graph for a story/question pair.

    The story and question token ids are embedded, an attention map between
    them is computed with einsum, and the result is concatenated with the
    question embedding and fed step by step through an LSTM. The last LSTM
    output is projected onto the vocabulary.

    Args:
        x: Story token ids. The einsum 'ijk,ilk->ijl' requires the embedded
            story to be rank 3, so `x` is rank 2 (presumably
            [batch, story_maxlen] -- TODO confirm).
        q: Question token ids, rank 2 (presumably [batch, question_maxlen]).
        n_batch: Batch size used to build the LSTM zero state.
        vocab_size: Number of rows of each embedding table and width of the
            output distribution.
        embedding_dim: Width of the story/question embeddings; the LSTM has
            `embedding_dim // 2` units.
        story_maxlen: Accepted for interface compatibility; not used here.
        question_maxlen: Number of LSTM steps and width of the `C` embedding.

    Returns:
        Softmax over the vocabulary computed from the final LSTM output
        (matmul of the [n_batch, embedding_dim // 2] output with a
        [embedding_dim // 2, vocab_size] weight).
    """
    def weight_variable(shape, stddev=0.08):
        """Create a variable initialised from a truncated normal."""
        return tf.Variable(tf.truncated_normal(shape, stddev=stddev))

    # Three embedding tables: A and C are looked up with the story, B with
    # the question.
    A = weight_variable([vocab_size, embedding_dim])
    B = weight_variable([vocab_size, embedding_dim])
    C = weight_variable([vocab_size, question_maxlen])
    m = tf.nn.embedding_lookup(A, x)
    u = tf.nn.embedding_lookup(B, q)
    c = tf.nn.embedding_lookup(C, x)
    # Story-position x question-position match scores, softmaxed over the
    # last (question) axis.
    p = tf.nn.softmax(tf.einsum('ijk,ilk->ijl', m, u))
    o = tf.add(p, c)
    # Move the question axis to position 1 so it can be stepped over below.
    o = tf.transpose(o, perm=[0, 2, 1])
    ou = tf.concat([o, u], axis=-1)

    cell = tf.contrib.rnn.BasicLSTMCell(embedding_dim//2, forget_bias=1.0)
    state = cell.zero_state(n_batch, tf.float32)
    outputs = []
    with tf.variable_scope('LSTM'):
        for t in range(question_maxlen):
            if t > 0:
                # Share the cell weights across time steps.
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(ou[:, t, :], state)
            outputs.append(cell_output)
    output = outputs[-1]
    W = weight_variable([embedding_dim//2, vocab_size], stddev=0.01)
    a = tf.nn.softmax(tf.matmul(output, W))

    return a
  def _mean(self):
    """Return the mean observation at every step.

    The marginal hidden-state probabilities at each step are contracted with
    the per-state observation means; the result has shape
    batch_shape + [num_steps] + observation_event_shape.
    """
    with tf.control_dependencies(self._runtime_assertions):
      # state_probs :: num_steps batch_shape num_states
      state_probs = self._marginal_hidden_probs()
      # state_means :: observation_batch_shape[:-1] num_states
      #                observation_event_shape
      state_means = self._observation_distribution.mean()

      batch_shape = self.batch_shape_tensor()
      event_shape = self._observation_distribution.event_shape_tensor()
      batch_size = tf.reduce_prod(batch_shape)
      event_size = tf.reduce_prod(event_shape)

      # Broadcast the means so they carry the full batch shape:
      # state_means :: batch_shape num_states observation_event_shape
      state_means = tf.broadcast_to(
          state_means,
          tf.concat([batch_shape, [self._num_states], event_shape], axis=0))

      # Collapse batch dims and event dims to one axis each so a single
      # rank-3 einsum can do the contraction over states.
      # probs_flat :: num_steps batch_size num_states
      probs_flat = tf.reshape(
          state_probs, [self._num_steps, batch_size, self._num_states])
      # means_flat :: batch_size num_states observation_event_size
      means_flat = tf.reshape(
          state_means, [batch_size, self._num_states, event_size])

      # mean_flat :: batch_size num_steps observation_event_size
      mean_flat = tf.einsum("ijk,jkl->jil", probs_flat, means_flat)

      # result :: batch_shape num_steps observation_event_shape
      result_shape = tf.concat(
          [batch_shape, [self._num_steps], event_shape], axis=0)
      return tf.reshape(mean_flat, result_shape)
def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs,
                            params, tie_projs,
                            initializer=None, proj_initializer=None,
                            div_val=1, perms=None, proj_same_dim=True,
                            scope='adaptive_softmax',
                            **kwargs):
  """Compute an adaptive-softmax negative log-likelihood using einsum masks.

  With an empty `cutoffs`, a single softmax over all `n_token` classes is used
  and the mean cross-entropy is returned. Otherwise the classes are split at
  `cutoffs` into a head cluster (index 0) and tail clusters; each cluster's
  loss is masked/gathered with `perms[i]`, and the result is the summed loss
  divided by the summed mask count.

  Args:
    hidden: Hidden states fed to the softmax. The cutoff branch contracts it
      with 'ibd,ibk->kd', so it is rank 3 there.
    target: Integer class labels (presumably one per position of `hidden` --
      TODO confirm).
    n_token: Total number of classes.
    d_embed: Embedding width of the head cluster; cluster i uses
      `d_embed // div_val**i`.
    d_proj: Projection width compared against the per-cluster embedding width
      to decide whether a projection is needed.
    cutoffs: List of cluster boundaries; it is concatenated with `[0]` and
      `[n_token]`. An empty list disables the adaptive path.
    params: Pair `(weights, projections)`. With `div_val == 1` the weights are
      sliced by class range; otherwise they are indexed per cluster.
    tie_projs: Per-cluster flags; when true the projection comes from
      `params[1]` instead of a new variable.
    initializer: Not used in this function.
    proj_initializer: Initializer for any newly created 'proj' variable.
    div_val: Divisor applied per cluster to the embedding width.
    perms: Per-cluster masks, indexed by cluster. Must be provided when
      `cutoffs` is non-empty (the default None would fail on indexing).
      perms[0] multiplies the head loss elementwise; perms[i>0] are contracted
      with 'ib,ibk->k', so they are rank 3.
    proj_same_dim: Together with `div_val`, controls whether an untied
      projection may be skipped when `d_proj` equals the cluster width.
    scope: Variable scope name.
    **kwargs: "head_target" is read from here in the cutoff branch and used as
      the labels of the head cluster.

  Returns:
    The scalar loss tensor described above.
  """
  def _logit(x, W, b, proj):
    # Optionally project x with `proj`, then take the dot product with every
    # row of W and add the bias. Rank-3 and rank-2 inputs need different
    # einsum equations.
    y = x
    if x.shape.ndims == 3:
      if proj is not None:
        y = tf.einsum('ibd,ed->ibe', y, proj)
      return tf.einsum('ibd,nd->ibn', y, W) + b
    else:
      if proj is not None:
        y = tf.einsum('id,ed->ie', y, proj)
      return tf.einsum('id,nd->in', y, W) + b

  params_W, params_projs = params[0], params[1]

  with tf.variable_scope(scope):
    if len(cutoffs) == 0:
      # Plain (non-adaptive) softmax over the whole vocabulary.
      softmax_b = tf.get_variable('bias', [n_token],
                                  initializer=tf.zeros_initializer())
      output = _logit(hidden, params_W, softmax_b, params_projs)
      nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target,
                                                           logits=output)
      nll = tf.reduce_mean(nll)
    else:
      total_loss, total_cnt = 0, 0
      cutoff_ends = [0] + cutoffs + [n_token]
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          # Class-id range [l_idx, r_idx) covered by this cluster.
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]

          cur_d_embed = d_embed // (div_val ** i)

          if div_val == 1:
            cur_W = params_W[l_idx: r_idx]
          else:
            cur_W = params_W[i]
          cur_b = tf.get_variable('b', [r_idx - l_idx],
                                  initializer=tf.zeros_initializer())
          if tie_projs[i]:
            if div_val == 1:
              cur_proj = params_projs
            else:
              cur_proj = params_projs[i]
          else:
            if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
              cur_proj = None
            else:
              cur_proj = tf.get_variable('proj', [cur_d_embed, d_proj],
                                         initializer=proj_initializer)

          if i == 0:
            # Head cluster: its own classes plus one extra logit per tail
            # cluster (cluster_W / cluster_b).
            cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed],
                                        initializer=tf.zeros_initializer())
            cluster_b = tf.get_variable('cluster_b', [len(cutoffs)],
                                        initializer=tf.zeros_initializer())
            cur_W = tf.concat([cur_W, cluster_W], 0)
            cur_b = tf.concat([cur_b, cluster_b], 0)

            head_logit = _logit(hidden, cur_W, cur_b, cur_proj)

            head_target = kwargs.get("head_target")
            # head_nll is reused by every tail cluster in later iterations.
            head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=head_target,
                logits=head_logit)

            masked_loss = head_nll * perms[i]
            total_loss += tf.reduce_sum(masked_loss)
            total_cnt += tf.reduce_sum(perms[i])

            # Alternative formulation, left disabled by the original author:
            # head_logprob = tf.nn.log_softmax(head_logit)

            # final_logprob = head_logprob * perms[i][:, :, None]
            # final_target = tf.one_hot(target, tf.shape(head_logprob)[2])
            # total_loss -= tf.einsum('ibn,ibn->', final_logprob, final_target)
            # total_cnt += tf.reduce_sum(perms[i])
          else:
            # Tail cluster: contract the head loss, hidden states and targets
            # with perms[i] over the first two axes, leaving one row per k.
            cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i])

            cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i])
            tail_logit = _logit(cur_hidden, cur_W, cur_b, cur_proj)

            # Targets are shifted to be relative to this cluster's first class
            # and routed through float so they can go through einsum.
            tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx),
                                    perms[i])
            tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.to_int32(tail_target),
                logits=tail_logit)

            sum_nll = cur_head_nll + tail_nll
            # Per-k total of perms[i]; used both to weight the loss and as the
            # count contribution.
            mask = tf.reduce_sum(perms[i], [0, 1])

            masked_loss = sum_nll * mask
            total_loss += tf.reduce_sum(masked_loss)
            total_cnt += tf.reduce_sum(mask)

      nll = total_loss / total_cnt

  return nll
 def outer(self, tensor_in_1, tensor_in_2):
     """Return the outer product of two rank-1 tensors.

     Boolean operands are cast to float32 before the product is taken.

     Args:
         tensor_in_1: Rank-1 tensor indexing the rows of the result.
         tensor_in_2: Rank-1 tensor indexing the columns of the result.

     Returns:
         Rank-2 tensor whose (i, j) entry is tensor_in_1[i] * tensor_in_2[j].
     """
     if tensor_in_1.dtype == tf.bool:
         tensor_in_1 = tf.cast(tensor_in_1, tf.float32)
     # Cast the second operand into its own name; assigning it to
     # tensor_in_1 would discard the first operand.
     if tensor_in_2.dtype == tf.bool:
         tensor_in_2 = tf.cast(tensor_in_2, tf.float32)
     return tf.einsum('i,j->ij', tensor_in_1, tensor_in_2)
def insert_state(state, system, state_is_pure, mode=None, batched=False):
    """
    Insert a new mode, initialized in 'state', into 'system' at slot 'mode'.

    If 'mode' is None or is >= the current number of modes, the new mode is added at the end.
    If 'mode' is an integer within [0,...,N-1] (N being the number of modes in 'system'),
    the new state is placed in that slot and all following modes are shifted right by one.

    Args:
        state: tensor for the new mode, with 1 (ket) or 2 (density matrix) indices,
            plus a leading batch index when 'batched' is True.
        system: tensor holding the existing modes; a rank-0 tensor means there are no modes yet.
        state_is_pure: whether the pure representation (one index per mode) is in use;
            otherwise two indices per mode are assumed.
        mode: slot for the new mode, or None to append.
        batched: whether the tensors carry a leading batch index.

    Returns:
        The tensor for the enlarged system. When 'system' has no modes this is 'state' itself,
        or 'mixed(state)' if 'state' is a ket and 'state_is_pure' is False.

    Raises:
        ValueError: if 'system' has no modes and 'state' has neither of the two supported ranks.
    """
    batch_offset = 1 if batched else 0
    num_indices = len(system.shape)
    if state_is_pure:
        num_modes = num_indices - batch_offset
    else:
        num_modes = (num_indices - batch_offset) // 2

    if mode is None or mode >= num_modes:
        mode = num_modes

    if num_indices == 0:
        # no modes in system: the new state becomes the whole system
        state_dim = len(state.shape) - batch_offset
        if state_dim == 1:
            return state if state_is_pure else mixed(state)
        if state_dim == 2:
            return state
        # Report the tensor ranks that are actually accepted, including the batch index.
        raise ValueError(
            "'state' must have dim={} or dim={}".format(1 + batch_offset, 2 + batch_offset)
        )

    # modes in system
    if len(state.shape) == batch_offset + 1 and state_is_pure:
        # everything is pure
        # basic form:
        # 'ab...ln...yz,m->ab...lmn...yz' ('m' indices belong to bra of mode being inserted)
        mode_size = 1
    else:
        # everything is mixed
        # basic form:
        # 'abcd...klop...wxyz,mn->abcd...klmnop...wxyz' ('mn' indices belong to bra/ket of mode being inserted)
        mode_size = 2

    # Carve the index alphabet into: batch | modes before the slot | the new mode | modes after.
    start = batch_offset + mode * mode_size
    stop = batch_offset + (mode + 1) * mode_size
    end = batch_offset + (num_modes + 1) * mode_size
    batch_index = indices[:batch_offset]
    left_part = indices[batch_offset:start]
    middle_part = indices[start:stop]
    right_part = indices[stop:end]

    eqn_lhs = batch_index + left_part + right_part + "," + batch_index + middle_part
    eqn_rhs = batch_index + left_part + middle_part + right_part
    return tf.einsum(eqn_lhs + "->" + eqn_rhs, system, state)
def dam_model(input_x,
              input_x_mask,
              input_y,
              input_y_mask,
              word_emb,
              keep_rate,
              conf,
              x_len=None,
              y_len=None):
    """Build the context/response matching graph and return its final features.

    The response is embedded and passed through `conf['stack_num']`
    self-attention blocks. Each context turn goes through the same blocks
    (variables reused), then turn and response attend to each other at every
    stack level. The resulting representations are turned into a similarity
    tensor per turn, the turns are stacked, and a 3D CNN aggregates them.

    Args:
        input_x: Context token ids; unstacked along axis 1 into turns
            (presumably [batch, max_turn_num, max_turn_len] -- TODO confirm).
        input_x_mask: Not used in this function.
        input_y: Response token ids.
        input_y_mask: Not used in this function.
        word_emb: Embedding table used for both context and response.
        keep_rate: Not used in this function.
        conf: Dict read for the keys 'is_positional' and 'stack_num'.
        x_len: Per-turn lengths; unstacked along axis 1, so it must be
            provided despite the None default.
        y_len: Response lengths passed to the attention blocks.

    Returns:
        The output of `layers.CNN_3d` applied to the stacked similarity
        tensors.
    """

    Hr = tf.nn.embedding_lookup(word_emb, input_y)

    if conf['is_positional'] and conf['stack_num'] > 0:
        with tf.variable_scope('positional'):
            Hr = op.positional_encoding_vector(Hr, max_timescale=10)
    # Hr_stack[k] is the response representation after k self-attention blocks.
    Hr_stack = [Hr]

    for index in range(conf['stack_num']):
        with tf.variable_scope('self_stack_cr_' + str(index)):
            Hr = layers.block(Hr, Hr, Hr, Q_lengths=y_len, K_lengths=y_len)
            Hr_stack.append(Hr)

    # Context part.
    # A list of length max_turn_num; every element is a tensor with shape [batch, max_turn_len].
    list_turn_t = tf.unstack(input_x, axis=1)
    list_turn_length = tf.unstack(x_len, axis=1)

    sim_turns = []
    # For every turn_t calculate the matching tensor against the response.
    for turn_t, t_turn_length in zip(list_turn_t, list_turn_length):
        Hu = tf.nn.embedding_lookup(word_emb,
                                    turn_t)  #[batch, max_turn_len, emb_size]

        # Same scopes as the response branch, opened with reuse=True so the
        # turn shares the response's variables.
        if conf['is_positional'] and conf['stack_num'] > 0:
            with tf.variable_scope('positional', reuse=True):
                Hu = op.positional_encoding_vector(Hu, max_timescale=10)
        Hu_stack = [Hu]

        for index in range(conf['stack_num']):

            with tf.variable_scope('self_stack_cr_' + str(index), reuse=True):
                Hu = layers.block(Hu,
                                  Hu,
                                  Hu,
                                  Q_lengths=t_turn_length,
                                  K_lengths=t_turn_length)

                Hu_stack.append(Hu)

        r_a_t_stack = []
        t_a_r_stack = []
        for index in range(conf['stack_num'] + 1):

            # Turn attends to response. The first call creates the variables;
            # on later turns creation raises ValueError, and the block is
            # rebuilt with variable reuse switched on.
            with tf.variable_scope('t_attend_r_cr_' + str(index)):
                try:
                    t_a_r = layers.block(Hu_stack[index],
                                         Hr_stack[index],
                                         Hr_stack[index],
                                         Q_lengths=t_turn_length,
                                         K_lengths=y_len)
                except ValueError:
                    tf.get_variable_scope().reuse_variables()
                    t_a_r = layers.block(Hu_stack[index],
                                         Hr_stack[index],
                                         Hr_stack[index],
                                         Q_lengths=t_turn_length,
                                         K_lengths=y_len)

            # Response attends to turn, with the same create-or-reuse fallback.
            with tf.variable_scope('r_attend_t_cr_' + str(index)):
                try:
                    r_a_t = layers.block(Hr_stack[index],
                                         Hu_stack[index],
                                         Hu_stack[index],
                                         Q_lengths=y_len,
                                         K_lengths=t_turn_length)
                except ValueError:
                    tf.get_variable_scope().reuse_variables()
                    r_a_t = layers.block(Hr_stack[index],
                                         Hu_stack[index],
                                         Hu_stack[index],
                                         Q_lengths=y_len,
                                         K_lengths=t_turn_length)

            t_a_r_stack.append(t_a_r)
            r_a_t_stack.append(r_a_t)

        # Append the self-attention representations to the cross-attention
        # ones, giving 2 * (stack_num + 1) entries per list.
        t_a_r_stack.extend(Hu_stack)
        r_a_t_stack.extend(Hr_stack)

        t_a_r = tf.stack(t_a_r_stack, axis=-1)
        r_a_t = tf.stack(r_a_t_stack, axis=-1)

        # Calculate the similarity tensor.
        with tf.variable_scope('similarity'):
            # sim's last axis has 2*(stack_num+1) entries, one per stacked
            # representation; the 'k' axis is contracted.
            # Divide by sqrt(200) to prevent gradient explosion
            # (200 is presumably the embedding size -- TODO confirm).
            sim = tf.einsum('biks,bjks->bijs', t_a_r, r_a_t) / tf.sqrt(200.0)

        sim_turns.append(sim)

    # CNN and aggregation: stack the per-turn similarity tensors along a new
    # axis 1 and aggregate them.
    sim = tf.stack(sim_turns, axis=1)
    print('sim shape: %s' % sim.shape)
    with tf.variable_scope('cnn_aggregation'):
        final_info = layers.CNN_3d(sim, 32, 16)
        # Setting used for douban:
        #final_info = layers.CNN_3d(sim, 16, 16)

    return final_info
def two_mode_gate(matrix, mode1, mode2, in_modes, pure=True, batched=False):
    """Apply a two-mode gate to modes 'mode1' and 'mode2' of 'in_modes' via einsum.

    basic form:
    'abcd,efg...b...d...xyz->efg...a...c...xyz' (pure state)
    'abcd,ij...be...dg...xyz,efgh->ij...af...ch...xyz' (mixed state)

    Args:
        matrix: gate tensor with four indices (|a><b| |c><d|), plus a leading batch index when batched.
        mode1: first mode the gate acts on.
        mode2: second mode the gate acts on; must differ from mode1.
        in_modes: state tensor; one index per mode if pure, two per mode otherwise,
            plus a leading batch index when batched.
        pure: whether in_modes is in the pure representation.
        batched: whether the tensors carry a leading batch index.

    Returns:
        The state tensor after the gate, with the same index layout as in_modes.

    Raises:
        ValueError: if in_modes has no modes, or the mode numbers are out of range or equal.
        NotImplementedError: if in_modes has more modes than the index alphabet supports.
    """
    # pylint: disable=too-many-branches,too-many-statements
    if batched:
        batch_offset = 1
    else:
        batch_offset = 0
    batch_index = indices[:batch_offset]
    left_gate_str = indices[batch_offset : batch_offset + 4]  # |a><b| |c><d|
    num_indices = len(in_modes.shape)
    if pure:
        num_modes = num_indices - batch_offset
        mode_size = 1
    else:
        # The conjugated gate applied from the right needs its own four indices.
        right_gate_str = indices[batch_offset + 4 : batch_offset + 8]  # |e><f| |g><h|
        num_modes = (num_indices - batch_offset) // 2
        mode_size = 2
    max_len = (len(indices) - 4) // mode_size - batch_offset

    if num_modes == 0:
        raise ValueError("'in_modes' must have at least one mode")
    if num_modes > max_len:
        raise NotImplementedError(
            "The max number of supported modes for this operation is currently {}".format(max_len)
        )

    min_mode = min(mode1, mode2)
    max_mode = max(mode1, mode2)
    if min_mode < 0 or max_mode >= num_modes or mode1 == mode2:
        raise ValueError("One or more mode numbers are incompatible")

    # Index letters for the modes the gate does not touch, taken after the gate letters.
    other_modes_indices = indices[
        batch_offset + 4 * mode_size : batch_offset + 4 * mode_size + mode_size * (num_modes - 2)
    ]
    # build equation
    # The first index pair of the gate (a, b) belongs to mode1 and the second (c, d) to mode2;
    # which pair lands in the lower-numbered slot depends on the ordering of mode1 and mode2.
    if mode1 == min_mode:
        lhs_min_mode_indices = left_gate_str[1]
        lhs_max_mode_indices = left_gate_str[3]
        rhs_min_mode_indices = left_gate_str[0]
        rhs_max_mode_indices = left_gate_str[2]
    else:
        lhs_min_mode_indices = left_gate_str[3]
        lhs_max_mode_indices = left_gate_str[1]
        rhs_min_mode_indices = left_gate_str[2]
        rhs_max_mode_indices = left_gate_str[0]
    if not pure:
        # Mixed states carry a second index per mode, contracted with the right-hand gate.
        if mode1 == min_mode:
            lhs_min_mode_indices += right_gate_str[0]
            lhs_max_mode_indices += right_gate_str[2]
            rhs_min_mode_indices += right_gate_str[1]
            rhs_max_mode_indices += right_gate_str[3]
        else:
            lhs_min_mode_indices += right_gate_str[2]
            lhs_max_mode_indices += right_gate_str[0]
            rhs_min_mode_indices += right_gate_str[3]
            rhs_max_mode_indices += right_gate_str[1]
    # Input side: gate, then the state with the contracted indices placed in the two target slots.
    eqn_lhs = "{},{}{}{}{}{}{}".format(
        batch_index + left_gate_str,
        batch_index,
        other_modes_indices[: min_mode * mode_size],
        lhs_min_mode_indices,
        other_modes_indices[min_mode * mode_size : (max_mode - 1) * mode_size],
        lhs_max_mode_indices,
        other_modes_indices[(max_mode - 1) * mode_size :],
    )
    if not pure:
        eqn_lhs += "," + batch_index + right_gate_str
    # Output side: same layout, with the gate's free indices in the two target slots.
    eqn_rhs = "".join(
        [
            batch_index,
            other_modes_indices[: min_mode * mode_size],
            rhs_min_mode_indices,
            other_modes_indices[min_mode * mode_size : (max_mode - 1) * mode_size],
            rhs_max_mode_indices,
            other_modes_indices[(max_mode - 1) * mode_size :],
        ]
    )
    eqn = eqn_lhs + "->" + eqn_rhs
    einsum_inputs = [matrix, in_modes]
    if not pure:
        # Third operand: the conjugate of the gate with each index pair swapped
        # (the batch index, if any, stays in front).
        if batched:
            transpose_list = [0, 2, 1, 4, 3]
        else:
            transpose_list = [1, 0, 3, 2]
        einsum_inputs.append(tf.math.conj(tf.transpose(matrix, transpose_list)))
    output = tf.einsum(eqn, *einsum_inputs)
    return output
def combine_single_modes(modes_list, batched=False):
    """Group together a list of single modes (each having dim=1 or dim=2) into a composite mode system.

    If every mode is pure (dim=1) the result is a pure state with one index per mode.
    If any mode is mixed (dim=2) the result is a mixed state with two indices per mode;
    pure modes are promoted by pairing the ket with its complex conjugate.

    Args:
        modes_list: sequence of at least two single-mode tensors; 'dim' is the tensor rank
            not counting the batch index.
        batched: whether every tensor carries a leading batch index.

    Returns:
        The tensor product of all modes, with the batch index (if any) in front.

    Raises:
        ValueError: if fewer than two modes are given or a mode has dim other than 1 or 2.
        NotImplementedError: if there are more modes than the index alphabet supports.
    """
    batch_offset = 1 if batched else 0
    num_modes = len(modes_list)
    if num_modes <= 1:
        raise ValueError("'modes_list' must have at least two modes")

    dims = np.array([len(mode.shape) - batch_offset for mode in modes_list])
    if min(dims) < 1 or max(dims) > 2:
        raise ValueError("Each mode in 'modes_list' can only have dim=1 or dim=2")

    batch_index = indices[:batch_offset]

    if np.all(dims == 1):
        # All modes are represented as pure states.
        # Can return combined state also as pure state.
        # basic form:
        # 'a,b,c,...,x,y,z->abc...xyz'
        max_num = max_num_indices - batch_offset
        if num_modes > max_num:
            raise NotImplementedError(
                "The max number of supported modes for this operation with pure states is currently {}".format(
                    max_num
                )
            )
        out_str = indices[batch_offset : batch_offset + num_modes]
        modes_str = ",".join([batch_index + idx for idx in out_str])
        eqn = "{}->{}".format(modes_str, batch_index + out_str)
        einsum_inputs = modes_list
    else:
        # Some modes are mixed.
        # Return combined state as mixed.
        # basic form:
        # e.g., if first mode is pure and second is mixed...
        # 'a,b,cd,...->abcd...'
        # where (a,b) will belong to the first mode (bra & ket)
        # and cd will belong to the second mode (density matrix)
        max_num = (max_num_indices - batch_offset) // 2
        if num_modes > max_num:
            raise NotImplementedError(
                "The max number of supported modes for this operation with mixed states is currently {}".format(
                    max_num
                )
            )
        mode_idxs = [
            indices[slice(batch_offset + idx, batch_offset + idx + 2)]
            for idx in range(0, 2 * num_modes, 2)
        ]  # each mode gets a pair of consecutive indices
        eqn_rhs = batch_index + "".join(mode_idxs)
        # A pure mode contributes two operands (ket and conjugate), one letter each.
        # Both operands keep the batch axis, so each needs the batch index as a prefix;
        # without it the equation does not match the operand rank when batched.
        eqn_idxs = [
            batch_index + m
            if dims[idx] == 2
            else ",".join(batch_index + letter for letter in m)
            for idx, m in enumerate(mode_idxs)
        ]
        eqn = ",".join(eqn_idxs) + "->" + eqn_rhs
        einsum_inputs = []
        for idx, mode in enumerate(modes_list):
            if dims[idx] == 1:
                einsum_inputs += [mode, tf.math.conj(mode)]
            else:
                einsum_inputs += [mode]
    return tf.einsum(eqn, *einsum_inputs)
Exemple #35
0
def rel_multihead_attn(w, r, r_w_bias, r_r_bias, attn_mask, mems,
                       d_model, n_head, d_head, dropout, dropatt,
                       is_training, kernel_initializer, scope='rel_attn'):
    """Multi-head attention with relative position terms, followed by a
    residual connection and layer normalisation.

    Queries come from `w`; keys and values come from `w` preceded by `mems`
    when a memory of rank > 1 is given. The score is the sum of a
    content term (query + r_w_bias against keys) and a position term
    (query + r_r_bias against the projected `r`, then `rel_shift`-ed).

    Returns:
        layer_norm(attention_output + w) over the last axis.
    """
    inv_sqrt_d = 1 / (d_head**0.5)
    with tf.variable_scope(scope):
        qlen = tf.shape(w)[0]
        rlen = tf.shape(r)[0]
        bsz = tf.shape(w)[1]

        # Prepend the memory (if there is a usable one) along the time axis.
        if mems is not None and mems.shape.ndims > 1:
            kv_source = tf.concat([mems, w], 0)
        else:
            kv_source = w

        qkv = tf.layers.dense(kv_source, 3 * n_head * d_head, use_bias=False,
                              kernel_initializer=kernel_initializer,
                              name='qkv')
        rel_key = tf.layers.dense(r, n_head * d_head, use_bias=False,
                                  kernel_initializer=kernel_initializer,
                                  name='r')

        query, key, value = tf.split(qkv, 3, -1)
        # Only the last qlen positions act as queries; memory is key/value only.
        query = query[-qlen:]
        klen = tf.shape(key)[0]

        query = tf.reshape(query, [qlen, bsz, n_head, d_head])
        key = tf.reshape(key, [klen, bsz, n_head, d_head])
        value = tf.reshape(value, [klen, bsz, n_head, d_head])
        rel_key = tf.reshape(rel_key, [rlen, n_head, d_head])

        query_content = query + r_w_bias
        query_position = query + r_r_bias

        # [qlen, bsz, n_head, d_head] x [klen, bsz, n_head, d_head]
        #   -> [qlen, klen, bsz, n_head]
        content_score = tf.einsum('ibnd,jbnd->ijbn', query_content, key)
        position_score = rel_shift(
            tf.einsum('ibnd,jnd->ijbn', query_position, rel_key))

        score = (content_score + position_score) * inv_sqrt_d
        # Masked positions are zeroed and pushed to -1e30 before the softmax.
        mask = attn_mask[:, :, None, None]
        score = score * (1 - mask) - 1e30 * mask

        # Softmax over the key axis.
        prob = tf.nn.softmax(score, 1)
        prob = tf.layers.dropout(prob, dropatt, training=is_training)

        context = tf.einsum('ijbn,jbnd->ibnd', prob, value)
        # Merge the head and per-head feature axes.
        ctx_shape = tf.shape(context)
        context = tf.reshape(
            context, [ctx_shape[0], ctx_shape[1], n_head * d_head])

        projected = tf.layers.dense(context, d_model, use_bias=False,
                                    kernel_initializer=kernel_initializer,
                                    name='o')
        projected = tf.layers.dropout(projected, dropout,
                                      training=is_training)

        output = tf.contrib.layers.layer_norm(projected + w,
                                              begin_norm_axis=-1)
    return output
    def fidelity_coherent(self, alpha_list, **kwargs):
        r"""
        Fidelity of the state with the multi-mode coherent state given by alpha_list. May be numerical or symbolic.

        Args:
            alpha_list (Sequence[complex]): coherence parameters, one per mode; a single
                value without ``__len__`` is wrapped into a one-element list
            **kwargs: Optional keyword arguments, forwarded to ``self._run``.

                * ``eval``: when ``True`` the return value is numerical, when ``False`` it is symbolic.
                  If absent, the evaluation behaviour chosen at initialization is used.
                * ``session`` / ``feed_dict``: a Tensorflow Session or feed_dict to evaluate with.
                  A supplied Session overrides ``eval`` and evaluation happens in that Session;
                  otherwise a temporary session and/or empty feed_dict is used.
        Returns:
            float/Tensor: the numerical value, or an unevaluated Tensor object, for the fidelity :math:`\bra{\vec{\alpha}}\rho\ket{\vec{\alpha}}`.
        Raises:
            ValueError: if the number of alphas differs from the number of modes, or exceeds
                what the einsum index alphabet supports.
        """
        with self.graph.as_default():
            if not hasattr(alpha_list, "__len__"):
                alpha_list = [alpha_list]

            num_alphas = len(alpha_list)
            if num_alphas != self.num_modes:
                raise ValueError(
                    "The number of alpha values must match the number of modes."
                )
            # One index is kept for the batch axis; each mode needs two.
            if num_alphas > (len(indices) - 1) // 2:
                raise ValueError(
                    "Length of `alpha_list` exceeds supported number of modes."
                )

            state = self.data
            if not self.batched:
                state = tf.expand_dims(state, 0)  # introduce fake batch dimension

            def fock_amplitudes(a, dim):
                """Fock-basis amplitudes of the coherent state |a>, truncated at dim."""
                return [
                    np.exp(-0.5 * np.abs(a)**2) * (a)**n / np.sqrt(factorial(n))
                    for n in range(dim)
                ]

            # shape is: [num_modes, cutoff_dim]
            per_mode_cohs = [
                fock_amplitudes(a, self.cutoff_dim) for a in alpha_list
            ]
            mode_letters = indices[:self._modes]
            product_eqn = ",".join(mode_letters) + "->" + mode_letters
            # tensor product of specified coherent states
            coh_tensor = np.einsum(product_eqn, *per_mode_cohs)
            # flattened tensor product; shape is: [1, cutoff_dim ** num_modes]
            coh_flat = np.reshape(
                coh_tensor, [1, self.cutoff_dim**self.num_modes])

            if self.is_pure:
                state_flat = tf.reshape(
                    state, [-1, self.cutoff_dim**self.num_modes])
                overlap = tf.reduce_sum(coh_flat.conj() * state_flat, axis=1)
                fid = tf.abs(overlap)**2
            else:
                batch_letter = indices[0]
                remaining = indices[1:]
                bra_letters = remaining[:self.num_modes]
                ket_letters = remaining[self.num_modes:2 * self.num_modes]
                # The density matrix interleaves bra and ket letters mode by mode.
                rho_letters = "".join(
                    bra_letters[k] + ket_letters[k]
                    for k in range(self.num_modes))
                sandwich_eqn = (bra_letters + "," + batch_letter +
                                rho_letters + "," + ket_letters + "->" +
                                batch_letter)
                fid = tf.einsum(
                    sandwich_eqn,
                    tf.convert_to_tensor(np.conj(coh_tensor), dtype=def_type),
                    state,
                    tf.convert_to_tensor(coh_tensor, def_type),
                )

            if not self.batched:
                fid = tf.squeeze(fid, 0)  # drop fake batch dimension

            fid = tf.identity(fid, name="fidelity_coherent")
            return self._run(fid, **kwargs)
    def buildGamStep0(self, ListWeightUZ, ListBiasUZ, ListWeightGam,
                      ListBiasGam, Gam0_initializer):
        """Build the graph that fits the d x d variable "Gam0" at the initial date.

        A forward path, its antithetic path and the frozen initial point are
        pushed through the already-trained (non-trainable) U/Z and Gam
        networks at every Gam time step. The resulting driver differences are
        weighted and accumulated into a per-sample target `GamTraj`; the
        average of `D2gTf` at the two terminal points is added, and "Gam0" is
        trained by Adam to minimise the mean squared distance to that target.

        Args:
            ListWeightUZ: Stored weights of the U/Z networks, indexed from the
                end at multiples of `self.nbStepGamStab`.
            ListBiasUZ: Stored biases of the U/Z networks, indexed likewise.
            ListWeightGam: Stored weights of the Gam networks, indexed from
                the end; its length fixes the number of steps.
            ListBiasGam: Stored biases of the Gam networks.
            Gam0_initializer: Initializer for the "Gam0" variable.

        Returns:
            Dict with the placeholders "LRate" and "RandG", the variable
            "Gam0", the "Loss" tensor and the "train" op.
        """
        dic = {}
        dic["LRate"] = tf.compat.v1.placeholder(tf.float32,
                                                shape=[],
                                                name="learning_rate")
        # Gaussian increments: [sample, dimension, time step].
        dic["RandG"] = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[None, self.d, self.nbStepGam],
            name='randG')
        dic["Gam0"] = tf.compat.v1.get_variable("Gam0", [self.d, self.d],
                                                tf.float32, Gam0_initializer)
        sample_size = tf.shape(dic["RandG"])[0]
        sig = self.model.sigScal
        mu = self.model.muScal
        sqrtDt = np.sqrt(self.TStepGam)
        # Every sample starts from xInit; XPrev stays at that point throughout.
        XPrev = tf.tile(
            tf.expand_dims(tf.convert_to_tensor(self.xInit, dtype=tf.float32),
                           axis=0), [sample_size, 1])
        XNext = XPrev
        XNextAnti = XNext
        GamTraj = tf.zeros([sample_size, self.d, self.d])
        # Accumulated scaled increments and accumulated time.
        WAccul = tf.zeros([sample_size, self.d])
        TAccul = 0.
        for i in range(len(ListWeightGam) - 1):
            iStepLoc = i + 1
            # NOTE(review): tLoc is not used below.
            tLoc = (i + 1) * self.TStepGam
            WAccul = WAccul + sqrtDt * dic["RandG"][:, :, i]
            TAccul = TAccul + self.TStepGam
            # Forward step and its antithetic counterpart (noise sign flipped).
            XNext = XNext + mu * self.TStepGam + sig * sqrtDt * dic[
                "RandG"][:, :, i]
            XNextAnti = XNextAnti + mu * self.TStepGam - sig * sqrtDt * dic[
                "RandG"][:, :, i]
            # Position, counted from the end, of the matching U/Z network.
            iPosBSDE = (-i - 1) * self.nbStepGamStab
            print("len( ListWeightGam)", len(ListWeightGam), " ListWeightUZ ",
                  len(ListWeightUZ), " IPO", iPosBSDE)
            # Centre and scale the state before feeding the networks.
            normX = (XNext - self.xInit - mu * self.TStepGam * iStepLoc) / (
                sig * np.sqrt(self.TStepGam * iStepLoc))
            U, Z = self.networkUZ.createNetworkNotTrainable(
                normX, iStepLoc, ListWeightUZ[iPosBSDE], ListBiasUZ[iPosBSDE])
            Gam = self.networkGam.createNetworkNotTrainable(
                normX, iStepLoc, ListWeightGam[-i - 1], ListBiasGam[-i - 1])
            # Driver: dt * (0.5 * sum_j sig_j^2 * Gam_jj - fDW(...)).
            driver = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(Gam)) - self.model.fDW(
                    iStepLoc * self.TStepGam, XNext, U, Z, Gam))

            # Same evaluation on the antithetic path; the second argument is
            # offset by nbStepUDU.
            normXAnti = (XNextAnti - self.xInit - mu * self.TStepGam *
                         iStepLoc) / (sig * np.sqrt(self.TStepGam * iStepLoc))
            UAnti, ZAnti = self.networkUZ.createNetworkNotTrainable(
                normXAnti, self.nbStepUDU + iStepLoc, ListWeightUZ[iPosBSDE],
                ListBiasUZ[iPosBSDE])
            GamAnti = self.networkGam.createNetworkNotTrainable(
                normXAnti, self.nbStepUDU + iStepLoc, ListWeightGam[-i - 1],
                ListBiasGam[-i - 1])
            driverAnti = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(GamAnti)) - self.model.fDW(
                    iStepLoc * self.TStepGam, XNextAnti, UAnti, ZAnti,
                    GamAnti))

            # Same evaluation at the frozen initial point; the second argument
            # is offset by 2 * nbStepUDU.
            normXPrev = (XPrev - self.xInit - mu * self.TStepGam *
                         iStepLoc) / (sig * np.sqrt(self.TStepGam * iStepLoc))
            UPrev, ZPrev = self.networkUZ.createNetworkNotTrainable(
                normXPrev, 2 * self.nbStepUDU + iStepLoc,
                ListWeightUZ[iPosBSDE], ListBiasUZ[iPosBSDE])
            GamPrev = self.networkGam.createNetworkNotTrainable(
                normXPrev, 2 * self.nbStepUDU + iStepLoc,
                ListWeightGam[-i - 1], ListBiasGam[-i - 1])
            driverPrev = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(GamPrev)) - self.model.fDW(
                    iStepLoc * self.TStepGam, XPrev, UPrev, ZPrev, GamPrev))

            # Second-order weight: (W_i W_j - T) / (sig_i sig_j T^2).
            # TAccul is subtracted from every (i, j) entry by broadcasting.
            weight = (tf.einsum(
                'lij,j->lij',
                tf.einsum('i,lij->lij', tf.constant(1 / sig, dtype=tf.float32),
                          tf.einsum("li,lj->lij", WAccul, WAccul) - TAccul),
                tf.constant(1 / sig, dtype=tf.float32))) / (TAccul * TAccul)
            # Symmetrised driver difference, scaled per sample by the weight.
            GamTraj = GamTraj - tf.einsum(
                "l,lij->lij", 0.5 *
                (driver + driverAnti - 2 * driverPrev), weight)

        # One last step to reach the terminal points, using the increment at
        # index len(ListWeightGam) - 1.
        XNext = XNext + mu * self.TStepGam + sig * sqrtDt * dic[
            "RandG"][:, :, len(ListWeightGam) - 1]
        XNextAnti = XNextAnti + mu * self.TStepGam - sig * sqrtDt * dic[
            "RandG"][:, :, len(ListWeightGam) - 1]
        # Terminal contribution averaged over the path and its antithetic twin.
        GamTraj = GamTraj + 0.5 * (self.model.D2gTf(XNext) +
                                   self.model.D2gTf(XNextAnti))

        dic["Loss"] = tf.reduce_mean(tf.pow(dic["Gam0"] - GamTraj, 2))
        dic["train"] = tf.compat.v1.train.AdamOptimizer(
            learning_rate=dic["LRate"]).minimize(dic["Loss"])
        return dic
Exemple #38
0
 def sig(self, t, x):
     """Tile ``self.sigScal`` into one row per leading-dimension entry of ``x``.

     ``t`` is accepted but never read. The einsum is an outer product of a
     ones vector (length ``tf.shape(x)[0]``) with the float32 constant built
     from ``self.sigScal``, so every output row is a copy of that constant.
     """
     sig_vector = tf.constant(self.sigScal, dtype=tf.float32)
     n_rows = tf.shape(x)[0]
     ones_per_row = tf.ones(shape=n_rows, dtype=tf.float32)
     return tf.einsum('j,i->ij', sig_vector, ones_per_row)
Exemple #39
0
 def fDW(self, t, x, u, Du, D2u):
     """Evaluate ``-R/2 * |Du|^2 / D2u - muScal . Du`` row by row.

     ``t``, ``x`` and ``u`` are accepted but not read. ``1 / D2u`` is
     reshaped to a vector of length ``tf.shape(D2u)[0]``, which only
     succeeds when ``D2u`` carries exactly one value per row.
     """
     # Squared Euclidean norm of each row of Du.
     grad_sq_norm = tf.einsum('ij,ij->i', Du, Du)
     inv_second = tf.reshape(tf.math.reciprocal(D2u), [tf.shape(D2u)[0]])
     quadratic_term = tf.einsum('i,i->i', grad_sq_norm, inv_second)
     # Row-wise dot product of the constant muScal with Du.
     mu_vector = tf.constant(self.muScal, dtype=tf.float32)
     linear_term = tf.einsum('j,ij->i', mu_vector, Du)
     return -self.R / 2 * quadratic_term - linear_term
Exemple #40
0
    def __init__(
        self,
        num_symbols,
        num_embed_units,
        num_units,
        num_layers,
        num_labels,
        embed,
        learning_rate,
        max_gradient_norm=5.0,
        param_da=150,
        param_r=10,
    ):
        """Build the graph of a bidirectional-LSTM classifier with self-attention.

        Args:
            num_symbols: vocabulary size; first dimension of the embedding
                table when ``embed`` is None.
            num_embed_units: embedding width (used only when ``embed`` is
                None).
            num_units: hidden size of every LSTM cell.
            num_layers: number of stacked LSTM layers in each direction.
            num_labels: number of output classes.
            embed: initial value for the embedding table, or None to let
                ``tf.get_variable`` pick its default initializer.
            learning_rate: initial value of the non-trainable learning-rate
                variable.
            max_gradient_norm: threshold passed to
                ``tf.clip_by_global_norm``.
            param_da: second dimension of the attention weight ``Ws1``.
            param_r: second dimension of the attention weight ``Ws2``.
        """
        self.texts = tf.placeholder(tf.string, (None, None),
                                    "texts")  # shape: [batch, length]
        self.texts_length = tf.placeholder(tf.int32, None,
                                           "texts_length")  # shape: [batch]
        self.labels = tf.placeholder(tf.int32, None,
                                     "labels")  # shape: [batch]

        # Vocabulary table (string -> index); unknown tokens map to UNK_ID.
        self.symbol2index = MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int64,
            default_value=UNK_ID,
            shared_name="in_table",
            name="in_table",
            checkpoint=True,
        )

        batch_size = tf.shape(self.texts)[0]

        # Training state.
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)

        self.index_input = self.symbol2index.lookup(
            self.texts)  # shape: [batch, length]

        # Embedding table (index -> vector).
        if embed is None:
            self.embed = tf.get_variable("embed",
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # Start from the supplied (e.g. pre-trained) word vectors.
            self.embed = tf.get_variable("embed",
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed,
            self.index_input)  # shape: [batch, length, num_embed_units]

        def stacked_lstm():
            """Return a fresh stack of ``num_layers`` LSTM cells."""
            return MultiRNNCell(
                [BasicLSTMCell(num_units) for _ in range(num_layers)])

        # Each direction needs its own cell objects: wrapping one shared list
        # of cells in both MultiRNNCells makes the forward and backward RNNs
        # reuse the very same cell instances.
        cell_fw = stacked_lstm()
        cell_bw = stacked_lstm()
        outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                          cell_bw,
                                                          self.embed_input,
                                                          self.texts_length,
                                                          dtype=tf.float32,
                                                          scope="rnn")
        H = tf.concat(outputs, 2)  # shape: (batch, length, 2*num_units)

        with tf.variable_scope("logits"):
            # Self-attention: A = softmax(tanh(H Ws1) Ws2), M = A^T H.
            Ws1 = tf.get_variable("Ws1", [2 * num_units, param_da])
            Ws2 = tf.get_variable("Ws2", [param_da, param_r])

            temp = tf.tanh(tf.einsum("aij,jr->air", H,
                                     Ws1))  # shape: (batch, length, param_da)
            # NOTE(review): tf.nn.softmax normalizes over the last axis
            # (param_r) here, not over the sequence axis, and padded
            # positions are not masked -- confirm this is intended.
            A = tf.nn.softmax(
                tf.einsum("aij,jr->air", temp,
                          Ws2))  # shape: (batch, length, param_r)
            # A^T H is (batch, param_r, 2*num_units); summing over axis 1
            # collapses the param_r attention columns into a single vector.
            M = tf.reduce_sum(tf.einsum("aij,aik->ajk", A, H), axis=1)
            logits = tf.layers.dense(
                M, num_labels, activation=None,
                name="projection")  # shape: (batch, num_labels)

        # Penalization term: norm of (A^T A - I), with the identity repeated
        # once per batch element.
        identity = tf.reshape(
            tf.tile(tf.diag(tf.ones([param_r])), [batch_size, 1]),
            [batch_size, param_r, param_r])
        temp = tf.matmul(A, A, transpose_a=True)
        self.penalized_term = tf.norm(temp - identity)

        # Summed (not averaged) cross-entropy plus the weighted penalty.
        self.loss = (
            tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels, logits=logits),
                          name="loss") + 0.001 * self.penalized_term)
        predict_labels = tf.argmax(logits, 1, "predict_labels")
        # Number of correct predictions in the batch (a count, not a ratio).
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, tf.cast(predict_labels, tf.int32)),
            tf.int32),
                                      name="accuracy")

        self.params = tf.trainable_variables()

        # Momentum SGD on globally clipped gradients.
        opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=5,
                                    pad_step_number=True)
Exemple #41
0
    def call(self, inputs, **kwargs):
        """Compute start/end span log-probabilities and answerability logits.

        ``inputs`` is unpacked into ``(sequence_output, p_mask, cls_index,
        start_positions)``. When ``kwargs["training"]`` is truthy the end
        logits are conditioned on the given start positions and the method
        returns ``(start_log_probs, end_log_probs, cls_logits)``. Otherwise
        the top ``self.start_n_top`` start candidates are expanded and the
        method returns ``(start_top_log_probs, start_top_index,
        end_top_log_probs, end_top_index, cls_logits)``.
        """
        is_training = kwargs.get("training", False)

        def apply_mask(logits, mask):
            # Positions where mask == 1 are pushed to -1e30 so they vanish
            # under (log-)softmax.
            return logits * (1 - mask) - 1e30 * mask

        fields = tf_utils.unpack_inputs(inputs)
        hidden, p_mask, cls_index, start_positions = (
            fields[0], fields[1], fields[2], fields[3])

        _, seq_len, _ = hidden.shape.as_list()
        # Swap the first two axes; the 'l' subscript in the einsums below
        # runs over the seq_len axis.
        hidden = tf.transpose(hidden, [1, 0, 2])

        start_logits = tf.squeeze(self.start_logits_proj_layer(hidden), -1)
        start_logits = tf.transpose(start_logits, [1, 0])
        start_logits_masked = apply_mask(start_logits, p_mask)
        start_log_probs = tf.nn.log_softmax(start_logits_masked, -1)

        if is_training:
            # Condition the end logits on the supplied start positions.
            gold_start = tf.reshape(start_positions, [-1])
            start_one_hot = tf.one_hot(gold_start,
                                       depth=seq_len,
                                       axis=-1,
                                       dtype=tf.float32)
            start_vec = tf.einsum('lbh,bl->bh', hidden, start_one_hot)
            start_vec = tf.tile(start_vec[None], [seq_len, 1, 1])
            end_hidden = self.end_logits_proj_layer0(
                tf.concat([hidden, start_vec], axis=-1))
            end_hidden = self.end_logits_layer_norm(end_hidden)

            end_logits = self.end_logits_proj_layer1(end_hidden)
            end_logits = tf.transpose(tf.squeeze(end_logits, -1), [1, 0])
            end_log_probs = tf.nn.log_softmax(
                apply_mask(end_logits, p_mask), -1)
        else:
            # Expand the top-k start candidates and score an end
            # distribution for each of them.
            start_top_log_probs, start_top_index = tf.nn.top_k(
                start_log_probs, k=self.start_n_top)
            start_one_hot = tf.one_hot(start_top_index,
                                       depth=seq_len,
                                       axis=-1,
                                       dtype=tf.float32)
            start_vec = tf.einsum('lbh,bkl->bkh', hidden, start_one_hot)
            end_input = tf.tile(hidden[:, :, None],
                                [1, 1, self.start_n_top, 1])
            start_vec = tf.tile(start_vec[None], [seq_len, 1, 1, 1])
            end_input = tf.concat([end_input, start_vec], axis=-1)

            end_hidden = self.end_logits_proj_layer0(end_input)
            # The candidate axis is folded away for layer norm and restored
            # right after it.
            end_hidden = tf.reshape(end_hidden,
                                    [seq_len, -1, self.hidden_size])
            end_hidden = self.end_logits_layer_norm(end_hidden)
            end_hidden = tf.reshape(
                end_hidden, [seq_len, -1, self.start_n_top, self.hidden_size])

            end_logits = self.end_logits_proj_layer1(end_hidden)
            end_logits = tf.reshape(end_logits,
                                    [seq_len, -1, self.start_n_top])
            end_logits = tf.transpose(end_logits, [1, 2, 0])
            end_log_probs = tf.nn.log_softmax(
                apply_mask(end_logits, p_mask[:, None]), -1)
            end_top_log_probs, end_top_index = tf.nn.top_k(end_log_probs,
                                                           k=self.end_n_top)
            flat_shape = [-1, self.start_n_top * self.end_n_top]
            end_top_log_probs = tf.reshape(end_top_log_probs, flat_shape)
            end_top_index = tf.reshape(end_top_index, flat_shape)

        # Answerability head: combines the hidden state selected by
        # cls_index with the start-probability-weighted sum of hidden states.
        cls_one_hot = tf.one_hot(cls_index, seq_len, axis=-1,
                                 dtype=tf.float32)
        cls_feature = tf.einsum('lbh,bl->bh', hidden, cls_one_hot)

        start_p = tf.nn.softmax(start_logits_masked,
                                axis=-1,
                                name='softmax_start')
        start_feature = tf.einsum('lbh,bl->bh', hidden, start_p)

        ans_feature = tf.concat([start_feature, cls_feature], -1)
        ans_feature = self.answer_class_proj_layer0(ans_feature)
        ans_feature = self.ans_feature_dropout(ans_feature,
                                               training=is_training)
        cls_logits = tf.squeeze(self.answer_class_proj_layer1(ans_feature),
                                -1)

        if is_training:
            return (start_log_probs, end_log_probs, cls_logits)
        return (start_top_log_probs, start_top_index, end_top_log_probs,
                end_top_index, cls_logits)
Exemple #42
0
    def build_model(self):
        """
            Assemble placeholders, losses and the optimizer step.

            The model takes in:
            walk: key, label, neg
            node: key, label, neg
            Walk ids are used directly; node ids first go through
            ``self.sample`` and ``self.aggregate``.
        """
        self.neighs_and_types = self.Dataset.types_and_nodes
        # Print the maximum of each of the two slices along the last axis.
        for channel in (0, 1):
            print(np.max(self.neighs_and_types[:, :, channel]))

        def int_vector():
            return tf.placeholder(tf.int32, [None])

        self.batch_keys = int_vector()
        self.batch_labels = int_vector()
        self.batch_negs = int_vector()
        self.batch_input = int_vector()
        self.input_size = tf.placeholder(tf.int32)

        self.key_walks = int_vector()
        self.label_walks = int_vector()
        self.neg_walks = int_vector()

        # Neighbourhood sampling for every group of node ids.
        self.nodes_keys, self.paths_keys = self.sample(
            self.batch_keys, self.num_neighbor, self.batch_size)
        self.nodes_labels, self.paths_labels = self.sample(
            self.batch_labels, self.num_neighbor, self.batch_size)
        self.nodes_negs, self.paths_negs = self.sample(
            self.batch_negs, self.num_neighbor, self.neg_size)
        self.nodes_inputs, self.paths_inputs = self.sample(
            self.batch_input, self.num_neighbor, self.input_size)

        self.walk_embeddings = tf.get_variable(
            "walk_embeddings",
            [self.num_anonym_walk_types, self.walk_dim],
            tf.float64,
            initializer=tf.contrib.layers.xavier_initializer())
        self.walk_loss = self.compute_walk_loss()

        # Aggregate each sampled neighbourhood into one embedding per node.
        self.output_keys = self.aggregate(
            self.nodes_keys, self.paths_keys, self.batch_size)
        self.output_labels = self.aggregate(
            self.nodes_labels, self.paths_labels, self.batch_size)
        self.output_negs = self.aggregate(
            self.nodes_negs, self.paths_negs, self.neg_size)
        self.output = self.aggregate(
            self.nodes_inputs, self.paths_inputs, self.input_size)

        # L2-normalize every embedding matrix along axis 1.
        for attr in ("output_keys", "output_labels", "output_negs", "output"):
            setattr(self, attr, tf.nn.l2_normalize(getattr(self, attr), 1))

        # Negative-sampling objective; 1e-6 keeps the logs finite.
        pos_aff = tf.reduce_sum(self.output_keys * self.output_labels, axis=1)
        neg_aff = tf.einsum("ij,kj->ik", self.output_keys, self.output_negs)
        pos_term = tf.log(tf.sigmoid(pos_aff) + 1e-6)
        neg_term = tf.reduce_sum(
            tf.log(1 - tf.sigmoid(neg_aff) + 1e-6), axis=1)
        self.likelihood = pos_term + neg_term

        self.link_loss = -tf.reduce_mean(self.likelihood)
        self.walk_loss *= self.walk_loss_lambda
        self.loss = self.link_loss + self.walk_loss

        # Any other value of self.optimizer leaves self.optim untouched.
        optimizer_factories = {
            "Adam": lambda: tf.train.AdamOptimizer(self.learning_rate),
            "SGD": lambda: tf.train.GradientDescentOptimizer(
                self.learning_rate),
            "Momentum": lambda: tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=0.9),
        }
        make_optim = optimizer_factories.get(self.optimizer)
        if make_optim is not None:
            self.optim = make_optim()

        # Gradients are applied as-is (no clipping).
        self.opt_op = self.optim.minimize(self.loss)
 def _logit(x, W, b, proj):
   """Return ``x`` (optionally projected by ``proj``) times ``W^T`` plus ``b``.

   When ``proj`` is None the projection step is skipped and ``x`` is
   contracted with ``W`` directly over its last axis.
   """
   projected = x if proj is None else tf.einsum('ibd,ed->ibe', x, proj)
   return tf.einsum('ibd,nd->ibn', projected, W) + b
Exemple #44
0
def model_fn(features, labels, mode, params):
    """Build the model function f(x) for a tf.estimator.Estimator.

    Args:
        features: dict providing ``'feat_ids'`` and ``'feat_vals'``; both are
            reshaped to ``[-1, field_size]``.
        labels: training/evaluation targets; not read in PREDICT mode.
        mode: one of ``tf.estimator.ModeKeys``.
        params: dict with ``field_size``, ``feature_size``,
            ``embedding_size``, ``l2_reg``, ``learning_rate`` and the
            comma-separated strings ``deep_layers`` and ``dropout``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN mode
        (None for any other mode).

    Raises:
        ValueError: if ``FLAGS.model_type`` or ``FLAGS.optimizer`` is not one
            of the supported values.
    """
    # ------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    # Materialize as lists: on Python 3 ``map`` returns an iterator that
    # supports neither len() nor indexing.
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))
    # Integer division: num_pairs is used as a tensor dimension and
    # field_size * (field_size - 1) is always even.
    num_pairs = field_size * (field_size - 1) // 2

    # ------build weights------
    Global_Bias = tf.get_variable(
        name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(
        name='linear', shape=[feature_size],
        initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(
        name='emb', shape=[feature_size, embedding_size],
        initializer=tf.glorot_normal_initializer())

    # ------build feature-------
    feat_ids = features['feat_ids']                                 # None * F * 1
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']                               # None * F * 1
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    # ------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids)     # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Embedding-layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)     # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)             # None * F * K

    with tf.variable_scope("Product-layer"):
        flat_embeddings_shape = [-1, field_size * embedding_size]
        if FLAGS.model_type == 'FNN':
            deep_inputs = tf.reshape(embeddings, shape=flat_embeddings_shape)
        elif FLAGS.model_type in ('Inner', 'Outer'):
            # All unordered field pairs (i, j) with i < j.
            pairs = [(i, j)
                     for i in range(field_size - 1)
                     for j in range(i + 1, field_size)]
            row = [i for i, _ in pairs]
            col = [j for _, j in pairs]
            p = tf.gather(embeddings, row, axis=1)
            q = tf.gather(embeddings, col, axis=1)
            if FLAGS.model_type == 'Inner':
                # Inner product per pair: None * (F*(F-1)/2)
                product = tf.reshape(tf.reduce_sum(p * q, [-1]),
                                     [-1, num_pairs])
            else:
                # NOTE: the original author marked the 'Outer' variant as
                # "NOT ready yet".
                # Outer product per pair: None * (F*(F-1)/2*K*K)
                product = tf.reshape(
                    tf.einsum('api,apj->apij', p, q),
                    [-1, num_pairs * embedding_size * embedding_size])
            deep_inputs = tf.concat(
                [tf.reshape(embeddings, shape=flat_embeddings_shape), product],
                1)
        else:
            raise ValueError(
                "Unsupported model_type: %r" % (FLAGS.model_type,))

    with tf.variable_scope("Deep-part"):
        train_phase = mode == tf.estimator.ModeKeys.TRAIN

        for i, num_outputs in enumerate(layers):
            deep_inputs = tf.contrib.layers.fully_connected(
                inputs=deep_inputs, num_outputs=num_outputs,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope='mlp%d' % i)

            if FLAGS.batch_norm:
                # Batch norm is placed after the ReLU, see
                # https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
                deep_inputs = batch_norm_layer(
                    deep_inputs, train_phase=train_phase,
                    scope_bn='bn_%d' % i)
            if train_phase:
                # Dropout is applied after batch norm; the configured value
                # is a keep probability (0.8 keeps 80%, drops 20%).
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            scope='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("PNN-out"):
        # Warning: `labels` must not be used to shape the bias. Doing so
        # breaks predict/export (train/evaluate still work); the original
        # author's guess is that the estimator does not pass labels when
        # they are not needed.
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    # ------build loss------
    loss = (
        tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels))
        + l2_reg * tf.nn.l2_loss(Feat_Bias)
        + l2_reg * tf.nn.l2_loss(Feat_Emb))

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)

    # ------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(
            learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)
    else:
        raise ValueError("Unsupported optimizer: %r" % (FLAGS.optimizer,))

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
def mul_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer,
                                  proj_initializer, div_val=1, perms=None,
                                  proj_same_dim=True,
                                  scope='adaptive_embed'):
  """Adaptive embedding lookup built from matmuls/einsums.

  Args:
    x: integer token ids used to index the lookup table(s).
    n_token: vocabulary size.
    d_embed: embedding width of the first bin.
    d_proj: output width; the result is scaled by ``d_proj ** 0.5``.
    cutoffs: list of bin boundaries; bins are delimited by
      ``[0] + cutoffs + [n_token]``.
    initializer: initializer for the lookup tables.
    proj_initializer: initializer for the projection matrices.
    div_val: bin ``i`` uses width ``d_embed // div_val ** i``. With
      ``div_val == 1`` a single table is used.
    perms: If None, first compute W = W1 x W2 (projection for each bin),
      and then compute X x W (embedding lookup). If not None,
      use bin-based embedding lookup with max_bin_size defined by
      the shape of perms.
    proj_same_dim: when False, bins whose width already equals ``d_proj``
      get no projection matrix.
    scope: variable scope name.

  Returns:
    ``(y, ret_params)``: the scaled embeddings and either
    ``[lookup_table, proj_W]`` (``div_val == 1``) or ``[tables, projs]``.
    Entries for skipped projections are None.
  """
  emb_scale = d_proj ** 0.5
  with tf.variable_scope(scope):
    if div_val == 1:
      lookup_table = tf.get_variable('lookup_table', [n_token, d_embed],
                                     initializer=initializer)
      y = embedding_lookup(lookup_table, x)
      if d_proj != d_embed:
        proj_W = tf.get_variable('proj_W', [d_embed, d_proj],
                                 initializer=proj_initializer)
        y = tf.einsum('ibe,ed->ibd', y, proj_W)
      else:
        proj_W = None
      ret_params = [lookup_table, proj_W]
    else:
      tables, projs = [], []
      cutoff_ends = [0] + cutoffs + [n_token]
      x_size = tf.shape(x)
      if perms is None:
        # Collect one projected table per bin; concatenated after the loop.
        cat_lookup = []
      else:
        # Accumulate the per-bin contributions directly into the output.
        cat_lookup = tf.zeros([x_size[0], x_size[1], d_proj])
      for i in range(len(cutoff_ends) - 1):
        with tf.variable_scope('cutoff_{}'.format(i)):
          l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
          cur_d_embed = d_embed // (div_val ** i)
          lookup_table = tf.get_variable('lookup_table',
                                         [r_idx - l_idx, cur_d_embed],
                                         initializer=initializer)
          if cur_d_embed == d_proj and not proj_same_dim:
            proj_W = None
          else:
            proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj],
                                   initializer=proj_initializer)
          if perms is None:
            if proj_W is None:
              # No projection for this bin: the table already has width
              # d_proj, so it can be concatenated as-is.
              cat_lookup.append(lookup_table)
            else:
              cat_lookup.append(
                  tf.einsum('ie,ed->id', lookup_table, proj_W))
          else:
            # speed up the computation of the first bin
            # also save some memory
            if i == 0:
              # Ids beyond this bin are clamped to its last row; perms[i]
              # then scales each position's contribution.
              cur_y = embedding_lookup(lookup_table, tf.minimum(x, r_idx - 1))
              if proj_W is not None:
                cur_y = tf.einsum('ibe,ed->ibd', cur_y, proj_W)
              cur_y *= perms[i][:, :, None]
              cat_lookup += cur_y
            else:
              # Contract the bin-relative ids with perms[i] into k slots,
              # look those up, then scatter back with the same perms[i].
              cur_x = tf.einsum('ib,ibk->k', tf.to_float(x - l_idx), perms[i])
              cur_x = tf.to_int32(cur_x)
              cur_y = embedding_lookup(lookup_table, cur_x)
              if proj_W is not None:
                cur_y = tf.einsum('ke,ed->kd', cur_y, proj_W)
              cat_lookup += tf.einsum('kd,ibk->ibd', cur_y, perms[i])
          tables.append(lookup_table)
          projs.append(proj_W)
      if perms is None:
        cat_lookup = tf.concat(cat_lookup, 0)
        y = embedding_lookup(cat_lookup, x)
      else:
        y = cat_lookup
      ret_params = [tables, projs]

  y *= emb_scale
  return y, ret_params
 def grad(dy):
     """Return ``[d/dpsp, d/dw_out, d/dBA_out]`` for the upstream ``dy``.

     ``psp``, ``BA_out`` and ``FLAGS`` come from the enclosing scope
     (presumably a custom-gradient wrapper -- confirm against the caller).
     The ``BA_out`` entry equals the ``w_out`` expression only when
     ``FLAGS.eprop == 'adaptive'``; otherwise it is all zeros.
     """
     weight_grad = tf.einsum('btj,btk->jk', psp, dy)
     if FLAGS.eprop == 'adaptive':
         feedback_grad = tf.einsum('btj,btk->jk', psp, dy)
     else:
         feedback_grad = tf.zeros_like(BA_out)
     # The signal sent back to psp is routed through BA_out.
     input_grad = tf.einsum('bik,jk->bij', dy, BA_out)
     return [input_grad, weight_grad, feedback_grad]
Exemple #47
0
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Multi-headed attention from `from_tensor` to `to_tensor`.

    Follows "Attention is all you Need". When `from_tensor` and `to_tensor`
    are the same tensor this is self-attention. `from_tensor` is projected to
    a "query" tensor and `to_tensor` to "key" and "value" tensors; query/key
    dot products are scaled by 1/sqrt(size_per_head), optionally masked,
    softmaxed into attention probabilities, passed through dropout, and used
    to take a weighted sum of the values.

    Dimension letters used below:
      B = batch size, F = `from_tensor` sequence length,
      T = `to_tensor` sequence length, N = `num_attention_heads`,
      H = `size_per_head`.

    Einsum view of the computation:
      Q:[BFNH]      = projection of from_tensor
      K, V:[BTNH]   = projections of to_tensor
      scores:[BNFT] = einsum('BTNH,BFNH->BNFT', K, Q) / sqrt(H)
      probs:[BNFT]  = softmax(scores)
      context:[BFNH] = einsum('BNFT,BTNH->BFNH', probs, V)

    Args:
      from_tensor: float Tensor of shape [batch_size, from_seq_length,
        from_width] (rank 2 is also accepted by the shape check).
      to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length, to_seq_length] holding 1 or 0. Positions holding 0
        get -10000.0 added to their score before the softmax; positions
        holding 1 are unchanged.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation function for the query transform.
      key_act: (optional) Activation function for the key transform.
      value_act: (optional) Activation function for the value transform.
      attention_probs_dropout_prob: (optional) float. Dropout probability of
        the attention probabilities.
      initializer_range: float. Range of the weight initializer.
      batch_size: (Optional) int. Must be given when the inputs are rank 2.
      from_seq_length: (Optional) Must be given when the inputs are rank 2.
      to_seq_length: (Optional) Must be given when the inputs are rank 2.
        These three values are only validated here; the computation below
        does not read them.

    Returns:
      float Tensor of shape [batch_size, from_seq_length, num_attention_heads,
        size_per_head].

    Raises:
      ValueError: If the ranks of the two inputs differ, or the inputs are
        rank 2 and any of the three size arguments is missing.
    """
    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])

    input_rank = len(from_shape)
    if input_rank != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if input_rank == 2:
        explicit_dims = (batch_size, from_seq_length, to_seq_length)
        if any(dim is None for dim in explicit_dims):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    def project_heads(inputs, activation, scope_name):
        # One per-head dense projection; a new initializer is built per call.
        return dense_layer_3d(inputs, num_attention_heads, size_per_head,
                              create_initializer(initializer_range),
                              activation, scope_name)

    queries = project_heads(from_tensor, query_act, "query")  # [B, F, N, H]
    keys = project_heads(to_tensor, key_act, "key")  # [B, T, N, H]
    values = project_heads(to_tensor, value_act, "value")  # [B, T, N, H]

    # Raw scores: query/key dot products, scaled down by sqrt(H).
    inverse_scale = 1.0 / math.sqrt(float(size_per_head))
    scores = tf.einsum("BTNH,BFNH->BNFT", keys, queries)
    scores = tf.multiply(scores, inverse_scale)

    if attention_mask is not None:
        # Insert a head axis so the mask broadcasts: [B, 1, F, T].
        broadcast_mask = tf.expand_dims(attention_mask, axis=[1])
        # 0.0 where attention is allowed, -10000.0 where it is masked; adding
        # this before the softmax effectively removes the masked positions.
        penalty = (1.0 - tf.cast(broadcast_mask, tf.float32)) * -10000.0
        scores += penalty

    # [B, N, F, T] probabilities. Dropout here drops whole attended tokens,
    # as in the original Transformer paper.
    probs = tf.nn.softmax(scores)
    probs = dropout(probs, attention_probs_dropout_prob)

    # Weighted sum of the values: [B, F, N, H].
    return tf.einsum("BNFT,BTNH->BFNH", probs, values)
Exemple #48
0
    def call(self, x, y, bias, cache=None):
        """Apply attention from x to y, treating the trailing entities specially.

        The last `self.num_vir_entities` positions along the length axis are
        "virtual" entities. Standard positions attend only to standard
        positions (with `bias` added to their logits); virtual positions
        attend to every position and receive no bias.

        Args:
          x: a tensor with shape [batch_size, length_x + num_ve, hidden_size]
          y: a tensor with shape [batch_size, length_y + num_ve, hidden_size]
          bias: attention bias added to the standard-entity logits after
            being passed through `self.split_collapse`.
          cache: (Used during prediction) dictionary with tensors containing
            results of previous attentions. The dictionary must have the items:
                {"k": tensor with shape [batch_size, i, key_channels],
                 "v": tensor with shape [batch_size, i, value_channels]}
            where i is the current decoded length. It is updated in place
            with the concatenated keys and values.

        Returns:
          Attention layer output with shape
          [batch_size, length_x + num_ve, hidden_size]

        NOTE(review): keys and values are reshaped using the length of `x`,
        not of `y` (or of the cache-extended keys). That looks correct only
        when those lengths agree; confirm against callers.
        """
        num_virtual = self.num_vir_entities
        total_len = tf.shape(x)[1]  # input_length + num_ve
        head_dim = self.hidden_size // self.num_heads

        # Separate learned projections for queries, keys and values.
        queries = self.q_dense_layer(x)
        keys = self.k_dense_layer(y)
        values = self.v_dense_layer(y)

        if cache is not None:
            # Prepend cached keys/values, then store the result back.
            keys = tf.concat([cache["k"], keys], axis=1)
            values = tf.concat([cache["v"], values], axis=1)
            cache["k"] = keys
            cache["v"] = values

        # Split into heads, then fold batch and head axes together so all
        # heads are processed at once: (-1, length, depth).
        flat_shape = (-1, total_len, head_dim)
        queries = tf.reshape(self.split_heads(queries), flat_shape)
        keys = tf.reshape(self.split_heads(keys), flat_shape)
        values = tf.reshape(self.split_heads(values), flat_shape)

        # Scale queries so the dot products do not grow too large.
        queries = queries * head_dim**-0.5

        num_standard = total_len - num_virtual
        std_queries = queries[:, :num_standard, :]
        vir_queries = queries[:, num_standard:, :]
        std_keys = keys[:, :num_standard, :]
        std_values = values[:, :num_standard, :]

        dot_spec = 'aib,ajb->aij'
        # (-1, len-num_ve, len-num_ve): standard entities see only standard ones.
        std_logits = tf.einsum(dot_spec, std_queries, std_keys)
        # (-1, num_ve, length): virtual entities see everything.
        vir_logits = tf.einsum(dot_spec, vir_queries, keys)

        # Original comment: bias has shape (-1, 1, length) after this call.
        std_logits += self.split_collapse(bias)

        std_weights = tf.nn.softmax(std_logits, name="weights_qk_std")
        vir_weights = tf.nn.softmax(vir_logits, name="weights_qk_vir")

        if self.train:
            dropout_arg = 1.0 - self.attention_dropout
            std_weights = tf.nn.dropout(std_weights, dropout_arg)
            vir_weights = tf.nn.dropout(vir_weights, 1.0 - self.attention_dropout)

        mix_spec = 'aij,ajc->aic'
        std_out = tf.einsum(mix_spec, std_weights, std_values)  # (-1, length-num_ve, depth)
        vir_out = tf.einsum(mix_spec, vir_weights, values)  # (-1, num_ve, depth)
        merged = tf.concat([std_out, vir_out], axis=-2)  # (-1, length, depth)

        # Restore the head axis, recombine heads to
        # [batch_size, length, hidden_size], then apply the output projection.
        per_head = tf.reshape(merged, (-1, self.num_heads, total_len, head_dim))
        combined = self.combine_heads(per_head)
        return self.output_dense_layer(combined)
    def buildGamStep(self, iStep, ListWeightUZ, ListBiasUZ, ListWeightGam,
                     ListBiasGam):
        """Build the graph pieces used to train the Gam network at step `iStep`.

        Creates placeholders, a trainable Gam network evaluated at the
        normalized `XPrev`, and a regression target `GamTraj` assembled from
        simulated forward/antithetic paths and previously trained (frozen)
        UZ and Gam networks. The loss is the mean squared difference between
        the trainable network output and `GamTraj`.

        Args:
          iStep: index of the time step being trained.
          ListWeightUZ, ListBiasUZ: stored weights/biases of the UZ networks,
            indexed from the end with a stride of `self.nbStepGamStab`.
          ListWeightGam, ListBiasGam: stored weights/biases of the Gam
            networks, indexed from the end.

        Returns:
          dict with keys "LRate", "XPrev", "RandG" (placeholders), "Gam",
          "weightLoc", "biasLoc", "Loss" and "train".

        NOTE(review): `rescale` is sig * sqrt(TStepGam * iStep), so iStep == 0
        would divide by zero in `normX0` - confirm callers never pass 0.
        """
        dic = {}
        dic["LRate"] = tf.compat.v1.placeholder(tf.float32,
                                                shape=[],
                                                name="learning_rate")
        dic["XPrev"] = tf.compat.v1.placeholder(dtype=tf.float32,
                                                shape=[None, self.d],
                                                name='XPrev')
        # One random draw per remaining Gam step, for each sample and dimension.
        dic["RandG"] = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[None, self.d, self.nbStepGam - iStep],
            name='randG')
        sample_size = tf.shape(dic["XPrev"])[0]
        sig = self.model.sigScal
        mu = self.model.muScal
        # Center and scale XPrev before feeding it to the network.
        rescale = sig * np.sqrt(self.TStepGam * iStep)
        normX0 = (dic["XPrev"] - self.xInit -
                  mu * self.TStepGam * iStep) / rescale
        # Before the last step, initialise from the most recently stored Gam
        # weights; otherwise create the network without an initializer.
        if (iStep < self.nbStepGam):
            dic["Gam"] = self.networkGam.createNetworkWithInitializer(
                normX0, iStep, ListWeightGam[-1], ListBiasGam[-1], rescale)
        else:
            dic["Gam"] = self.networkGam.createNetwork(normX0, iStep, rescale)

        sqrtDt = np.sqrt(self.TStepGam)
        XNext = dic["XPrev"]
        XNextAnti = dic["XPrev"]
        GamTraj = tf.zeros([sample_size, self.d, self.d])
        WAccul = tf.zeros([sample_size, self.d])
        TAccul = 0.
        for i in range(len(ListWeightGam) - 1):
            iStepLoc = iStep + i + 1
            # NOTE(review): tLoc is not used below.
            tLoc = (iStep + i + 1) * self.TStepGam
            # Accumulate the scaled noise and elapsed time since iStep.
            WAccul = WAccul + sqrtDt * dic["RandG"][:, :, i]
            TAccul = TAccul + self.TStepGam
            # Advance the path and its antithetic twin (same noise, opposite sign).
            XNext = XNext + mu * self.TStepGam + sig * sqrtDt * dic[
                "RandG"][:, :, i]
            XNextAnti = XNextAnti + mu * self.TStepGam - sig * sqrtDt * dic[
                "RandG"][:, :, i]
            # Stored UZ networks are addressed from the end of the lists.
            iPosBSDE = (-i - 1) * self.nbStepGamStab
            normX = (XNext - self.xInit - mu * self.TStepGam * iStepLoc) / (
                sig * np.sqrt(self.TStepGam * iStepLoc))
            U, Z = self.networkUZ.createNetworkNotTrainable(
                normX, iStepLoc, ListWeightUZ[iPosBSDE], ListBiasUZ[iPosBSDE])
            Gam = self.networkGam.createNetworkNotTrainable(
                normX, iStepLoc, ListWeightGam[-i - 1], ListBiasGam[-i - 1])
            # Driver on the forward path: dt * (0.5 * sum_j sig_j^2 * Gam_jj - fDW).
            driver = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(Gam)) - self.model.fDW(
                    iStepLoc * self.TStepGam, XNext, U, Z, Gam))

            # Same evaluation on the antithetic path; the second argument is
            # offset by nbStepUDU.
            normXAnti = (XNextAnti - self.xInit - mu * self.TStepGam *
                         iStepLoc) / (sig * np.sqrt(self.TStepGam * iStepLoc))
            UAnti, ZAnti = self.networkUZ.createNetworkNotTrainable(
                normXAnti, self.nbStepUDU + iStepLoc, ListWeightUZ[iPosBSDE],
                ListBiasUZ[iPosBSDE])
            GamAnti = self.networkGam.createNetworkNotTrainable(
                normXAnti, self.nbStepUDU + iStepLoc, ListWeightGam[-i - 1],
                ListBiasGam[-i - 1])
            driverAnti = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(GamAnti)) - self.model.fDW(
                    iStepLoc * self.TStepGam, XNextAnti, UAnti, ZAnti,
                    GamAnti))

            # Same evaluation at the unmoved starting point XPrev; the second
            # argument is offset by 2 * nbStepUDU.
            normXPrev = (dic["XPrev"] - self.xInit - mu * self.TStepGam *
                         iStepLoc) / (sig * np.sqrt(self.TStepGam * iStepLoc))
            UPrev, ZPrev = self.networkUZ.createNetworkNotTrainable(
                normXPrev, 2 * self.nbStepUDU + iStepLoc,
                ListWeightUZ[iPosBSDE], ListBiasUZ[iPosBSDE])
            GamPrev = self.networkGam.createNetworkNotTrainable(
                normXPrev, 2 * self.nbStepUDU + iStepLoc,
                ListWeightGam[-i - 1], ListBiasGam[-i - 1])
            driverPrev = self.TStepGam * (0.5 * tf.einsum(
                'j,ij->i', tf.constant(sig * sig, dtype=tf.float32),
                tf.matrix_diag_part(GamPrev)) - self.model.fDW(
                    iStepLoc * self.TStepGam, dic["XPrev"], UPrev, ZPrev,
                    GamPrev))

            # Per-sample d x d weight: (W W^T - TAccul), scaled by 1/sig along
            # both matrix axes and divided by TAccul^2.
            # NOTE(review): TAccul is subtracted from every (i, j) entry, not
            # only the diagonal - confirm this is intended.
            weight = (tf.einsum(
                'lij,j->lij',
                tf.einsum('i,lij->lij', tf.constant(1 / sig, dtype=tf.float32),
                          tf.einsum("li,lj->lij", WAccul, WAccul) - TAccul),
                tf.constant(1 / sig, dtype=tf.float32))) / (TAccul * TAccul)
            # Combine the three drivers as (forward + antithetic - 2 * centre) / 2.
            GamTraj = GamTraj - tf.einsum(
                "l,lij->lij", 0.5 *
                (driver + driverAnti - 2 * driverPrev), weight)

        # One more step (using the last consumed noise column) before the
        # final D2gTf term is added.
        if (len(ListWeightGam) > 0):
            XNext = XNext + mu * self.TStepGam + sig * sqrtDt * dic[
                "RandG"][:, :, len(ListWeightGam) - 1]
            XNextAnti = XNextAnti + mu * self.TStepGam - sig * sqrtDt * dic[
                "RandG"][:, :, len(ListWeightGam) - 1]
        # Average of model.D2gTf over the path and its antithetic twin.
        GamTraj = GamTraj + 0.5 * (self.model.D2gTf(XNext) +
                                   self.model.D2gTf(XNextAnti))

        dic["weightLoc"], dic[
            "biasLoc"] = self.networkGam.getBackWeightAndBias(iStep)
        # Regress the trainable Gam output onto the simulated target.
        dic["Loss"] = tf.reduce_mean(tf.pow(dic["Gam"] - GamTraj, 2))
        dic["train"] = tf.compat.v1.train.AdamOptimizer(
            learning_rate=dic["LRate"]).minimize(dic["Loss"])
        return dic
Exemple #50
0
def matmul_joint_coords(transformation_matrices, coords):
    """Apply each batch entry's matrix to all coordinate vectors of that entry.

    Computes, via einsum 'Bij,BCj->BCi':
        out[b, c, i] = sum_j transformation_matrices[b, i, j] * coords[b, c, j]
    """
    contraction = 'Bij,BCj->BCi'
    transformed = tf.einsum(contraction, transformation_matrices, coords)
    return transformed
    def make_eval_graph(
        self,
        batch_size,
        source_length,
        target_length,
        bos_token_id,
    ):
        """Build the greedy-decoding evaluation graph for fixed lengths.

        The encoder unrolls `gru_update` over `source_length` steps. The
        decoder unrolls `self._attn_gru_update` over `target_length` steps,
        starting from a batch of `bos_token_id` tokens and feeding the argmax
        token of each step into the next one.

        Returns:
          dict with
            'placeholders': {'inputs': int32 [batch_size, source_length]}
            'outputs': {
              'outputs': decoded token ids stacked along axis 1,
              'attention_weights': per-step attention weights stacked along
                axis 1,
              'attention_weights_as_array': the same weights as a Python list
                with one tensor per decoding step,
            }
        """
        with tf.name_scope('eval_placeholders_len{0}'.format(source_length)):
            inputs = tf.placeholder(dtype=tf.int32,
                                    shape=[batch_size, source_length],
                                    name='inputs')
            bos_tokens = tf.constant(
                [bos_token_id] * batch_size,
                dtype=tf.int32,
                shape=[batch_size],
                name='bos_tokens',
            )

        with tf.name_scope('eval_encoder_len{0}'.format(source_length)):
            encoder_embeddings = tf.nn.embedding_lookup(
                self.source_embedding_matrix, inputs,
                name='embedded_encoder_inputs')
            encoder_state = tf.zeros([batch_size, self.hidden_size],
                                     name='h_start_encoder',
                                     dtype=tf.float32)
            encoder_states = []
            for step in range(source_length):
                encoder_state = gru_update(encoder_embeddings[:, step, :],
                                           encoder_state,
                                           self.source_gru_params, step)
                encoder_states.append(encoder_state)

            # (batch_size, num_steps * hidden_size)
            flat_encoder_states = tf.concat(
                encoder_states, axis=1, name='concatenated_states_encoder')
            # (batch_size, num_steps, hidden_size); this is what attention reads.
            encoder_memory = tf.reshape(
                flat_encoder_states,
                [batch_size, source_length, self.hidden_size],
                name='reshaped_states_encoder')
            # Encoder states multiplied by the attention matrix W:
            # (batch_size, num_steps, hidden_size)
            projected_memory = tf.einsum('ij,fgj->fgi',
                                         self.attention_params['W'],
                                         encoder_memory)
            attended_states = tf.identity(projected_memory,
                                          name='attended_states')
            # (batch_size, hidden_size)
            final_states = encoder_states[-1]

        with tf.name_scope('eval_decoder_len{0}'.format(source_length)):
            transposed_target_embeddings = tf.transpose(
                self.target_embedding_matrix, [1, 0],
                'transposed_target_embeddings')
            decoder_state = final_states
            current_tokens = bos_tokens
            attention_weights = []
            output_tokens = []
            for step in range(target_length):
                token_embeddings = tf.nn.embedding_lookup(
                    self.target_embedding_matrix, current_tokens,
                    name='embedded_decoder_inputs{0}'.format(step))
                # (batch_size, source_length) raw attention scores
                raw_scores = tf.identity(
                    tf.einsum('ik,ijk->ij', decoder_state, attended_states),
                    name='attention_weights_unnormalized{0}'.format(step))
                # (batch_size, source_length) after the softmax
                step_weights = tf.nn.softmax(
                    raw_scores,
                    name='attention_weights_normalized{0}'.format(step))
                attention_weights.append(step_weights)
                # (batch_size, hidden_size) weighted sum of encoder states
                context_vector = tf.identity(
                    tf.einsum('ij,ijk->ik', step_weights, encoder_memory),
                    name='context_vector{0}'.format(step))
                next_state = self._attn_gru_update(token_embeddings,
                                                   context_vector,
                                                   decoder_state, step)
                # Map the state back towards embedding space, then score it
                # against every target embedding.
                antiembeddings = tf.nn.xw_plus_b(
                    next_state,
                    self.softmax_params['W'],
                    self.softmax_params['b'],
                    name='antiembeddings{0}'.format(step))
                logits = tf.matmul(antiembeddings,
                                   transposed_target_embeddings,
                                   name='logits{0}'.format(step))
                # Greedy choice; it becomes the next step's input token.
                current_tokens = tf.argmax(logits, axis=1,
                                           name='output{0}'.format(step))
                output_tokens.append(current_tokens)
                decoder_state = next_state

            stacked_attention_weights = tf.stack(
                attention_weights, axis=1, name='attention_weights')
            outputs = tf.stack(output_tokens, axis=1, name='eval_outputs')

        placeholders = {'inputs': inputs}
        graph_outputs = {
            'outputs': outputs,
            'attention_weights': stacked_attention_weights,
            'attention_weights_as_array': attention_weights,
        }
        return {'placeholders': placeholders, 'outputs': graph_outputs}
Exemple #52
0
 def _logit(x, W, b, proj=None):
     """Return logits einsum("ibd,nd->ibn", x', W) + b.

     x' is `x` itself, or einsum("ibd,ed->ibe", x, proj) when `proj` is given.
     """
     projected = x if proj is None else tf.einsum("ibd,ed->ibe", x, proj)
     return tf.einsum("ibd,nd->ibn", projected, W) + b
    def make_training_graph(
        self,
        batch_size,
        source_length,
        target_length,
    ):
        """Make all the placeholders, outputs, and training ops.

        The encoder unrolls `gru_update` over `source_length` steps. The
        decoder is teacher-forced: it unrolls `self._attn_gru_update` over
        the first `target_length - 1` target tokens and is trained to predict
        `targets[:, 1:]`. Gradients are clipped by global norm and applied
        with plain gradient descent.

        Args:
          batch_size: static batch size used for placeholders and reshapes.
          source_length: number of encoder steps; also used to name the
            scopes and the summary collection.
          target_length: length of the `targets` placeholder.

        Returns:
          dict with
            'placeholders': 'inputs', 'targets', 'learning_rate', 'max_norm'
            'outputs': 'loss', 'num_correct_predictions'
            'train_ops': 'train_op', 'gradient_global_norm', 'summary'
        """
        summary_collections = ['summaries_len{0}'.format(source_length)]

        with tf.name_scope('placeholders_len{0}'.format(source_length)):
            inputs = tf.placeholder(
                dtype=tf.int32,
                shape=[batch_size, source_length],
                name='inputs',
            )
            targets = tf.placeholder(
                dtype=tf.int32,
                shape=[batch_size, target_length],
                name='targets',
            )
            learning_rate = tf.placeholder(
                dtype=tf.float32,
                shape=[],
                name='learning_rate',
            )
            max_norm = tf.placeholder(
                dtype=tf.float32,
                shape=[],
                name='max_norm',
            )

        with tf.name_scope('encoder_len{0}'.format(source_length)):
            embedded_encoder_inputs = tf.nn.embedding_lookup(
                self.source_embedding_matrix,
                inputs,
                name='embedded_encoder_inputs',
            )
            h_start_encoder = tf.zeros(
                [batch_size, self.hidden_size],
                name='h_start_encoder',
                dtype=tf.float32,
            )
            h_prev_encoder = h_start_encoder
            h_states_encoder = []
            for i in range(source_length):
                h_states_encoder.append(
                    gru_update(embedded_encoder_inputs[:, i, :],
                               h_prev_encoder, self.source_gru_params, i))
                h_prev_encoder = h_states_encoder[-1]

            # concatenated_states will have shape
            # (batch_size, num_steps * hidden_size)
            concatenated_states_encoder = tf.concat(
                h_states_encoder, axis=1, name='concatenated_states_encoder')
            # reshaped_states will have shape
            # (batch_size, num_steps, hidden_size)
            reshaped_states_encoder = tf.reshape(
                concatenated_states_encoder,
                [batch_size, source_length, self.hidden_size],
                name='reshaped_states_encoder',
            )
            # attended_states (encoder states multiplied by the attention
            # matrix W) will have shape (batch_size, num_steps, hidden_size)
            attended_states = tf.identity(
                tf.einsum(
                    'ij,fgj->fgi',
                    self.attention_params['W'],
                    reshaped_states_encoder,
                ),
                name='attended_states',
            )
            # final_states will have shape
            # (batch_size, hidden_size)
            final_states = h_states_encoder[-1]
            tf.summary.histogram(
                'concatenated_states_encoder',
                concatenated_states_encoder,
                collections=summary_collections,
            )

        with tf.name_scope('decoder_len{0}'.format(source_length)):
            embedded_decoder_inputs = tf.nn.embedding_lookup(
                self.target_embedding_matrix,
                targets,
                name='embedded_decoder_inputs',
            )
            h_prev_decoder = final_states
            attention_weights = []
            h_states_decoder = []
            # The last target token is never fed in: there is nothing left to
            # predict after it.
            for i in range(target_length - 1):
                # attention_weights_unnormalized will have shape
                # (batch_size, source_length)
                attention_weights_unnormalized = tf.identity(
                    tf.einsum(
                        'ik,ijk->ij',
                        h_prev_decoder,
                        attended_states,
                    ),
                    name='attention_weights_unnormalized{0}'.format(i))
                # attention_weights_normalized will have shape
                # (batch_size, source_length)
                attention_weights_normalized = tf.nn.softmax(
                    attention_weights_unnormalized,
                    name='attention_weights_normalized{0}'.format(i))
                attention_weights.append(attention_weights_normalized)
                # context_vector will have shape
                # (batch_size, hidden_size)
                context_vector = tf.identity(
                    tf.einsum(
                        'ij,ijk->ik',
                        attention_weights_normalized,
                        reshaped_states_encoder,
                    ),
                    name='context_vector{0}'.format(i))
                h_states_decoder.append(
                    self._attn_gru_update(embedded_decoder_inputs[:, i, :],
                                          context_vector, h_prev_decoder, i))
                h_prev_decoder = h_states_decoder[-1]

            # concatenated_states will have shape
            # (batch_size, num_steps * hidden_size)
            concatenated_states_decoder = tf.concat(
                h_states_decoder, axis=1, name='concatenated_states_decoder')
            # long_and_skinny_states will have shape
            # (batch_size * num_steps, hidden_size)
            long_and_skinny_states = tf.reshape(
                concatenated_states_decoder,
                [batch_size * (target_length - 1), self.hidden_size],
                name='long_and_skinny_states',
            )
            # long_and_skinny_antiembeddings is the decoder state passed
            # through the softmax W/b; it is scored against the target
            # embeddings below to obtain the logits.
            long_and_skinny_antiembeddings = tf.nn.xw_plus_b(
                long_and_skinny_states,
                self.softmax_params['W'],
                self.softmax_params['b'],
                name='long_and_skinny_antiembeddings',
            )
            transposed_target_embeddings = tf.transpose(
                self.target_embedding_matrix,
                [1, 0],
                'transposed_target_embeddings',
            )
            # long_and_skinny_logits will have shape
            # (batch_size * num_steps, vocab_size)
            long_and_skinny_logits = tf.matmul(
                long_and_skinny_antiembeddings,
                transposed_target_embeddings,
                name='long_and_skinny_logits',
            )
            # logits will have shape
            # (batch_size, num_steps, vocab_size)
            logits = tf.reshape(
                long_and_skinny_logits,
                [batch_size, (target_length - 1), self.target_vocab_size],
                name='logits')
            # concatenated_attention_weights will have shape
            # (batch_size, (target_length - 1) * source_length)
            concatenated_attention_weights = tf.concat(
                attention_weights,
                axis=1,
                name='concatenated_attention_weights')
            # Fix: this histogram previously logged the encoder states under
            # the decoder tag.
            tf.summary.histogram(
                'concatenated_states_decoder',
                concatenated_states_decoder,
                collections=summary_collections,
            )
            tf.summary.histogram(
                'concatenated_attention_weights',
                concatenated_attention_weights,
                collections=summary_collections,
            )

        with tf.name_scope('summary_len{0}'.format(source_length)):
            targets_without_start_token = tf.identity(
                targets[:, 1:], name='targets_without_start_token')
            batch_loss = tf.contrib.seq2seq.sequence_loss(
                logits=logits,
                targets=targets_without_start_token,
                weights=tf.ones_like(targets_without_start_token,
                                     dtype=tf.float32),
                average_across_timesteps=True,
                average_across_batch=True,
                name='batch_loss',
            )
            loss = tf.reduce_sum(
                batch_loss,
                name='loss',
            )
            predictions = tf.cast(
                tf.argmax(
                    logits,
                    axis=-1,
                ),
                tf.int32,
                name='predictions',
            )
            num_correct_predictions = tf.reduce_sum(
                tf.cast(tf.equal(predictions, targets_without_start_token),
                        tf.int32),
                name='num_correct_predictions',
            )
            tf.summary.scalar(
                'loss',
                loss,
                collections=summary_collections,
            )
            tf.summary.scalar(
                'num_correct_predictions',
                num_correct_predictions,
                collections=summary_collections,
            )

        with tf.name_scope('train_ops_len{0}'.format(source_length)):
            trainable_variables = tf.trainable_variables()
            unclipped_gradients = tf.gradients(loss, trainable_variables)
            # Norm is reported before clipping.
            gradient_global_norm = tf.global_norm(unclipped_gradients,
                                                  name='gradient_global_norm')
            clipped_gradients, _ = tf.clip_by_global_norm(
                unclipped_gradients, max_norm, name='clipped_gradients')
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            train_op = optimizer.apply_gradients(
                zip(clipped_gradients, trainable_variables), )

        merged_summaries = tf.summary.merge_all(
            key='summaries_len{0}'.format(source_length))

        return {
            'placeholders': {
                'inputs': inputs,
                'targets': targets,
                'learning_rate': learning_rate,
                'max_norm': max_norm,
            },
            'outputs': {
                'loss': loss,
                'num_correct_predictions': num_correct_predictions,
            },
            'train_ops': {
                'train_op':
                train_op,
                'gradient_global_norm':
                gradient_global_norm,
                'summary':
                tf.summary.merge([
                    self.merged_variable_summaries,
                    merged_summaries,
                ]),
            }
        }
    def __init__(
            self, review_num_u, review_num_i, review_len_u, review_len_i, user_num, item_num, num_classes,
            user_vocab_size, item_vocab_size, n_latent, embedding_id, attention_size,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Build the rating-prediction graph and expose its tensors as attributes.

        The graph embeds the words of every user/item review, runs a
        convolution + max-pool text encoder per review, weights the reviews
        with an id-aware attention layer, merges the result with id
        embeddings and predicts a single score per example.

        Args:
            review_num_u: number of reviews per user (second dim of `input_u`).
            review_num_i: number of reviews per item (second dim of `input_i`).
            review_len_u: tokens per user review (third dim of `input_u`).
            review_len_i: tokens per item review (third dim of `input_i`).
            user_num: the user id tables are allocated with `user_num + 2` rows.
            item_num: the item id tables are allocated with `item_num + 2` rows.
            num_classes: accepted but not used by this constructor.
            user_vocab_size: rows of the user-side word embedding table `W1`.
            item_vocab_size: rows of the item-side word embedding table `W2`.
            n_latent: width of the projected user/item feature vectors.
            embedding_id: width of every id embedding. The id embedding is
                added directly to the `n_latent`-wide projection, so the two
                widths have to be broadcast-compatible (in practice equal).
            attention_size: hidden width of the attention layers.
            embedding_size: width of the word embeddings.
            filter_sizes: iterable of convolution window heights (in tokens).
            num_filters: number of convolution filters per window height.
            l2_reg_lambda: weight of the L2 penalty on the attention matrices.

        Attributes of note:
            input_* / dropout_keep_prob / drop0: feed placeholders.
            predictions: predicted score, one per example.
            loss: `tf.nn.l2_loss` of the error plus the weighted L2 penalty.
            mae: mean absolute error.
            accuracy: root mean squared error (despite its name).
        """
        # ---- Feed placeholders -------------------------------------------
        self.input_u = tf.placeholder(tf.int32, [None, review_num_u, review_len_u], name="input_u")
        self.input_i = tf.placeholder(tf.int32, [None, review_num_i, review_len_i], name="input_i")
        # Ids attached to each individual review; they are looked up in the
        # *opposite* side's id table in the attention layer below.
        self.input_reuid = tf.placeholder(tf.int32, [None, review_num_u], name='input_reuid')
        # Fix: this placeholder used to be created with name='input_reuid' as
        # well, so TensorFlow silently renamed it to 'input_reuid_1'.
        self.input_reiid = tf.placeholder(tf.int32, [None, review_num_i], name='input_reiid')
        self.input_y = tf.placeholder(tf.float32, [None, 1], name="input_y")
        self.input_uid = tf.placeholder(tf.int32, [None, 1], name="input_uid")
        self.input_iid = tf.placeholder(tf.int32, [None, 1], name="input_iid")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        # Declared for feeding, but not referenced again in this constructor.
        self.drop0 = tf.placeholder(tf.float32, name="dropout0")

        # Id embeddings consumed by the attention layer.
        iidW = tf.Variable(tf.random_uniform([item_num + 2, embedding_id], -0.1, 0.1), name="iidW")
        uidW = tf.Variable(tf.random_uniform([user_num + 2, embedding_id], -0.1, 0.1), name="uidW")

        l2_loss = tf.constant(0.0)

        # ---- Word embeddings ---------------------------------------------
        with tf.name_scope("user_embedding"):
            self.W1 = tf.Variable(
                tf.random_uniform([user_vocab_size, embedding_size], -1.0, 1.0),
                name="W1")
            self.embedded_user = tf.nn.embedding_lookup(self.W1, self.input_u)
            # Trailing channel axis for conv2d.
            self.embedded_users = tf.expand_dims(self.embedded_user, -1)

        with tf.name_scope("item_embedding"):
            self.W2 = tf.Variable(
                tf.random_uniform([item_vocab_size, embedding_size], -1.0, 1.0),
                name="W2")
            self.embedded_item = tf.nn.embedding_lookup(self.W2, self.input_i)
            self.embedded_items = tf.expand_dims(self.embedded_item, -1)

        # ---- Per-review text encoder, user side --------------------------
        pooled_outputs_u = []
        for filter_size in filter_sizes:
            with tf.name_scope("user_conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                # Fold the review axis into the batch axis so that every
                # review is convolved independently.
                self.embedded_users = tf.reshape(self.embedded_users, [-1, review_len_u, embedding_size, 1])

                conv = tf.nn.conv2d(
                    self.embedded_users,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pool over every window position of the review
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, review_len_u - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs_u.append(pooled)
        num_filters_total = num_filters * len(filter_sizes)
        # NOTE(review): the (axis, values) argument order is the pre-1.0
        # TensorFlow signature; TF >= 1.0 expects tf.concat(values, axis).
        # Confirm against the TensorFlow version this project pins.
        self.h_pool_u = tf.concat(3,pooled_outputs_u)

        # Unfold the review axis again: one feature vector per review.
        self.h_pool_flat_u = tf.reshape(self.h_pool_u, [-1, review_num_u, num_filters_total])

        # ---- Per-review text encoder, item side --------------------------
        pooled_outputs_i = []

        for filter_size in filter_sizes:
            with tf.name_scope("item_conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                self.embedded_items = tf.reshape(self.embedded_items, [-1, review_len_i, embedding_size, 1])
                conv = tf.nn.conv2d(
                    self.embedded_items,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Max-pool over every window position of the review
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, review_len_i - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs_i.append(pooled)
        num_filters_total = num_filters * len(filter_sizes)
        # NOTE(review): same pre-1.0 tf.concat argument order as above.
        self.h_pool_i = tf.concat(3,pooled_outputs_i)
        self.h_pool_flat_i = tf.reshape(self.h_pool_i, [-1, review_num_i, num_filters_total])

        with tf.name_scope("dropout"):
            # The keep probability is hard-coded to 1.0 here, so nothing is
            # dropped at this stage.
            self.h_drop_u = tf.nn.dropout(self.h_pool_flat_u, 1.0)
            self.h_drop_i = tf.nn.dropout(self.h_pool_flat_i, 1.0)

        # ---- Review-level attention --------------------------------------
        with tf.name_scope("attention"):
            Wau = tf.Variable(tf.random_uniform([num_filters_total, attention_size],
                                                -0.1,
                                                0.1),
                              name='Wau')
            Wru = tf.Variable(tf.random_uniform([embedding_id, attention_size],
                                                -0.1,
                                                0.1),
                              name='Wru')
            Wpu = tf.Variable(tf.random_uniform([attention_size, 1],
                                                -0.1,
                                                0.1),
                              name='Wpu')
            bau = tf.Variable(tf.constant(0.1, shape=[attention_size]), name="bau")
            bbu = tf.Variable(tf.constant(0.1, shape=[1]), name="bbu")
            # User-side reviews are scored together with an *item* id
            # embedding (input_reuid indexes iidW).
            self.iid_a = tf.nn.relu(tf.nn.embedding_lookup(iidW, self.input_reuid))

            # Attention logit per review:
            #   relu(h @ Wau + id_embedding @ Wru + bau) @ Wpu + bbu
            self.u_j = tf.einsum('ajk,kl->ajl',
                                 tf.nn.relu(tf.einsum('ajk,kl->ajl',
                                                      self.h_drop_u,
                                                      Wau)
                                            + tf.einsum('ajk,kl->ajl',
                                                        self.iid_a,
                                                        Wru)
                                            + bau),
                                 Wpu) + bbu  # None*u_len*1

            # Normalise over the review axis.
            self.u_a = tf.nn.softmax(self.u_j, 1)  # none*u_len*1

            Wai = tf.Variable(tf.random_uniform([num_filters_total, attention_size],
                                                -0.1,
                                                0.1),
                              name='Wai')
            Wri = tf.Variable(tf.random_uniform([embedding_id, attention_size],
                                                -0.1,
                                                0.1),
                              name='Wri')
            Wpi = tf.Variable(tf.random_uniform([attention_size, 1],
                                                -0.1,
                                                0.1),
                              name='Wpi')

            bai = tf.Variable(tf.constant(0.1,
                                          shape=[attention_size]),
                              name="bai")
            bbi = tf.Variable(tf.constant(0.1,
                                          shape=[1]),
                              name="bbi")

            # Item-side reviews are scored together with a *user* id
            # embedding (input_reiid indexes uidW).
            self.uid_a = tf.nn.relu(tf.nn.embedding_lookup(uidW, self.input_reiid))
            self.i_j =tf.einsum('ajk,kl->ajl', tf.nn.relu(
                tf.einsum('ajk,kl->ajl', self.h_drop_i, Wai) + tf.einsum('ajk,kl->ajl', self.uid_a, Wri) + bai),
                                             Wpi)+bbi

            self.i_a = tf.nn.softmax(self.i_j,1)  # none*len*1

            # Only these four attention matrices are L2-regularised.
            l2_loss += tf.nn.l2_loss(Wau)
            l2_loss += tf.nn.l2_loss(Wru)
            l2_loss += tf.nn.l2_loss(Wri)
            l2_loss += tf.nn.l2_loss(Wai)

        # ---- Attention-weighted sum of the review features ----------------
        with tf.name_scope("add_reviews"):
            self.u_feas = tf.reduce_sum(tf.multiply(self.u_a, self.h_drop_u), 1)
            self.u_feas = tf.nn.dropout(self.u_feas, self.dropout_keep_prob)
            self.i_feas = tf.reduce_sum(tf.multiply(self.i_a, self.h_drop_i), 1)
            self.i_feas = tf.nn.dropout(self.i_feas, self.dropout_keep_prob)

        # ---- Merge review features with id embeddings ----------------------
        with tf.name_scope("get_fea"):

            # A second pair of id tables, separate from iidW/uidW above.
            iidmf = tf.Variable(tf.random_uniform([item_num + 2, embedding_id], -0.1, 0.1), name="iidmf")
            uidmf = tf.Variable(tf.random_uniform([user_num + 2, embedding_id], -0.1, 0.1), name="uidmf")

            self.uid = tf.nn.embedding_lookup(uidmf,self.input_uid)
            self.iid = tf.nn.embedding_lookup(iidmf,self.input_iid)
            # Drop the length-1 axis that comes from the [None, 1] id inputs.
            self.uid = tf.reshape(self.uid,[-1,embedding_id])
            self.iid = tf.reshape(self.iid,[-1,embedding_id])

            Wu = tf.Variable(tf.random_uniform([num_filters_total, n_latent],
                                               -0.1,
                                               0.1),
                             name='Wu')

            bu = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bu")
            self.u_feas = tf.matmul(self.u_feas, Wu)+self.uid + bu

            Wi = tf.Variable(
                tf.random_uniform([num_filters_total, n_latent], -0.1, 0.1), name='Wi')
            bi = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bi")
            self.i_feas = tf.matmul(self.i_feas, Wi) +self.iid+ bi

        # ---- Prediction layer ----------------------------------------------
        with tf.name_scope('ncf'):

            # Element-wise user/item interaction.
            self.FM = tf.multiply(self.u_feas, self.i_feas)
            self.FM = tf.nn.relu(self.FM)

            self.FM=tf.nn.dropout(self.FM,self.dropout_keep_prob)

            Wmul=tf.Variable(
                tf.random_uniform([n_latent, 1], -0.1, 0.1), name='wmul')

            self.mul=tf.matmul(self.FM,Wmul)
            self.score=tf.reduce_sum(self.mul,1,keep_dims=True)

            # Per-user and per-item scalar biases plus one global bias.
            self.uidW2 = tf.Variable(tf.constant(0.1, shape=[user_num + 2]), name="uidW2")
            self.iidW2 = tf.Variable(tf.constant(0.1, shape=[item_num + 2]), name="iidW2")
            self.u_bias = tf.gather(self.uidW2, self.input_uid)
            self.i_bias = tf.gather(self.iidW2, self.input_iid)
            self.Feature_bias = self.u_bias + self.i_bias

            self.bised = tf.Variable(tf.constant(0.1), name='bias')

            self.predictions = self.score + self.Feature_bias + self.bised

        with tf.name_scope("loss"):
            # tf.nn.l2_loss is sum(error ** 2) / 2 over the whole batch.
            losses = tf.nn.l2_loss(tf.subtract(self.predictions, self.input_y))

            self.loss = losses + l2_reg_lambda * l2_loss

        with tf.name_scope("accuracy"):
            self.mae = tf.reduce_mean(tf.abs(tf.subtract(self.predictions, self.input_y)))
            # Root mean squared error, kept under its historical name.
            self.accuracy =tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.predictions, self.input_y))))
Exemple #55
0
import tensorflow as tf


# Small demo of tf.einsum contracting a shared leading axis.
a = tf.Variable(tf.ones([2, 20, 1]))
b = tf.Variable(tf.ones([20, 1, 20]))
# "aij,ijk->ajk": the size-20 axis `i` is summed out while the size-1 axis
# `j` is kept, so the result has shape (2, 1, 20).
c = tf.einsum("aij,ijk->ajk", a, b)
with tf.Session() as sess:
    # tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer().
    sess.run(tf.global_variables_initializer())
    s = sess.run([c, b, a])

# Only the shape of the einsum result (the first fetched value) is reported.
# The call form of print works on both Python 2 and Python 3.
print(s[0].shape)
Exemple #56
0
    def call(self, inputs, training=False):
        """Run relative-position multi-head attention over one segment.

        Args:
            inputs: six-element sequence
                `(w, r, attn_mask, mems, head_mask, output_attentions)`.
                `w` is the current segment with leading dims (qlen, bsz),
                `r` the relative position input with leading dim rlen,
                `attn_mask` an optional rank-2 mask whose non-zero entries
                are pushed to -1e30 before the softmax, `mems` optional
                cached states prepended to `w` along axis 0, `head_mask` an
                optional multiplier applied to the attention probabilities,
                and `output_attentions` a flag requesting the probabilities.
            training: forwarded to the dropout layers.

        Returns:
            A list holding the attention output and, when
            `output_attentions` resolves to True, the attention
            probabilities as a second element.
        """
        w, r, attn_mask, mems, head_mask, output_attentions = inputs
        w_dims = shape_list(w)
        qlen, bsz = w_dims[0], w_dims[1]
        rlen = shape_list(r)[0]

        # Keys and values also cover the memory, when there is one.
        has_mems = mems is not None
        qkv_input = tf.concat([mems, w], 0) if has_mems else w
        if self.pre_lnorm:
            qkv_input = self.layer_norm(qkv_input)
        w_heads = self.qkv_net(qkv_input)
        r_head_k = self.r_net(r)

        w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, axis=-1)
        if has_mems:
            # Queries are only needed for the current segment.
            w_head_q = w_head_q[-qlen:]

        klen = shape_list(w_head_k)[0]
        per_head = (self.n_head, self.d_head)

        w_head_q = tf.reshape(w_head_q, (qlen, bsz) + per_head)  # qlen x bsz x n_head x d_head
        w_head_k = tf.reshape(w_head_k, (klen, bsz) + per_head)  # klen x bsz x n_head x d_head
        w_head_v = tf.reshape(w_head_v, (klen, bsz) + per_head)  # klen x bsz x n_head x d_head
        r_head_k = tf.reshape(r_head_k, (rlen,) + per_head)  # rlen x n_head x d_head

        # Content-based term: (query + r_w_bias) against the keys.
        content_q = w_head_q + self.r_w_bias  # qlen x bsz x n_head x d_head
        AC = tf.einsum("ibnd,jbnd->ijbn", content_q, w_head_k)  # qlen x klen x bsz x n_head

        # Position-based term: (query + r_r_bias) against the position keys,
        # then realigned by _rel_shift.
        position_q = w_head_q + self.r_r_bias
        BD = self._rel_shift(
            tf.einsum("ibnd,jnd->ijbn", position_q, r_head_k))  # qlen x klen x bsz x n_head

        # [qlen x klen x bsz x n_head]
        attn_score = (AC + BD) * self.scale

        if attn_mask is not None:
            # Broadcast the mask over the batch and head axes; masked entries
            # become -1e30 so that the softmax sends them to ~0.
            mask = attn_mask[:, :, None, None]
            attn_score = attn_score * (1 - mask) - 1e30 * mask

        # Normalise over the key axis. [qlen x klen x bsz x n_head]
        attn_prob = tf.nn.softmax(attn_score, axis=1)
        attn_prob = self.dropatt(attn_prob, training=training)

        # Mask heads if we want to
        if head_mask is not None:
            attn_prob = attn_prob * head_mask

        # Weighted sum of the values. [qlen x bsz x n_head x d_head]
        attn_vec = tf.einsum("ijbn,jbnd->ibnd", attn_prob, w_head_v)

        # Merge the head axes back into a single feature axis.
        vec_dims = shape_list(attn_vec)
        attn_vec = tf.reshape(
            attn_vec, (vec_dims[0], vec_dims[1], self.n_head * self.d_head))

        # Output projection followed by dropout.
        attn_out = self.drop(self.o_net(attn_vec), training=training)

        # Residual connection; with post-layer-norm the sum is normalised too.
        residual = w + attn_out
        outputs = [residual if self.pre_lnorm else self.layer_norm(residual)]

        if cast_bool_to_primitive(output_attentions) is True:
            outputs.append(attn_prob)

        return outputs
Exemple #57
0
def sh_invar_conv(signal, patches_idx, conv_tensor, kernel, l_max):
    """Apply `sh_invar_conv_` and mix its output with a learned kernel.

    `signal`, `patches_idx`, `conv_tensor` and `l_max` are forwarded
    unchanged to `sh_invar_conv_`. Its rank-5 result (axes ``b v n r j``) is
    contracted with the rank-4 `kernel` (axes ``i n r j``) over ``n``, ``r``
    and ``j``, giving a rank-3 tensor with axes ``b v i``.
    """
    features = sh_invar_conv_(signal, patches_idx, conv_tensor, l_max)
    return tf.einsum('inrj,bvnrj->bvi', kernel, features)
def single_mode_gate(matrix, mode, in_modes, pure=True, batched=False):
    """Apply a single-mode gate to one mode of a state tensor via einsum.

    The einsum equation is spelled with letters taken from the module-level
    `indices` string. Its basic forms are:

    'ab,cde...b...xyz->cde...a...xyz' (pure state)
    'ab,ef...bc...xyz,cd->ef...ad...xyz' (mixed state)

    For mixed states the third operand is the conjugate transpose of
    `matrix`. With `batched=True` every operand and the result carry one
    shared leading batch letter.

    Args:
        matrix: the gate; passed as the first einsum operand.
        mode: zero-based position of the mode the gate acts on.
        in_modes: state tensor; the number of modes is derived from the
            length of its `.shape` (one axis per mode for pure states, two
            for mixed states, plus the batch axis when `batched`).
        pure: whether `in_modes` is a pure state.
        batched: whether a leading batch axis is present.

    Returns:
        The result of `tf.einsum` for the assembled equation.

    Raises:
        ValueError: if `in_modes` has no modes or `mode` is out of range.
        NotImplementedError: if there are more modes than `indices` has
            letters for.
    """
    batch_offset = 1 if batched else 0
    # Letters each mode consumes: one for a ket, two for a density matrix.
    mode_size = 1 if pure else 2
    num_modes = (len(in_modes.shape) - batch_offset) // mode_size
    max_len = len(indices) - 2 * mode_size - batch_offset

    if num_modes == 0:
        raise ValueError("'in_modes' must have at least one mode")
    if num_modes > max_len:
        raise NotImplementedError(
            "The max number of supported modes for this operation is currently {}".format(max_len)
        )
    if mode < 0 or mode >= num_modes:
        raise ValueError("'mode' argument is not compatible with number of in_modes")

    # Letter layout of `indices`: [batch][gate letters][letters for the
    # modes that are left untouched].
    batch_index = indices[:batch_offset]
    left_gate_str = indices[batch_offset : batch_offset + 2]  # |a><b|
    gate_end = batch_offset + 2 * mode_size
    other_modes_indices = indices[gate_end : batch_offset + (1 + num_modes) * mode_size]

    # Untouched letters that sit before / after the target mode.
    split_at = mode * mode_size
    before, after = other_modes_indices[:split_at], other_modes_indices[split_at:]

    if pure:
        operands = [
            batch_index + left_gate_str,
            batch_index + before + left_gate_str[1] + after,
        ]
        eqn_rhs = batch_index + before + left_gate_str[0] + after
        einsum_inputs = [matrix, in_modes]
    else:
        right_gate_str = indices[batch_offset + 2 : gate_end]  # |c><d|
        operands = [
            batch_index + left_gate_str,
            batch_index + before + left_gate_str[1] + right_gate_str[0] + after,
            batch_index + right_gate_str,
        ]
        eqn_rhs = batch_index + before + left_gate_str[0] + right_gate_str[1] + after
        # Right-multiply by the conjugate transpose; the batch axis, if any,
        # stays in front.
        transposed_axis = [0, 2, 1] if batched else [1, 0]
        einsum_inputs = [
            matrix,
            in_modes,
            tf.transpose(tf.math.conj(matrix), transposed_axis),
        ]

    eqn = ",".join(operands) + "->" + eqn_rhs
    return tf.einsum(eqn, *einsum_inputs)
def inference(x, y, n_batch, is_training,
              input_digits=None,
              output_digits=None,
              n_hidden=None,
              n_out=None):
    """Build an attention-wrapped LSTM encoder/decoder and return its output.

    Args:
        x: encoder input; step `t` is read as `x[:, t, :]`.
        y: decoder target; step `t - 1` is fed at decoder step `t` when
            teacher forcing is active.
        n_batch: batch size used for the encoder's zero state.
        is_training: teacher forcing is used only when this is the Python
            object `True` (identity check).
            NOTE(review): a placeholder/tensor or any other truthy value
            therefore selects the free-running branch - confirm this is
            what callers intend.
        input_digits: number of encoder steps; also passed as the second
            argument of both `AttentionCellWrapper`s.
        output_digits: number of decoder output positions.
        n_hidden: LSTM size.
        n_out: width of the output layer.

    Returns:
        Softmax outputs reshaped to `[-1, output_digits, n_out]`.
    """
    def weight_variable(shape):
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

    def bias_variable(shape):
        return tf.Variable(tf.zeros(shape, dtype=tf.float32))

    def attention_lstm():
        # Same cell recipe for the encoder and the decoder.
        cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
        return rnn.AttentionCellWrapper(cell,
                                        input_digits,
                                        state_is_tuple=True)

    teacher_forcing = is_training is True

    # Encode
    encoder = attention_lstm()
    enc_state = encoder.zero_state(n_batch, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for step in range(input_digits):
            if step > 0:
                # Share the cell weights across time steps.
                tf.get_variable_scope().reuse_variables()
            enc_out, enc_state = encoder(x[:, step, :], enc_state)
            encoder_outputs.append(enc_out)
            encoder_states.append(enc_state)

    # Decode, starting from the encoder's last state and output
    decoder = attention_lstm()
    dec_state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Define the output layer's weights and bias up front
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for step in range(1, output_digits):
            if step > 1:
                tf.get_variable_scope().reuse_variables()

            if teacher_forcing:
                dec_out, dec_state = decoder(y[:, step - 1, :], dec_state)
            else:
                # Compute the previous step's output and feed it back in
                probs = tf.nn.softmax(tf.matmul(decoder_outputs[-1], V) + c)
                outputs.append(probs)
                # NOTE(review): the one-hot depth is `output_digits`, not
                # `n_out` - confirm this matches the decoder's input width.
                fed_back = tf.one_hot(tf.argmax(probs, -1), depth=output_digits)

                dec_out, dec_state = decoder(fed_back, dec_state)

            decoder_outputs.append(dec_out)

    if teacher_forcing:
        stacked = tf.reshape(tf.concat(decoder_outputs, axis=1),
                             [-1, output_digits, n_hidden])

        # Apply the output layer to every time step at once.
        return tf.nn.softmax(tf.einsum('ijk,kl->ijl', stacked, V) + c)

    # Compute the last output
    outputs.append(tf.nn.softmax(tf.matmul(decoder_outputs[-1], V) + c))

    return tf.reshape(tf.concat(outputs, axis=1),
                      [-1, output_digits, n_out])
Exemple #60
0
 def call(self, inputs):
     """Contract `inputs` with `self.kernel` and post-process the result.

     The einsum "nhj,hji->nhi" keeps the `n` and `h` axes and sums over `j`,
     using a separate (j, i) slice of the kernel for every `h`. The result
     is handed to `self._finalize`, whose return value is returned.
     """
     return self._finalize(tf.einsum("nhj,hji->nhi", inputs, self.kernel))