Ejemplo n.º 1
0
def auto_regressive_model(input, target, weights, bias):
    """
    Builds the auto regressive model. For details on the model, refer to the written report

    Args:
        input: tensor that is normalized and projected with ``weights['M1']``.
        target: tensor that is normalized and combined with ``weights['M2']``
            by ``cumsum_weights``.
        weights: mapping providing the ``'M1'``, ``'M2'`` and ``'W'`` tensors.
        bias: unused by this function; kept so existing callers keep working.

    Returns:
        A tensor reshaped to ``[batch_size, NUM_NOTES]``.
    """
    # V_d
    hidden01 = tf.matmul(normalize(input), weights['M1'])
    # Multiplying by ones replicates V_d NUM_NOTES times along dimension 2.
    hidden01 = tf.batch_matmul(tf.expand_dims(hidden01, 2),
                               tf.ones([batch_size, 1, NUM_NOTES]))

    # V_c
    hidden02 = cumsum_weights(normalize(target), weights['M2'], D)

    hidden = hidden01 + hidden02

    # The W operand is identical for every example, so build it once instead
    # of once per loop iteration.
    w_columns = tf.expand_dims(tf.transpose(weights['W']), 2)

    # One batched product per example; a single concat at the end replaces the
    # chain of pairwise concats (and the dead initial ``y`` placeholder).
    per_example = [
        tf.batch_matmul(
            tf.expand_dims(tf.transpose(tf.squeeze(example)), 1), w_columns)
        for example in tf.split(0, batch_size, hidden)
    ]
    y = tf.squeeze(tf.concat(0, per_example))

    return tf.reshape(y, [batch_size, NUM_NOTES])
Ejemplo n.º 2
0
  def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    Stores the per-class distances in `self._all_scores` and the distance to
    the assigned class in `self._scores`.

    Args:
      data: iterable of shard tensors; each shard is scored against every
        class.

    Raises:
      ValueError: if `self._covariance_type` is neither FULL_COVARIANCE nor
        DIAG_COVARIANCE (previously this surfaced as an unbound `cov`).
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []

    # The tiled inverse covariance depends only on the class, so build it once
    # per class instead of once per (shard, class) pair.
    inv_covs = []
    for c in xrange(self._num_classes):
      if self._covariance_type == FULL_COVARIANCE:
        cov = self._covs[c, :, :]
      elif self._covariance_type == DIAG_COVARIANCE:
        cov = tf.diag(self._covs[c, :])
      else:
        raise ValueError(
            'Unsupported covariance type: %r' % (self._covariance_type,))
      inverse = tf.matrix_inverse(cov + self._min_var)
      inv_covs.append(tf.tile(
          tf.expand_dims(inverse, 0),
          tf.pack([self._num_examples, 1, 1])))

    for shard in data:
      all_scores = []
      shard = tf.expand_dims(shard, 0)
      for c, inv_cov in enumerate(inv_covs):
        diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
        m_left = tf.batch_matmul(diff, inv_cov)
        # sqrt(diff * cov^-1 * diff^T) for every example of the shard.
        all_scores.append(tf.sqrt(tf.batch_matmul(
            m_left, tf.transpose(diff, perm=[0, 2, 1])
        )))
      self._all_scores.append(tf.reshape(
          tf.concat(1, all_scores),
          tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    # (row, assigned class) pairs used to pick one score per example.
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
Ejemplo n.º 3
0
  def test_lanczos_bidiag(self):
    """Checks the two defining equations of the Lanczos bidiagonalization.

    Uses the enclosing-scope settings `shape_`, `dtype_`, `steps_`,
    `orthogonalize_` and `use_static_shape_`.
    """
    np.random.seed(1)
    num_entries = np.prod(shape_)
    a_np = np.random.uniform(low=-1.0, high=1.0, size=num_entries)
    a_np = a_np.reshape(shape_).astype(dtype_)
    tol = 1e-5
    if dtype_ == np.float64:
      tol = 1e-12

    with self.test_session() as sess:
      # With a static shape the matrix is baked in as a constant; otherwise it
      # is fed through a placeholder at run time.
      if use_static_shape_:
        a = tf.constant(a_np)
        feed = None
      else:
        a = tf.placeholder(dtype_)
        feed = {a: a_np}
      operator = util.create_operator(a)
      lbd = lanczos.lanczos_bidiag(
          operator, steps_, orthogonalize=orthogonalize_)

      # The computed factorization should satisfy the equations
      #  A * V = U * B
      #  A' * U[:, :-1] = V * B[:-1, :]'
      av = tf.batch_matmul(a, lbd.v)
      ub = lanczos.bidiag_matmul(lbd.u, lbd.alpha, lbd.beta, adjoint_b=False)
      atu = tf.batch_matmul(a, lbd.u[:, :-1], adj_x=True)
      vbt = lanczos.bidiag_matmul(lbd.v, lbd.alpha, lbd.beta, adjoint_b=True)

      av_val, ub_val, atu_val, vbt_val = sess.run(
          [av, ub, atu, vbt], feed_dict=feed)
      self.assertAllClose(av_val, ub_val, atol=tol, rtol=tol)
      self.assertAllClose(atu_val, vbt_val, atol=tol, rtol=tol)
def log_likelihood(batch):
    """Return the log-likelihood of ``batch`` under the mixture, summed over samples.

    Computes ``sum_n log(sum_k pi_k * exp(-||x_n - mu_k||^2))`` using the
    module-level ``means``, ``mixture_weights``, ``N``, ``D`` and ``K``.

    Args:
        batch: NxD matrix, where N is the number of samples in the batch and
            D is the dimension of each sample.

    Returns:
        The log-likelihood reduced over the sample axis.
    """
    # Multiplying by ones replicates the samples once per component: (N, D, K).
    samples = tf.batch_matmul(tf.reshape(batch, [N, D, 1]), tf.ones([N, 1, K]))
    # Same trick for the means matrix (presumably (K, D, 1) -- TODO confirm),
    # then reorder the axes to line up with ``samples``: (N, D, K).
    centers = tf.batch_matmul(means, tf.ones([K, 1, N]))
    centers = tf.transpose(centers, [2, 1, 0])

    # (x - mu)
    diff = samples - centers
    # (x - mu).T (x - mu): squared distance of every sample to every
    # component, reduced over the D axis -> (N, K). The previous formulation
    # multiplied diff^T by diff (an (N, K, K) tensor) and summed its last
    # axis, which mixed in cross terms between different components.
    sq_dist = tf.reduce_sum(tf.square(diff), 1)

    # exp(-(x - mu).T (x - mu))
    component_density = tf.exp(-sq_dist)
    # Multiply by the mixture weights, then take the log.
    sample_log_lik = tf.log(tf.matmul(component_density, mixture_weights))
    # Sum over all samples of the batch.
    return tf.reduce_sum(sample_log_lik, 0)
Ejemplo n.º 5
0
    def build_node(self, x_in, c_in, h_in, scope="lstm_cell"):
        """Build one LSTM step inside ``scope``.

        Args:
            x_in: input tensor for this step; concatenated with ``h_in`` on axis 2.
            c_in: previous cell state.
            h_in: previous hidden output.
            scope: variable scope name wrapping the created ops.

        Returns:
            Tuple ``(c, h)`` with the new cell state and the new hidden output.
        """
        with tf.variable_scope(scope):
            joined = tf.concat(2, [x_in, h_in])

            # A leading column of ones lets the bias live inside the weight matrices.
            bias_column = tf.constant(np.ones([batch_size, 1, 1]), name="b", dtype=tf.float32)
            gate_input = tf.concat(2, [bias_column, joined])

            def project(gate_weights):
                return tf.batch_matmul(gate_input, gate_weights)

            # forget gate layer
            forget_gate = tf.sigmoid(project(self.w_f))

            # input gate and candidate values
            input_gate = tf.sigmoid(project(self.w_i))
            candidate = tf.tanh(project(self.w_c))

            # new cell state: keep part of the old state, add the scaled candidate
            kept = tf.mul(forget_gate, c_in)
            added = tf.mul(input_gate, candidate)
            c = tf.add(kept, added)

            # new scaled output
            output_gate = tf.sigmoid(project(self.w_o))
            h = tf.mul(output_gate, tf.tanh(c))
            return (c, h)
Ejemplo n.º 6
0
    def __init__(self, memory_cells, query, project_query=False):
        """Define dot-product attention of a query over memory cells.

        Stores the masked logits, the attention probabilities and the
        probability-weighted sum of the cells on the instance.

        Args:
            memory_cells (SequenceBatch): a SequenceBatch containing a Tensor of shape (batch_size, num_cells, cell_dim)
            query (Tensor): a tensor of shape (batch_size, query_dim).
            project_query (bool): defaults to False. If True, the query goes through an extra projection layer to
                coerce it to cell_dim.
        """
        values = memory_cells.values
        mask = memory_cells.mask
        cell_dim = values.get_shape().as_list()[2]

        if project_query:
            # project the query up/down to cell_dim
            self._projection_layer = Dense(cell_dim, activation='linear')
            query = self._projection_layer(query)  # (batch_size, cell_dim)

        # batch matrix multiply to compute logit scores for all choices in all batches
        query_column = tf.expand_dims(query, 2)  # (batch_size, cell_dim, 1)
        scores = tf.batch_matmul(values, query_column)  # (batch_size, num_cells, 1)
        scores = tf.squeeze(scores, [2])  # (batch_size, num_cells)

        # set all pad logits to negative infinity
        padded = SequenceBatch(scores, mask).with_pad_value(-float('inf'))

        # normalize to get probs
        attention = tf.nn.softmax(padded.values)  # (batch_size, num_cells)

        # probability-weighted sum of the memory cells
        weighted = tf.batch_matmul(tf.expand_dims(attention, 1), values)  # (batch_size, 1, cell_dim)
        weighted = tf.squeeze(weighted, [1])  # (batch_size, cell_dim)

        self._logits = padded.values
        self._probs = attention
        self._retrieved = weighted
Ejemplo n.º 7
0
 def lstm_cell(i, o, state):
   """
   Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf
   Note that in this formulation, we omit the various connections between the
   previous state and the gates.

   The four gate projections are computed with one batched matmul against the
   stacked weights fico_x / fico_m / fico_b; along the leading axis the slices
   are used as 0 = forget gate, 1 = input gate, 2 = update, 3 = output gate.

   Returns the new output and the new state.
   """
   # Four copies of the input and of the previous output, one per gate.
   stacked_in = tf.pack([i] * 4)
   stacked_out = tf.pack([o] * 4)

   from_input = tf.batch_matmul(stacked_in, fico_x)
   from_output = tf.batch_matmul(stacked_out, fico_m)
   pre_activation = from_input + from_output + fico_b

   forget_gate = tf.sigmoid(pre_activation[0, :, :])
   input_gate = tf.sigmoid(pre_activation[1, :, :])
   update = tf.tanh(pre_activation[2, :, :])

   state = forget_gate * state + input_gate * update

   output_gate = tf.sigmoid(pre_activation[3, :, :])

   return output_gate * tf.tanh(state), state
Ejemplo n.º 8
0
def extract_patch(x, f_y, f_x, nchannels):
    """Apply the row/column filters to every channel of ``x``.

    Each channel is computed as ``f_y^T * x_channel * f_x`` and the channels
    are concatenated along the last axis.

    Args:
        x: [B, H, W, D]
        f_y: [B, H, FH]
        f_x: [B, W, FW]
        nchannels: D

    Returns:
        patch: [B, FH, FW, D]
    """
    fsize_h = tf.shape(f_y)[2]
    fsize_w = tf.shape(f_x)[2]
    height = tf.shape(x)[1]
    width = tf.shape(x)[2]

    def _filter_channel(dd):
        # [B, H, W]
        channel = tf.slice(x, [0, 0, 0, dd], [-1, -1, -1, 1])
        channel = tf.reshape(channel, tf.pack([-1, height, width]))
        filtered = tf.batch_matmul(
            tf.batch_matmul(f_y, channel, adj_x=True), f_x)
        # Keep a trailing singleton axis so the channels can be concatenated.
        return tf.reshape(filtered, tf.pack([-1, fsize_h, fsize_w, 1]))

    return tf.concat(3, [_filter_channel(dd) for dd in xrange(nchannels)])
Ejemplo n.º 9
0
  def Test(self):
    """Compares tf.self_adjoint_eig / self_adjoint_eigvals with numpy.

    Builds a random symmetric matrix, tiles it over the batch dimensions given
    by the enclosing-scope `shape_`, and checks both the eigenvalue-only and
    the full decomposition.
    """
    np.random.seed(1)
    n = shape_[-1]
    batch_shape = shape_[:-2]
    a = np.random.uniform(low=-1.0, high=1.0, size=n * n)
    a = a.reshape([n, n]).astype(dtype_)
    # Symmetrize so the matrix is self-adjoint.
    a += a.T
    a = np.tile(a, batch_shape + (1, 1))
    atol = 1e-4 if dtype_ == np.float32 else 1e-12
    for compute_v in False, True:
      np_e, np_v = np.linalg.eig(a)
      with self.test_session():
        if not compute_v:
          tf_e = tf.self_adjoint_eigvals(tf.constant(a))
          self.assertAllClose(
              np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
        else:
          tf_e, tf_v = tf.self_adjoint_eig(tf.constant(a))

          # Check that V*diag(E)*V^T is close to A.
          v_times_e = tf.batch_matmul(tf_v, tf.batch_matrix_diag(tf_e))
          a_ev = tf.batch_matmul(v_times_e, tf_v, adj_y=True)
          self.assertAllClose(a_ev.eval(), a, atol=atol)

          # Compare to numpy.linalg.eig.
          CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(), tf_v.eval(),
                                     atol)
Ejemplo n.º 10
0
  def build_memory(self):
    """Builds the memory-hop graph and stores `self.logits` / `self.probs`.

    Story, query and answer inputs are projected with the shared matrix
    `self.T`; the query state is then refined over `self.nhop` attention hops
    on the story memory and finally scored against every answer.
    """
    self.global_step = tf.Variable(0, name="global_step")

    # Linear Projection Layer
    # NOTE(review): name="projection" is given to tf.random_normal, not to the
    # Variable; left untouched so existing variable names do not change.
    self.T = tf.Variable(tf.random_normal([self.idim, self.edim],
                                          stddev=self.init_std,
                                          name="projection"))

    reshape = tf.reshape(self.story, [-1, self.idim])
    m = tf.matmul(reshape, self.T)   # [batch_size * nstory, edim]
    m = tf.reshape(m, [self.batch_size, self.nstory, -1])

    reshape = tf.reshape(self.query, [-1, self.idim])
    u = tf.matmul(reshape, self.T)   # [batch_size * 1, edim]
    u = tf.reshape(u, [self.batch_size, 1, -1])

    reshape = tf.reshape(self.answer, [-1, self.idim])
    g = tf.matmul(reshape, self.T)  # [batch_size * nanswer, edim]
    g = tf.reshape(g, [self.batch_size, self.nanswer, -1])

    for _ in xrange(self.nhop):
      p = tf.batch_matmul(m, u, adj_y=True)  # [batch_size, nstory, 1]
      p = tf.reshape(p, [self.batch_size, -1])
      p = tf.nn.softmax(p)  # [batch_size, nstory]

      reshape = tf.reshape(p, [self.batch_size, -1, 1])
      # keep_dims keeps `o` at [batch_size, 1, edim] so it lines up with `u`.
      # Without it `o` is [batch_size, edim] and the add below broadcasts to
      # [batch_size, batch_size, edim] whenever batch_size > 1.
      o = tf.reduce_sum(tf.mul(m, reshape), 1, keep_dims=True)
      u = tf.add(o, u)  # [batch_size, 1, edim]

    logits = tf.batch_matmul(g, u, adj_y=True)  # [batch_size, nanswer, 1]
    logits = tf.reshape(logits, [self.batch_size, -1])
    self.logits = logits
    self.probs = tf.nn.softmax(logits)
Ejemplo n.º 11
0
  def write(self, M0, write_w0s, write_heads):
    """Applies every write head's erase and add to the memory.

    Args:
      M0: memory before the write.
      write_w0s: previous write weightings, one per head.
      write_heads: per-head mappings providing at least "erase" and "add",
        plus whatever NTMCell.address reads.

    Returns:
      Tuple (M1, write_w1s): the updated memory and the new write weightings.
    """
    write_w1s = [
        NTMCell.address(M0, write_w0s[i], write_heads[i])
        for i in xrange(self.n_heads)
    ]

    M1 = M0
    # Erases. Each weighting is paired with its own head; previously `head`
    # was whatever the addressing loop left behind, i.e. always the last head.
    for w1, head in zip(write_w1s, write_heads):
      # Outer product of the weighting and the erase vector.
      we = 1 - tf.batch_matmul(
        tf.expand_dims(w1, 2),
        tf.expand_dims(head["erase"], 1)
      )
      M1 = M1 * we

    # Writes
    for w1, head in zip(write_w1s, write_heads):
      # Outer product of the weighting and the add vector.
      add = tf.batch_matmul(
        tf.expand_dims(w1, 2),
        tf.expand_dims(head["add"], 1),
      )
      M1 = M1 + add

    return M1, write_w1s
Ejemplo n.º 12
0
 def write(self, lstm_h, Fx, Fy, gamma):
     """Project lstm_h to an N x N patch and spread it with the filter banks.

     Returns Fy^T * patch * Fx, scaled per example by 1 / gamma.
     """
     with tf.variable_scope("writeW", reuse=self.share):
         patch = self.linear(lstm_h, self.N * self.N)  # batch x (write_n*write_n)
     patch = tf.reshape(patch, [-1, self.N, self.N])
     Fy_t = tf.transpose(Fy, perm=[0, 2, 1])
     spread = tf.batch_matmul(Fy_t, tf.batch_matmul(patch, Fx))
     inv_gamma = tf.reshape(1.0 / gamma, [-1, 1, 1])
     return spread * inv_gamma
Ejemplo n.º 13
0
 def copy_net(decoder_out):
     """Score decoder_out against rnn_encoder_temp and map the scores through one_hot.

     Returns a rank-2 tensor whose last dimension is the last dimension of
     the product with one_hot.
     """
     with tf.variable_scope('copy_net'):
         query = tf.reshape(decoder_out, [-1, decoder_hidden, 1])
         scores = tf.batch_matmul(rnn_encoder_temp, query)
         # Turn the column of scores into a row so it can multiply one_hot.
         num_scores = scores.get_shape().as_list()[1]
         scores = tf.reshape(scores, [-1, 1, num_scores])
         mapped = tf.batch_matmul(scores, one_hot)
         last_dim = mapped.get_shape().as_list()[-1]
         return tf.reshape(mapped, [-1, last_dim])
Ejemplo n.º 14
0
    def build_memory(self):
        """Create the memory variables and append one hidden state per hop to ``self.hid``.

        Every hop attends over the input memory with the latest hidden state,
        reads the output memory with the resulting probabilities, adds the
        linearly mapped hidden state and applies a (partial) ReLU depending on
        ``self.lindim``.
        """
        self.global_step = tf.Variable(0, name="global_step")

        word_shape = [self.nwords, self.edim]
        time_shape = [self.mem_size, self.edim]

        # Word embeddings for the input memory (A) and the output memory (B).
        self.A = tf.Variable(tf.random_normal(word_shape, stddev=self.init_std))
        self.B = tf.Variable(tf.random_normal(word_shape, stddev=self.init_std))
        # Linear map applied to the hidden state at every hop.
        self.C = tf.Variable(tf.random_normal([self.edim, self.edim], stddev=self.init_std))

        # Temporal Encoding
        self.T_A = tf.Variable(tf.random_normal(time_shape, stddev=self.init_std))
        self.T_B = tf.Variable(tf.random_normal(time_shape, stddev=self.init_std))

        # Input memory: rows of A for the context plus rows of T_A for the time.
        memory_in = tf.add(tf.nn.embedding_lookup(self.A, self.context),
                           tf.nn.embedding_lookup(self.T_A, self.time))

        # Output memory: same lookups through B and T_B.
        memory_out = tf.add(tf.nn.embedding_lookup(self.B, self.context),
                            tf.nn.embedding_lookup(self.T_B, self.time))

        for _ in range(self.nhop):
            state = self.hid[-1]
            # reshape the hidden state to be 3 dimensional (-1 infers the batch size)
            self.hid3dim = tf.reshape(state, [-1, 1, self.edim])
            # inner product of the hidden state with every memory slot
            scores = tf.batch_matmul(self.hid3dim, memory_in, adj_y=True)
            attention = tf.nn.softmax(tf.reshape(scores, [-1, self.mem_size]))

            attention3dim = tf.reshape(attention, [-1, 1, self.mem_size])
            # the output vector: attention-weighted sum of the output memory
            read = tf.batch_matmul(attention3dim, memory_out)
            read = tf.reshape(read, [-1, self.edim])

            mapped = tf.matmul(state, self.C)
            combined = tf.add(mapped, read)

            self.share_list[0].append(mapped)

            if self.lindim == self.edim:
                # fully linear hidden state
                next_state = combined
            elif self.lindim == 0:
                # fully rectified hidden state
                next_state = tf.nn.relu(combined)
            else:
                # first lindim units stay linear, the rest go through ReLU
                linear_part = tf.slice(combined, [0, 0], [self.batch_size, self.lindim])
                rest = tf.slice(combined, [0, self.lindim], [self.batch_size, self.edim-self.lindim])
                rectified = tf.nn.relu(rest)
                next_state = tf.concat(1, [linear_part, rectified])
            self.hid.append(next_state)
Ejemplo n.º 15
0
 def read(self, x, Fx, Fy, gamma):
    """Extract a glimpse from x with the filter banks, scaled by gamma.

    The filter banks are replicated three times along a new axis 1 before
    being applied as Fy * x * Fx^T; the result is flattened to
    [-1, self.att_size].
    """
    fx4 = tf.reshape(Fx, [-1, 1, self.N, self.shape[1]])
    fy4 = tf.reshape(Fy, [-1, 1, self.N, self.shape[2]])
    fx_stack = tf.concat(1, [fx4] * 3)  # batch * 3 * N * A
    fy_stack = tf.concat(1, [fy4] * 3)
    fx_stack_t = tf.transpose(fx_stack, perm=[0, 1, 3, 2])
    filtered = tf.batch_matmul(fy_stack, tf.batch_matmul(x, fx_stack_t))
    flat = tf.reshape(filtered, [-1, self.att_size])
    return flat * tf.reshape(gamma, [-1, 1])
Ejemplo n.º 16
0
    def get_function(points, mu, sigma): # f_ik [n,k]
        """Evaluate every cluster's multivariate normal density at every point.

        Uses the enclosing-scope ``coef``, ``n`` and ``k``. The inline shape
        notes assume points is [n, p, 1], mu is [k, p, 1] and sigma is
        [k, p, p] -- TODO confirm against the caller.

        Returns:
            An [n, k] tensor; entry (i, c) is the density of cluster c at point i.
        """
        div = coef*tf.rsqrt(tf.batch_matrix_determinant(sigma)) # ((2pi)^p*|S_k|)^-1/2  [k]
        div = tf.tile(tf.reshape(div, [1,k]), [n,1]) # [n,k]
        # Repeat each point k times *consecutively* so that row i*k + c pairs
        # point i with cluster c, matching tf.tile(mu, [n,1,1]) and the final
        # reshape to [n,k]. Tiling the whole block of points k times instead
        # paired cluster c with point (i*k + c) mod n.
        points_rep = tf.reshape(tf.tile(points, [1,k,1]), [n*k,-1,1]) # [n*k, p, 1]
        diff = tf.sub(points_rep, tf.tile(mu, [n,1,1])) # x_i-u_k [n*k, p, 1]
        sigma = tf.tile(sigma, [n,1,1]) # [n*k,p,p]
        exp = tf.exp(-0.5*tf.batch_matmul( tf.transpose(diff,perm=[0,2,1]), tf.batch_matmul(tf.batch_matrix_inverse(sigma), diff) )) # e^(-0.5*d'*S^-1*d)_ik [n*k, 1, 1]
        exp = tf.reshape(exp, [n,k])

        return tf.mul(div, exp)
Ejemplo n.º 17
0
def write(windows, N, center_x, center_y, delta, sigma, gamma):
    """Spread N x N windows onto a flattened 28 x 28 canvas.

    The windows are multiplied as FX^T * W * FY with the filter banks
    returned by ``banks`` and scaled per example by ``1 / (gamma + tol)``.
    """
    # Keeps the division finite when gamma is zero.
    tol = 1e-5
    patches = tf.reshape(windows, [-1, N, N])
    FX, FY = banks(center_x, center_y, sigma, delta, N, (28,28))

    canvas = tf.batch_matmul(patches, FY)
    canvas = tf.batch_matmul(tf.transpose(FX, [0,2,1]), canvas)

    scale = tf.expand_dims(1/(gamma + tol),1)
    return scale*tf.reshape(canvas, [-1, 28*28])
Ejemplo n.º 18
0
def model(input1, gating_network):
    """Return softmax(relu(w * input1 + b)^T * gating_network) reshaped to [L, n_aa].

    Uses the module-level ``w``, ``b``, ``L`` and ``n_aa``.
    """
    pre_activation = tf.reshape(tf.batch_matmul(w, input1), [L, L, n_aa]) + b
    hidden = tf.nn.relu(pre_activation)
    hidden_t = tf.transpose(hidden, perm=[0, 2, 1])
    # perm=[0,1,2] leaves the gating tensor's axes in place.
    gate = tf.transpose(gating_network, perm=[0,1,2])
    gated = tf.batch_matmul(hidden_t, gate)
    return tf.nn.softmax(tf.reshape(gated, [L, n_aa]))
Ejemplo n.º 19
0
	def buildSimilarity(self):
		"""Store the cosine similarity of the question and answer features.

		Reads self.tensors['q_feature'] and self.tensors['a_feature'] and writes
		self.tensors['similarity'].
		"""
		question = self.tensors['q_feature']
		answer = self.tensors['a_feature']
		with tf.name_scope('similarity'):
			# L2 norms along axis 1, kept as a singleton axis for the products below.
			question_norm = tf.sqrt(tf.reduce_sum(question ** 2, reduction_indices=[1], keep_dims=True))
			answer_norm = tf.sqrt(tf.reduce_sum(answer ** 2, reduction_indices=[1], keep_dims=True))
			dot = tf.batch_matmul(question, answer, adj_x=True, adj_y=False, name="product")
			norm_product = tf.batch_matmul(question_norm, answer_norm, adj_x=False, adj_y=True, name="denominator")
			# EPSILON keeps the ratio finite when a norm is zero.
			ratio = dot / (norm_product + EPSILON)
			similarity = tf.squeeze(ratio, [-1,-2], name='similarity')
		self.tensors['similarity'] = similarity
Ejemplo n.º 20
0
def read(images, N, delta, gamma, sigma, center_x, center_y):
    """Read an N x N window from 28 x 28 images with the filter banks.

    Computes FY * image * FX^T, flattens it to [-1, N*N] and scales it per
    example by ``gamma``.
    """
    #TODO: Make configurable shape
    FX, FY = banks(center_x, center_y, sigma, delta, N, (28,28))

    frames = tf.reshape(images, [-1, 28, 28])

    window = tf.batch_matmul(FY, frames)
    window = tf.batch_matmul(window, tf.transpose(FX, [0,2,1]))

    scale = tf.expand_dims(gamma,1)
    return scale*tf.reshape(window, [-1, N*N])
Ejemplo n.º 21
0
 def threee_tensor_mul(A, B, C, res):
   """Add the triple product A * B * C to res and return the sum.

   When A is rank 3 and B is rank 4 the last axis of B is treated as a
   channel axis and the products of all channels are accumulated, e.g.

     A = tf.ones([4, 3, 2], tf.int32)
     B = tf.ones([4, 2, 5, 3], tf.int32)
     C = tf.ones([4, 5, 6], tf.int32)
     return: (4, 3, 6), which combines the 3 channels of matrix multiplication

   For any other combination of ranks the operands are multiplied directly,
   exactly as before.
   """
   if A.get_shape().ndims == 3 and B.get_shape().ndims == 4:
     num_channels = B.get_shape().as_list()[-1]
     if num_channels is None:
       raise ValueError("the channel (last) dimension of B must be statically known")
     for ch in range(num_channels):
       # [batch, rows, cols] slice of channel ch.
       b_ch = tf.squeeze(tf.slice(B, [0, 0, 0, ch], [-1, -1, -1, 1]), [3])
       res += tf.batch_matmul(tf.batch_matmul(A, b_ch), C)
     return res

   res += tf.batch_matmul(tf.batch_matmul(A, B), C)

   return res
Ejemplo n.º 22
0
    def build_generator(self):
        """Build the greedy caption-generation graph.

        Creates placeholders for the video features, the frame mask and a
        per-example ``pos_mask``, then unrolls 16 decoding steps of soft
        attention followed by the LSTM, feeding the argmax word back in.

        Indentation is normalized to spaces (the block previously mixed tabs
        and spaces, which Python 3 rejects); the statements are unchanged.

        Returns:
            Tuple ``(video, video_mask, generated_words, pos_mask,
            lstm3_variables)``.
        """
        video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
        video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        pos_mask = tf.placeholder(tf.float32, [self.batch_size])

        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        # Scale every embedding by pos_mask and append (1 - pos_mask) as one
        # extra feature column, giving dim_hidden + 1 features.
        image_emb = tf.mul(image_emb, tf.tile(tf.expand_dims(tf.expand_dims(pos_mask, 1), 1), [1, self.n_lstm_steps, self.dim_hidden]))
        image_emb = tf.concat(2, [image_emb, tf.tile(tf.expand_dims(1-tf.expand_dims(pos_mask, 1), 1), [1, self.n_lstm_steps, 1])])
        image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x (h+1)

        state1 = tf.zeros([self.batch_size, self.lstm3.state_size])
        h_prev = tf.zeros([self.batch_size, self.dim_hidden])

        generated_words = []

        current_embed = tf.zeros([self.batch_size, self.dim_hidden])
        brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1])   # n x h x 1
        image_part = tf.batch_matmul(image_emb, tf.tile(tf.expand_dims(self.embed_att_Ua, 0), [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h
        for i in range(16):
            e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part)  # n x b x h
            e = tf.batch_matmul(e, brcst_w)
            e = tf.reduce_sum(e, 2)  # n x b
            e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
            denomin = tf.reduce_sum(e_hat_exp, 0)  # b
            # Replace an all-zero denominator by 1 to avoid dividing by zero.
            denomin = denomin + tf.to_float(tf.equal(denomin, 0))
            alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2), [1, 1, self.dim_hidden+1])  # n x b x (h+1)
            attention_list = tf.mul(alphas, image_emb)  # n x b x (h+1)
            atten = tf.reduce_sum(attention_list, 0)  # b x (h+1)

            if i > 0: tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3") as vs:
                output1, state1 = self.lstm3(tf.concat(1, [atten, current_embed]), state1)  # b x h
                # Variables whose name starts with the LSTM3 scope name.
                lstm3_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]

            output2 = tf.tanh(tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]), self.embed_nn_Wp, self.embed_nn_bp))  # b x h

            h_prev = output1
            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)  # b x w
            max_prob_index = tf.argmax(logit_words, 1)  # b
            generated_words.append(max_prob_index)  # b

            # The predicted word becomes the next step's input embedding.
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)

        generated_words = tf.transpose(tf.pack(generated_words))
        return video, video_mask, generated_words, pos_mask, lstm3_variables
Ejemplo n.º 23
0
 def get_function(points, mu, sigma):  # f_ik [n,k]
     """Evaluate every cluster's multivariate normal density at every point.

     Uses the enclosing-scope ``coef``, ``n`` and ``k``. The inline shape
     notes assume points is [n, p, 1], mu is [k, p, 1] and sigma is
     [k, p, p] -- TODO confirm against the caller.

     Returns:
         An [n, k] tensor; entry (i, c) is the density of cluster c at point i.
     """
     div = coef * tf.rsqrt(tf.batch_matrix_determinant(sigma))  # ((2pi)^p*|S_k|)^-1/2  [k]
     div = tf.tile(tf.reshape(div, [1, k]), [n, 1])  # [n,k]
     # Repeat each point k times *consecutively* so that row i*k + c pairs
     # point i with cluster c, matching tf.tile(mu, [n, 1, 1]) and the final
     # reshape to [n, k]. Tiling the whole block of points k times instead
     # paired cluster c with point (i*k + c) mod n.
     points_rep = tf.reshape(tf.tile(points, [1, k, 1]), [n * k, -1, 1])  # [n*k, p, 1]
     diff = tf.sub(points_rep, tf.tile(mu, [n, 1, 1]))  # x_i-u_k [n*k, p, 1]
     sigma = tf.tile(sigma, [n, 1, 1])  # [n*k,p,p]
     exp = tf.exp(
         -0.5
         * tf.batch_matmul(tf.transpose(diff, perm=[0, 2, 1]), tf.batch_matmul(tf.batch_matrix_inverse(sigma), diff))
     )  # e^(-0.5*d'*S^-1*d)_ik [n*k, 1, 1]
     exp = tf.reshape(exp, [n, k])
     return tf.mul(div, exp)
Ejemplo n.º 24
0
def write_attn(h_dec):
    """Turn the decoder state into a write patch and spread it over the canvas.

    Returns Fy^T * patch * Fx flattened to [batch_size, B*A] and scaled per
    example by 1 / gamma.
    """
    with tf.variable_scope("writeW", reuse=DO_SHARE):
        patch = linear(h_dec, write_size)  # batch x (write_n*write_n)
    patch = tf.reshape(patch, [batch_size, write_n, write_n])
    Fx, Fy, gamma = attn_window("write", h_dec, write_n)
    Fy_t = tf.transpose(Fy, perm=[0, 2, 1])
    canvas = tf.batch_matmul(Fy_t, tf.batch_matmul(patch, Fx))
    canvas = tf.reshape(canvas, [batch_size, B*A])
    inv_gamma = tf.reshape(1.0/gamma, [-1, 1])
    return canvas*inv_gamma
Ejemplo n.º 25
0
def write(h_dec):
  """Function to implement 29: write a patch_write x patch_write patch.

  Returns Fy^T * patch * Fx flattened to [batch_size, B*A] and scaled per
  example by 1 / gamma.
  """
  with tf.variable_scope("writeW", reuse=REUSE_T):
      patch = linear(h_dec, write_size)  # batch x (patch_write*patch_write)
  patch = tf.reshape(patch, [batch_size, patch_write, patch_write])
  Fx, Fy, gamma = attn_window("write", h_dec, patch_write)
  Fy_t = tf.transpose(Fy, perm=[0, 2, 1])
  canvas = tf.batch_matmul(Fy_t, tf.batch_matmul(patch, Fx))
  canvas = tf.reshape(canvas, [batch_size, B*A])
  inv_gamma = tf.reshape(1.0/gamma, [-1, 1])
  return canvas*inv_gamma
Ejemplo n.º 26
0
    def train_graph(self, video, video_mask, caption, caption_mask):
        """Build the teacher-forced captioning loss for one batch.

        Unrolls 16 decoding steps of soft attention over the frame embeddings
        followed by the LSTM, feeding the ground-truth word of each step back
        in as the next input.

        Indentation is normalized to spaces (the block previously mixed tabs
        and spaces, which Python 3 rejects); the statements are unchanged.

        Args:
            video: frame features, reshaped here to ``[-1, self.dim_image]``.
            video_mask: frame mask, transposed and applied to the attention scores.
            caption: word indices; column ``i`` is the target of step ``i``.
            caption_mask: per-word mask; column ``i`` weights step ``i``'s loss.

        Returns:
            Per-example loss: the masked cross-entropy summed over the steps
            and divided by each example's ``caption_mask`` sum.
        """
        video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x n) x d
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)  # (b x n) x h
        image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])  # b x n x h
        image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

        state1 = tf.zeros([self.batch_size, self.lstm3.state_size])  # b x s
        h_prev = tf.zeros([self.batch_size, self.dim_hidden])  # b x h

        loss_caption = tf.zeros([self.batch_size])

        current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
        brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1])  # n x h x 1
        image_part = tf.batch_matmul(image_emb, tf.tile(tf.expand_dims(self.embed_att_Ua, 0), [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h
        for i in range(16):
            e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part)  # n x b x h
            e = tf.batch_matmul(e, brcst_w)
            e = tf.reduce_sum(e, 2)  # n x b
            e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
            denomin = tf.reduce_sum(e_hat_exp, 0)  # b
            # Replace an all-zero denominator by 1 to avoid dividing by zero.
            denomin = denomin + tf.to_float(tf.equal(denomin, 0))
            alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2), [1, 1, self.dim_hidden])  # n x b x h
            attention_list = tf.mul(alphas, image_emb)  # n x b x h
            atten = tf.reduce_sum(attention_list, 0)  # b x h

            if i > 0: tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3"):
                output1, state1 = self.lstm3_dropout(tf.concat(1, [atten, current_embed]), state1)  # b x h
            output2 = tf.tanh(tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]), self.embed_nn_Wp, self.embed_nn_bp))  # b x h

            h_prev = output1  # b x h

            # One-hot targets for this step, built from (row, word index) pairs.
            labels = tf.expand_dims(caption[:, i], 1)  # b x 1
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)  # b x 1
            concated = tf.concat(1, [indices, labels])  # b x 2
            onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.batch_size, self.n_words]), 1.0, 0.0)  # b x w
            # Teacher forcing: the ground-truth word is the next step's input.
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])

            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)  # b x w
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words, onehot_labels)  # b
            cross_entropy = cross_entropy * caption_mask[:, i]  # b

            loss_caption += cross_entropy  # b
        loss_caption = loss_caption / tf.reduce_sum(caption_mask, 1)
        return loss_caption
Ejemplo n.º 27
0
    def build_model(self):
        """Build the training graph and return the loss with its placeholders.

        Unrolls ``n_caption_step`` decoding steps of soft attention over the
        frame embeddings followed by the LSTM, with the ground-truth word fed
        back in at every step.

        Returns:
            Tuple ``(loss, video, video_mask, caption, caption_mask)`` where
            ``loss`` is the masked cross-entropy summed over all steps and
            examples, divided by the sum of ``caption_mask``.
        """
        batch = self.batch_size
        steps = self.n_lstm_steps

        video = tf.placeholder(tf.float32, [batch, steps, self.dim_image]) # b x n x d
        video_mask = tf.placeholder(tf.float32, [batch, steps]) # b x n

        caption = tf.placeholder(tf.int32, [batch, n_caption_step]) # b x 16
        caption_mask = tf.placeholder(tf.float32, [batch, n_caption_step]) # b x 16

        flat_frames = tf.reshape(video, [-1, self.dim_image]) # (b x n) x d
        frame_emb = tf.nn.xw_plus_b(flat_frames, self.encode_image_W, self.encode_image_b) # (b x n) x h
        frame_emb = tf.reshape(frame_emb, [batch, steps, self.dim_hidden]) # b x n x h
        frame_emb = tf.transpose(frame_emb, [1,0,2]) # n x b x h

        lstm_state = tf.zeros([batch, self.lstm3.state_size]) # b x s
        prev_hidden = tf.zeros([batch, self.dim_hidden]) # b x h

        total_loss = 0.0

        word_embed = tf.zeros([batch, self.dim_hidden]) # b x h
        score_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [steps,1,1]) # n x h x 1
        frame_part = tf.batch_matmul(frame_emb, tf.tile(tf.expand_dims(self.embed_att_Ua, 0), [steps,1,1])) + self.embed_att_ba # n x b x h
        for i in range(n_caption_step):
            # unnormalized relevance score of every frame
            scores = tf.tanh(tf.matmul(prev_hidden, self.embed_att_Wa) + frame_part) # n x b x h
            scores = tf.batch_matmul(scores, score_w)
            scores = tf.reduce_sum(scores,2) # n x b
            masked_exp = tf.mul(tf.transpose(video_mask), tf.exp(scores)) # n x b
            normalizer = tf.reduce_sum(masked_exp,0) # b
            # an all-zero normalizer is replaced by 1 to avoid dividing by zero
            normalizer = normalizer + tf.to_float(tf.equal(normalizer, 0))
            # normalize to obtain the attention weights
            weights = tf.tile(tf.expand_dims(tf.div(masked_exp,normalizer),2),[1,1,self.dim_hidden]) # n x b x h
            weighted_frames = tf.mul(weights, frame_emb) # n x b x h
            # soft-attention weighted sum
            context = tf.reduce_sum(weighted_frames,0) # b x h
            if i > 0: tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3"):
                lstm_out, lstm_state = self.lstm3_dropout( tf.concat(1,[context, word_embed]), lstm_state ) # b x h

            hidden = tf.tanh(tf.nn.xw_plus_b(tf.concat(1,[lstm_out,context,word_embed]), self.embed_nn_Wp, self.embed_nn_bp)) # b x h
            prev_hidden = lstm_out # b x h

            # one-hot targets for this step, built from (row, word index) pairs
            targets = tf.expand_dims(caption[:,i], 1) # b x 1
            rows = tf.expand_dims(tf.range(0, batch, 1), 1) # b x 1
            pairs = tf.concat(1, [rows, targets]) # b x 2
            onehot = tf.sparse_to_dense(pairs, tf.pack([batch, self.n_words]), 1.0, 0.0) # b x w
            # the ground-truth word is the next step's input
            with tf.device("/cpu:0"):
                word_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,i])

            word_logits = tf.nn.xw_plus_b(hidden, self.embed_word_W, self.embed_word_b) # b x w
            step_loss = tf.nn.softmax_cross_entropy_with_logits(word_logits, onehot) # b
            step_loss = step_loss * caption_mask[:,i] # b
            total_loss += tf.reduce_sum(step_loss) # 1

        loss = total_loss / tf.reduce_sum(caption_mask)
        return loss, video, video_mask, caption, caption_mask
Ejemplo n.º 28
0
def sampleQ_psi(z,u,Q_phi):
  """Advance the latent state one step and build the predicted distribution.

  The parameters returned by ``transition(z)`` are applied twice: to the mean
  of ``Q_phi`` to obtain the mean of the predicted distribution ``Q_psi``,
  and to ``z`` itself to obtain the next latent sample.

  Args:
    z: current latent sample (expanded to batch x z_dim x 1 before the
      product, per the original shape comments).
    u: control input (expanded to batch x u_dim x 1 likewise).
    Q_phi: object exposing ``mu``, ``sigma`` and ``logsigma``.

  Returns:
    Tuple ``(z_next, Q_psi)`` where ``z_next = A z + B u + o`` and ``Q_psi``
    is ``NormalDistribution(A mu + B u + o, sigma, logsigma, v, r)``.
  """
  # NOTE(review): A/B look like per-example transition matrices and o an
  # offset; v and r are only forwarded to NormalDistribution -- confirm
  # against transition().
  A,B,o,v,r=transition(z)

  def batched_matvec(matrices, vectors):
    # Treat each vector as a one-column matrix, multiply, drop the column axis.
    columns = tf.expand_dims(vectors, -1)
    return tf.squeeze(tf.batch_matmul(matrices, columns), [-1])

  with tf.variable_scope("sampleQ_psi"):
    mean_term = batched_matvec(A, Q_phi.mu)
    control_term = batched_matvec(B, u)
    Q_psi = NormalDistribution(mean_term + control_term + o,
                               Q_phi.sigma, Q_phi.logsigma, v, r)
    # The next sample is a deterministic transform of the current one.
    z_next = batched_matvec(A, z) + control_term + o
    return z_next, Q_psi
Ejemplo n.º 29
0
 def tensor(self, domain=None):
     """Return this object's output tensor evaluated over a domain.

     Args:
         domain: object exposing a ``tensor`` attribute. When ``None``,
             ``self.domain`` is used.

     Returns:
         ``self.defined(self.type_idx, domain.tensor)`` when a user-supplied
         ``defined`` callable is present; otherwise the sigmoid of
         ``tanh(X W X + X V^T + b) u`` built from the domain tensor ``X``.
     """
     if domain is None:
         domain = self.domain
     if self.defined is not None:
         return self.defined(self.type_idx, domain.tensor)

     X = domain.tensor
     # One copy of X per layer so it can be multiplied with every slice of W.
     layered_X = tf.tile(tf.expand_dims(X, 0), [self.number_of_layers, 1, 1])
     XW = tf.batch_matmul(layered_X, self.W)
     # Per-row bilinear form: (rows, 1, features) x (rows, features, layers).
     bilinear = tf.batch_matmul(tf.expand_dims(X, 1), tf.transpose(XW, [1, 2, 0]))
     # Squeeze only the axis inserted by expand_dims above. An axis-less
     # squeeze would also drop the layer axis when number_of_layers == 1 (or
     # the row axis for a single-row domain) and make the sum below
     # broadcast to the wrong shape.
     XWX = tf.squeeze(bilinear, [1])
     XV = tf.matmul(X, tf.transpose(self.V))
     gX = tf.matmul(tf.tanh(XWX + XV + self.b), self.u)
     return tf.sigmoid(gX)
Ejemplo n.º 30
0
def model(input1, gating_network):
	"""Apply a softmax layer to ``input1`` and mix it with a gating tensor.

	Relies on the module-level names ``w``, ``b``, ``L`` and ``n_aa``.

	Args:
		input1: right-hand operand of the batched product with ``w``.
		gating_network: rank-3 tensor multiplied with the transposed
			softmax output.

	Returns:
		The gated product reshaped to ``[L, n_aa]``.
	"""
	# Affine map, laid out as L x L x n_aa.
	logits = tf.reshape(tf.batch_matmul(w, input1), [L, L, n_aa]) + b

	# Softmax over the last axis, written out by hand because the input is
	# rank 3.
	exponentials = tf.exp(logits)
	partition = tf.reshape(tf.reduce_sum(exponentials, 2), [L,L,1])
	probabilities = exponentials / tf.tile(partition, [1,1,n_aa])

	# Alternatives tried in the original: tf.sigmoid(logits), tf.nn.relu(...).
	probabilities_t = tf.transpose(probabilities, perm=[0, 2, 1])
	# perm=[0,1,2] is the identity permutation (the original notes [1,0,2]
	# as an alternative).
	gates = tf.transpose(gating_network, perm=[0,1,2])
	gated = tf.batch_matmul(probabilities_t, gates)

	return tf.reshape(gated, [L, n_aa])
Ejemplo n.º 31
0
    def build_model(self):
        """Build the training graph of the soft-attention caption decoder.

        Creates placeholders for a batch of video features, the frame mask,
        the caption word ids and the caption mask, embeds the frames, and
        unrolls ``n_caption_step`` (module-level) decoding steps. Each step
        computes attention weights over the frames from the previous LSTM
        output, feeds the attended feature plus the previous word embedding
        to the LSTM, and accumulates the masked cross-entropy of the
        predicted word.

        Returns:
            Tuple ``(loss, video, video_mask, caption, caption_mask)``: the
            summed masked cross-entropy divided by the sum of
            ``caption_mask``, followed by the four input placeholders.
        """
        batch = self.batch_size
        steps = self.n_lstm_steps
        hidden = self.dim_hidden

        # ---- inputs (b = batch, n = frames, d = image dim, h = hidden) ----
        video = tf.placeholder(tf.float32,
                               [batch, steps, self.dim_image])  # b x n x d
        video_mask = tf.placeholder(tf.float32, [batch, steps])  # b x n
        caption = tf.placeholder(tf.int32, [batch, n_caption_step])
        caption_mask = tf.placeholder(tf.float32, [batch, n_caption_step])

        # ---- frame embedding, converted to time-major ----
        flat_frames = tf.reshape(video, [-1, self.dim_image])  # (b*n) x d
        flat_emb = tf.nn.xw_plus_b(flat_frames, self.encode_image_W,
                                   self.encode_image_b)  # (b*n) x h
        batch_major_emb = tf.reshape(flat_emb, [batch, steps, hidden])
        image_emb = tf.transpose(batch_major_emb, [1, 0, 2])  # n x b x h

        # ---- recurrent state carried across decoding steps ----
        lstm_state = tf.zeros([batch, self.lstm3.state_size])  # b x s
        prev_output = tf.zeros([batch, hidden])  # b x h
        total_loss = 0.0
        word_embed = tf.zeros([batch, hidden])  # b x h

        # Attention parameters replicated once per frame so they can be used
        # with batch_matmul on the time-major tensors.
        score_w = tf.tile(tf.expand_dims(self.embed_att_w, 0),
                          [steps, 1, 1])  # n x h x 1
        frame_proj_w = tf.tile(tf.expand_dims(self.embed_att_Ua, 0),
                               [steps, 1, 1])
        # Frame-dependent part of the attention energy; constant over steps.
        image_part = (tf.batch_matmul(image_emb, frame_proj_w) +
                      self.embed_att_ba)  # n x b x h

        for step in range(n_caption_step):
            # Unnormalised relevance score of every frame.
            energy = tf.tanh(
                tf.matmul(prev_output, self.embed_att_Wa) + image_part)
            energy = tf.batch_matmul(energy, score_w)
            energy = tf.reduce_sum(energy, 2)  # n x b

            # Masked softmax over frames; an all-zero denominator is bumped
            # to 1 to avoid dividing by zero.
            masked_exp = tf.mul(tf.transpose(video_mask),
                                tf.exp(energy))  # n x b
            norm = tf.reduce_sum(masked_exp, 0)  # b
            norm = norm + tf.to_float(tf.equal(norm, 0))
            weights = tf.expand_dims(tf.div(masked_exp, norm), 2)
            alphas = tf.tile(weights, [1, 1, hidden])  # n x b x h

            # Soft-attention weighted sum of the frame embeddings.
            atten = tf.reduce_sum(tf.mul(alphas, image_emb), 0)  # b x h

            # Variables are created on the first step and reused afterwards.
            if step > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3"):
                lstm_out, lstm_state = self.lstm3_dropout(
                    tf.concat(1, [atten, word_embed]), lstm_state)  # b x h

            features = tf.concat(1, [lstm_out, atten, word_embed])
            decoded = tf.tanh(
                tf.nn.xw_plus_b(features, self.embed_nn_Wp,
                                self.embed_nn_bp))  # b x h
            prev_output = lstm_out

            # One-hot targets for this step, built from (row, word id) pairs.
            word_ids = tf.expand_dims(caption[:, step], 1)  # b x 1
            row_ids = tf.expand_dims(tf.range(0, self.batch_size, 1),
                                     1)  # b x 1
            coords = tf.concat(1, [row_ids, word_ids])  # b x 2
            onehot_labels = tf.sparse_to_dense(
                coords, tf.pack([self.batch_size, self.n_words]), 1.0,
                0.0)  # b x w

            # This step's ground-truth word becomes the next step's input.
            with tf.device("/cpu:0"):
                word_embed = tf.nn.embedding_lookup(self.Wemb,
                                                    caption[:, step])

            logit_words = tf.nn.xw_plus_b(decoded, self.embed_word_W,
                                          self.embed_word_b)  # b x w
            step_xent = tf.nn.softmax_cross_entropy_with_logits(
                logit_words, onehot_labels)
            step_xent = step_xent * caption_mask[:, step]
            total_loss += tf.reduce_sum(step_xent)

        loss = total_loss / tf.reduce_sum(caption_mask)
        return loss, video, video_mask, caption, caption_mask
Ejemplo n.º 32
0
    def build_graph(self):
        """Populate ``self.graph`` with the tensor-train model and its training ops.

        Creates the ``X``/``Y`` placeholders, one trainable core ``G[i]`` per
        feature (plus a reweighted copy ``G_exp[i]`` used for the penalty),
        the chained-product prediction ``outputs``, the penalty, the MSE
        ``loss``, ``penalized_loss``, an Adam ``trainer``, the variable
        initialiser and a ``saver``. Everything is stored on ``self``.
        """
        with self.graph.as_default():
            # Inputs: integer feature indices and float targets.
            self.X = tf.placeholder(tf.int64, [None, self.n_features], name='X')
            self.Y = tf.placeholder(tf.float32, (None), name='Y')

            # TT-cores and their reweighted counterparts for the penalty.
            self.G = [None]*self.n_features
            self.G_exp = [None]*self.n_features

            last = self.n_features - 1
            for i in range(self.n_features):
                n_rows = self.s_features[i] + 1
                # Boundary cores have a unit rank on their outer side; the
                # last-core shape wins when there is a single feature.
                if i == last:
                    shape = [n_rows, self.rank, 1]
                elif i == 0:
                    shape = [n_rows, 1, self.rank]
                else:
                    shape = [n_rows, self.rank, self.rank]

                if self.init_vals is None:
                    content = tf.random_normal(shape, stddev=self.init_std)
                else:
                    assert(self.init_vals[i].shape==tuple(shape))
                    content = self.init_vals[i] + tf.random_normal(shape, stddev=self.init_std)

                self.G[i] = tf.Variable(content, trainable=True, name='G_{}'.format(i))
                # Slice 0 keeps weight 1; every other slice is scaled by exp_reg.
                exp_weights = tf.constant([1] + [self.exp_reg] * self.s_features[i], shape=(n_rows, 1, 1))
                self.G_exp[i] = self.G[i] * exp_weights

            def selected_core(i):
                # Per-example core slice chosen by column i, offset by slice 0.
                cur_col = self.X[:, i]
                gathered = tf.gather(self.G[i], cur_col)
                return tf.add(self.G[i][0], gathered)

            # Prediction: batched product of the selected slices of every core.
            chain = selected_core(0)
            for i in range(1, self.n_features):
                chain = tf.batch_matmul(chain, selected_core(i))
            self.outputs = tf.squeeze(chain, [1, 2])

            def summed_kron(i, shape):
                # Kronecker product of core i with itself, summed over slices.
                return tf.reshape(
                    tensor=tf.einsum('nip,njq->ijpq', self.G_exp[i], self.G_exp[i]),
                    shape=shape
                )

            # Regularisation penalty: chained product of the summed
            # Kronecker squares.
            self.penalty = summed_kron(0, (1, self.rank**2))
            for i in range(1, self.n_features):
                last_dim = 1 if i == last else self.rank**2
                self.penalty = tf.matmul(self.penalty, summed_kron(i, (self.rank**2, last_dim)))

            # MSE loss (the original also kept a commented-out truncated
            # log-loss alternative here).
            self.loss = tf.reduce_mean((self.outputs - self.Y)**2)
            self.penalized_loss = self.loss + self.reg * tf.squeeze(self.penalty)

            # Training and bookkeeping ops.
            self.trainer = tf.train.AdamOptimizer(0.001).minimize(self.penalized_loss)
            self.init_all_vars = tf.initialize_all_variables()
            self.saver = tf.train.Saver()
Ejemplo n.º 33
0
    def build(self):
        """Reset the default graph and build embeddings, losses and train ops.

        Reads ``num_cons``, ``num_rels``, ``dim``, ``batch_size``, ``L1``,
        ``_m1`` and ``_m2`` from ``self`` and stores on ``self``:

        * variables ``_ht`` (concept vectors), ``_r`` (relation vectors) and
          ``_Mh`` (one flattened ``dim x dim`` matrix per relation);
        * assignment placeholders/ops ``_ht_assign``, ``_r_assign``,
          ``_m_assign``, ``_assign_ht_op``, ``_assign_r_op``, ``_assign_m_op``;
        * index placeholders ``_A_*_index`` and ``_B_*_index``;
        * margin losses ``_A_loss``, ``_B_t_loss``, ``_B_h_loss``;
        * soft norm constraints ``_C_loss_A``, ``_C_loss_B1``, ``_C_loss_B2``;
        * ``_lr``, ``_opt``, one train op per loss, and ``_saver``.

        Changes from the previous revision: batched products use
        ``tf.matmul`` (``tf.batch_matmul`` does not exist in the TF releases
        that provide ``tf.subtract`` and ``tf.concat(values, axis)``, which
        this method already uses), and the ``_m_assign`` placeholder is named
        ``'m_assign'`` instead of reusing ``'r_assign'``.
        """
        tf.reset_default_graph()

        dim = self.dim

        def index_placeholder(name):
            # One int64 index per example of the batch.
            return tf.placeholder(dtype=tf.int64,
                                  shape=[self.batch_size],
                                  name=name)

        def project(vectors, matrices):
            # Row-vector times matrix, independently for every batch entry.
            return tf.squeeze(
                tf.matmul(tf.expand_dims(vectors, 1), matrices), [1])

        def distance(residual):
            # Per-example L1 or L2 norm, selected by self.L1.
            if self.L1:
                return tf.reduce_sum(tf.abs(residual), 1)
            return tf.sqrt(tf.reduce_sum(tf.square(residual), 1))

        def margin_loss(positive, negative, margin):
            # sum over the batch of [ ||positive|| + margin - ||negative|| ]+
            return tf.reduce_sum(
                tf.maximum(
                    tf.subtract(tf.add(distance(positive), margin),
                                distance(negative)), 0.))

        def norm_excess(vectors, limit):
            # [ ||v||_2 - limit ]+ for every row of `vectors`.
            return tf.maximum(
                tf.subtract(
                    tf.sqrt(tf.reduce_sum(tf.square(vectors), 1)), limit),
                0.)

        with tf.variable_scope("graph", initializer=orthogonal_initializer()):
            # ---- variables (embeddings and per-relation transformations) ----
            self._ht = ht = tf.get_variable(
                name='ht',  # shared by heads AND tails
                shape=[self.num_cons, dim],
                dtype=tf.float32)
            self._r = r = tf.get_variable(name='r',
                                          shape=[self.num_rels, dim],
                                          dtype=tf.float32)
            # One flattened dim x dim matrix per relation.
            self._Mh = Mh = tf.get_variable(
                name='Mh',
                shape=[self.num_rels, dim * dim],
                dtype=tf.float32,
            )

            # Placeholders used to overwrite the variables from outside.
            self._ht_assign = ht_assign = tf.placeholder(
                name='ht_assign',
                shape=[self.num_cons, dim],
                dtype=tf.float32)
            self._r_assign = r_assign = tf.placeholder(
                name='r_assign',
                shape=[self.num_rels, dim],
                dtype=tf.float32)
            self._m_assign = m_assign = tf.placeholder(
                name='m_assign',
                shape=[self.num_rels, dim * dim],
                dtype=tf.float32)

            # ---- Type A loss ----
            # [ ||h M + r - t M|| + m1 - ||h' M + r - t' M|| ]+
            self._A_h_index = A_h_index = index_placeholder('A_h_index')
            self._A_r_index = A_r_index = index_placeholder('A_r_index')
            self._A_t_index = A_t_index = index_placeholder('A_t_index')
            self._A_hn_index = A_hn_index = index_placeholder('A_hn_index')
            self._A_tn_index = A_tn_index = index_placeholder('A_tn_index')

            A_h_con_batch = tf.nn.embedding_lookup(ht, A_h_index)
            A_t_con_batch = tf.nn.embedding_lookup(ht, A_t_index)
            A_rel_batch = tf.nn.embedding_lookup(r, A_r_index)
            A_matrices = tf.reshape(tf.nn.embedding_lookup(Mh, A_r_index),
                                    [-1, dim, dim])
            # Negative-sampled heads and tails.
            A_hn_con_batch = tf.nn.embedding_lookup(ht, A_hn_index)
            A_tn_con_batch = tf.nn.embedding_lookup(ht, A_tn_index)

            # Heads and tails are projected with the SAME relation matrix.
            A_h_batch_mul = project(A_h_con_batch, A_matrices)
            A_t_batch_mul = project(A_t_con_batch, A_matrices)
            A_hn_batch_mul = project(A_hn_con_batch, A_matrices)
            A_tn_batch_mul = project(A_tn_con_batch, A_matrices)

            # h M + r - t M for positive and negative samples.
            A_loss_matrix = tf.subtract(tf.add(A_h_batch_mul, A_rel_batch),
                                        A_t_batch_mul)
            A_neg_matrix = tf.subtract(tf.add(A_hn_batch_mul, A_rel_batch),
                                       A_tn_batch_mul)
            self._A_loss = A_loss = margin_loss(A_loss_matrix, A_neg_matrix,
                                                self._m1)

            # Soft constraints. For type A only the positive h and t are
            # penalised, both before and after projection.
            A_vec_restraint = tf.concat([
                norm_excess(A_h_con_batch, 1.),
                norm_excess(A_t_con_batch, 1.),
            ], 0)
            A_proj_restraint = tf.concat([
                norm_excess(A_h_batch_mul, 1.),
                norm_excess(A_t_batch_mul, 1.),
            ], 0)
            A_rel_restraint = norm_excess(A_rel_batch, 2.)

            # ---- Type B losses ----
            # A tail-related and a head-related loss that share the same
            # input placeholders.
            self._B_h_index = B_h_index = index_placeholder('B_h_index')
            self._B_r_index = B_r_index = index_placeholder('B_r_index')
            self._B_t_index = B_t_index = index_placeholder('B_t_index')
            # Negative-sampled heads and tails.
            self._B_hn_index = B_hn_index = index_placeholder('B_hn_index')
            self._B_tn_index = B_tn_index = index_placeholder('B_tn_index')

            B_con_h_batch = tf.nn.embedding_lookup(ht, B_h_index)
            B_con_t_batch = tf.nn.embedding_lookup(ht, B_t_index)
            B_matrices = tf.reshape(tf.nn.embedding_lookup(Mh, B_r_index),
                                    [-1, dim, dim])
            B_rel_batch = tf.nn.embedding_lookup(r, B_r_index)
            B_con_hn_batch = tf.nn.embedding_lookup(ht, B_hn_index)
            B_con_tn_batch = tf.nn.embedding_lookup(ht, B_tn_index)

            # The previous revision computed these four projections twice
            # (once per B loss) from identical inputs; they are shared here.
            B_batch_mul_head = project(B_con_h_batch, B_matrices)
            B_batch_mul_tail = project(B_con_t_batch, B_matrices)
            B_neg_batch_mul_head = project(B_con_hn_batch, B_matrices)
            B_neg_batch_mul_tail = project(B_con_tn_batch, B_matrices)

            # Tail-related: h M + r - t M, positive then negative samples.
            B_t_loss_matrix = tf.subtract(
                tf.add(B_batch_mul_head, B_rel_batch), B_batch_mul_tail)
            B_tn_loss_matrix = tf.subtract(
                tf.add(B_neg_batch_mul_head, B_rel_batch),
                B_neg_batch_mul_tail)
            self._B_t_loss = B_t_loss = margin_loss(
                B_t_loss_matrix, B_tn_loss_matrix, self._m2)

            # Head-related: t M - r - h M, positive then negative samples.
            B_h_loss_matrix = tf.subtract(
                tf.subtract(B_batch_mul_tail, B_rel_batch),
                B_batch_mul_head)
            B_hn_loss_matrix = tf.subtract(
                tf.subtract(B_neg_batch_mul_tail, B_rel_batch),
                B_neg_batch_mul_head)
            self._B_h_loss = B_h_loss = margin_loss(
                B_h_loss_matrix, B_hn_loss_matrix, self._m2)

            # Penalise pre- and post-projection vectors whose norm exceeds 1
            # (positive and negative samples alike for type B).
            B_vec_restraint = tf.concat([
                norm_excess(B_con_h_batch, 1.),
                norm_excess(B_con_t_batch, 1.),
                norm_excess(B_con_hn_batch, 1.),
                norm_excess(B_con_tn_batch, 1.),
            ], 0)
            B_proj_restraint = tf.concat([
                norm_excess(B_batch_mul_head, 1.),
                norm_excess(B_batch_mul_tail, 1.),
                norm_excess(B_neg_batch_mul_head, 1.),
                norm_excess(B_neg_batch_mul_tail, 1.),
            ], 0)
            B_rel_restraint = norm_excess(B_rel_batch, 2.)

            # ---- Type C losses: soft constraints on vector norms ----
            self._C_loss_A = C_loss_A = tf.reduce_sum(
                tf.concat([A_vec_restraint, A_proj_restraint, A_rel_restraint],
                          0))
            # B1 and B2 are kept as separate tensors/train ops for callers,
            # although both are built from the same shared projections.
            self._C_loss_B1 = C_loss_B1 = tf.reduce_sum(
                tf.concat(
                    [B_vec_restraint, B_proj_restraint, B_rel_restraint], 0))
            self._C_loss_B2 = C_loss_B2 = tf.reduce_sum(
                tf.concat(
                    [B_vec_restraint, B_proj_restraint, B_rel_restraint], 0))

            # ---- optimizer: one train op per loss, shared learning rate ----
            self._lr = lr = tf.placeholder(tf.float32)
            self._opt = opt = tf.train.GradientDescentOptimizer(lr)
            self._train_op_A = opt.minimize(A_loss)
            self._train_op_B_t = opt.minimize(B_t_loss)
            self._train_op_B_h = opt.minimize(B_h_loss)
            self._train_op_C_A = opt.minimize(C_loss_A)
            self._train_op_C_B1 = opt.minimize(C_loss_B1)
            self._train_op_C_B2 = opt.minimize(C_loss_B2)

            self._assign_ht_op = ht.assign(ht_assign)
            self._assign_r_op = self._r.assign(r_assign)
            self._assign_m_op = self._Mh.assign(m_assign)

            # Saver
            self._saver = tf.train.Saver()
Ejemplo n.º 34
0
    def _build_encoder(self):
        """Build the coattention encoder and store its output in ``self._u``.

        Embeds ``self._contexts`` and ``self._questions`` with a fixed
        embedding matrix, encodes both with an LSTM cell, appends a zero
        sentinel row to each encoding, combines them through the affinity
        matrix into a coattention context, and runs a bidirectional LSTM
        over that context. Each stage is placed on the device returned by
        ``self._next_device()``.
        """
        cfg = self._params
        batch_size = cfg.batch_size
        hidden_size = cfg.hidden_size
        min_timesteps = cfg.q_timesteps
        max_timesteps = cfg.c_timesteps

        def embed_inputs(embedding, token_ids):
            # Look up every element of token_ids, then swap the first two axes.
            vectors = tf.map_fn(
                lambda ids: tf.nn.embedding_lookup(embedding, ids),
                token_ids,
                dtype=tf.float32)
            return tf.transpose(vectors, perm=[1, 0, 2])

        def with_sentinel(encodings):
            # Append one all-zero row to every element of the batch.
            def append_zero_row(rows):
                return tf.concat(
                    0, [rows, tf.zeros([1, hidden_size], dtype=tf.float32)])

            return tf.map_fn(append_zero_row, encodings, dtype=tf.float32)

        with tf.variable_scope('embedding'), tf.device(self._next_device()):
            # Fixed (non-trainable) embedding matrix.
            embedding = tf.get_variable(
                'embedding', [self._vsize, cfg.emb_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=1e-4),
                trainable=False)
            c_embedding = embed_inputs(embedding, self._contexts)
            q_embedding = embed_inputs(embedding, self._questions)
            # The same cell object is used for context and question below.
            lstm_enc = tf.nn.rnn_cell.LSTMCell(hidden_size)

        with tf.variable_scope('c_embedding'), tf.device(self._next_device()):
            # Context encoding plus sentinel.
            c, _ = tf.nn.dynamic_rnn(lstm_enc, c_embedding, dtype=tf.float32)
            c_encoding = with_sentinel(c)

        with tf.variable_scope('q_embedding'), tf.device(self._next_device()):
            # Question encoding plus sentinel.
            q, _ = tf.nn.dynamic_rnn(lstm_enc, q_embedding, dtype=tf.float32)
            q_encoding = with_sentinel(q)
            # Extra non-linear layer so question and context encodings differ.
            q_encoding = tf.tanh(
                batch_linear(q_encoding, min_timesteps + 1, True))
            q_variation = tf.transpose(q_encoding, perm=[0, 2, 1])

        with tf.variable_scope('coattention'), tf.device(self._next_device()):
            # Affinity matrix, (batch_size, context+1, question+1).
            L = tf.batch_matmul(c_encoding, q_variation)
            # (batch_size, question+1, context+1)
            L_t = tf.transpose(L, perm=[0, 2, 1])
            # Softmax applied per batch element to L_t and to L.
            a_q = tf.map_fn(tf.nn.softmax, L_t, dtype=tf.float32)
            a_c = tf.map_fn(tf.nn.softmax, L, dtype=tf.float32)
            # Summaries with respect to the question,
            # (batch_size, question+1, hidden_size).
            c_q = tf.batch_matmul(a_q, c_encoding)
            c_q_t = tf.transpose(c_q, perm=[0, 2, 1])
            c_q_emb = tf.concat(1, [q_variation, c_q_t])
            # Summaries of the previous attention with respect to the context.
            c_d = tf.batch_matmul(c_q_emb, a_c, adj_y=True)
            # Final coattention context, (batch_size, context+1, 3*hidden_size).
            c_d_t = tf.transpose(c_d, perm=[0, 2, 1])
            co_att = tf.concat(2, [c_encoding, c_d_t])

        with tf.variable_scope('encoder'), tf.device(self._next_device()):
            # Bidirectional LSTM over the coattention context; every
            # sequence is given length max_timesteps.
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
            lengths = tf.to_int64([max_timesteps] * batch_size)
            u, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                co_att,
                sequence_length=lengths,
                dtype=tf.float32)
            # Join the forward and backward outputs along the feature axis.
            self._u = tf.concat(2, u)
Ejemplo n.º 35
0
 def test_BatchMatMul(self):
     """Check ``tf.batch_matmul`` with ``adj_x=True`` on two random operands."""
     # Operand shapes: (2, 4, 3, 4) and (2, 4, 3, 5).
     operands = self.random((2, 4, 3, 4), (2, 4, 3, 5))
     product = tf.batch_matmul(*operands, adj_x=True)
     self.check(product)
Ejemplo n.º 36
0
def create(model, config):
    """Populate `model` with the graph nodes of a two-stage LSTM classifier.

    The graph is built in this order, every node being stored in `model`
    under its own name:

    * a shared embedding (`We`, `Be`);
    * a first stacked LSTM-like network (`p*` keys) unrolled over `steps`
      time steps, with a squared-error loss `pms`;
    * a "memory" tensor per layer (`hm_<i>`) built from the first network's
      cell states;
    * a second stacked network (`h*` keys) whose cell state starts from the
      first network's last cell state and which attends over the memory,
      with a squared-error loss `hms`;
    * a feed-forward classifier over the concatenated last outputs of both
      networks, with a cross-entropy loss `cce`;
    * two training ops: `tms` (minimizes `pms + hms`) and `tce`
      (minimizes `cce`).

    Args:
        model: dict-like container that receives all placeholders,
            variables, tensors, summaries and training ops.
        config: object exposing `getint`, `getfloat` and `get`; the keys
            read are `vocabsize`, `wvecsize`, `depth`, `steps`, `memory`,
            `batch`, `deepness`, `classes`, `mslrate`, `msdstep`, `msdrate`,
            `msoptim`, `celrate`, `cedstep`, `cedrate` and `ceoptim`. The
            two `*optim` values are looked up as attribute names on
            `tf.train`.

    Returns:
        The same `model` object, now populated.
    """
    # dim_v: vocabulary size, dim_i: embedding/hidden width, dim_d: number of
    # stacked recurrent layers, dim_t: unrolled time steps, dim_m: memory
    # length, dim_b: batch size, dim_n: classifier layers, dim_c: classes.
    dim_v, dim_i, dim_d, dim_t, dim_m, dim_b, dim_n, dim_c = config.getint(
        'vocabsize'), config.getint('wvecsize'), config.getint(
            'depth'), config.getint('steps'), config.getint(
                'memory'), config.getint('batch'), config.getint(
                    'deepness'), config.getint('classes')
    # Learning rate, decay steps, decay rate and optimizer class for the
    # mean-squared ("ms") and cross-entropy ("ce") objectives.
    lrate_ms, dstep_ms, drate_ms, optim_ms = config.getfloat(
        'mslrate'), config.getint('msdstep'), config.getfloat(
            'msdrate'), getattr(tf.train, config.get('msoptim'))
    lrate_ce, dstep_ce, drate_ce, optim_ce = config.getfloat(
        'celrate'), config.getint('cedstep'), config.getfloat(
            'cedrate'), getattr(tf.train, config.get('ceoptim'))

    # Embedding matrix and an additive embedding bias shared by all inputs
    # and labels of both networks.
    with tf.name_scope('embedding'):
        model['We'] = tf.Variable(tf.truncated_normal([dim_v, dim_i],
                                                      stddev=1.0 / dim_i),
                                  name='We')
        model['Be'] = tf.Variable(tf.truncated_normal([1, dim_i],
                                                      stddev=1.0 / dim_i),
                                  name='Be')

    # First network ("p" prefix).
    with tf.name_scope('plstm'):
        # One int32 index placeholder per time step, embedded as We[idx] + Be.
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['pxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='pxi_%i' % ii)
                model['px_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['pxi_%i' % ii]),
                                             model['Be'],
                                             name='px_%i' % ii)

        # Targets are token indices too, embedded with the same We/Be.
        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['pyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='pyi_%i' % ii)
                model['py_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['pyi_%i' % ii]),
                                             model['Be'],
                                             name='py_%i' % ii)

        for i in xrange(dim_d):
            # Layer 0 reads the embeddings; deeper layers read the previous
            # layer's output at the same time step.
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['px_%i_%i' %
                          (i,
                           ii)] = model['px_%i' %
                                        ii] if i == 0 else model['ph_%i_%i' %
                                                                 (i - 1, ii)]

            # The three gates below depend only on the layer input at the
            # current step (there is no recurrent hidden-state term).
            with tf.name_scope('inputgate_%i' % i):
                model['pWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='pWi_%i' % i)
                model['pBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='pBi_%i' % i)
                for ii in xrange(dim_t):
                    model['pi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['px_%i_%i' % (i, ii)],
                                      model['pWi_%i' % i]),
                            model['pBi_%i' % i]),
                        name='pi_%i_%i' % (i, ii))

            with tf.name_scope('forgetgate_%i' % i):
                model['pWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='pWf_%i' % i)
                model['pBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='pBf_%i' % i)
                for ii in xrange(dim_t):
                    model['pf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['px_%i_%i' % (i, ii)],
                                      model['pWf_%i' % i]),
                            model['pBf_%i' % i]),
                        name='pf_%i_%i' % (i, ii))

            with tf.name_scope('outputgate_%i' % i):
                model['pWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='pWo_%i' % i)
                model['pBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='pBo_%i' % i)
                for ii in xrange(dim_t):
                    model['po_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['px_%i_%i' % (i, ii)],
                                      model['pWo_%i' % i]),
                            model['pBo_%i' % i]),
                        name='po_%i_%i' % (i, ii))

            with tf.name_scope('cellstate_%i' % i):
                model['pWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='pWc_' + str(i))
                model['pBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='pBc_' + str(i))
                for ii in xrange(dim_t):
                    # pcc is the incoming cell state: a randomly initialized
                    # variable at step 0, the previous step's cell otherwise.
                    model['pcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 /
                                            dim_i),
                        name='pcc_%i_%i' % (i, ii)) if ii == 0 else model[
                            'pc_%i_%i' %
                            (i, ii - 1)]  # consider starting with all zeros
                    # Where the input index is 0 the incoming cell state is
                    # passed through unchanged (presumably index 0 marks
                    # padding -- confirm against the data pipeline);
                    # elsewhere the cell is forget * old + input * candidate.
                    model['pc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['pxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['pcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['pf_%i_%i' % (i, ii)],
                                   model['pcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['pi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['px_%i_%i' % (i, ii)],
                                                  model['pWc_%i' % i]),
                                        model['pBc_%i' % i])))),
                        name='pc_%i_%i' % (i, ii))

            # Affine projection of the cell state, squashed and gated below.
            with tf.name_scope('hidden_%i' % i):
                model['pWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='pWz_%i' % i)
                model['pBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='pBz_%i' % i)
                for ii in xrange(dim_t):
                    model['pz_%i_%i' % (i, ii)] = tf.add(
                        tf.matmul(model['pc_%i_%i' % (i, ii)],
                                  model['pWz_%i' % i]),
                        model['pBz_%i' % i],
                        name='pz_%i_%i' % (i, ii))

            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['ph_%i_%i' % (i, ii)] = tf.mul(
                        model['po_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['pz_%i_%i' % (i, ii)]),
                        name='ph_%i_%i' % (i, ii))

        # The network output at each step is the top layer's output.
        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['ph_%i' % ii] = model['ph_%i_%i' % (dim_d - 1, ii)]

        # Per-example squared error between embedded label and output,
        # zeroed where the input index is 0, then summed over steps and batch.
        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['pms_%i' %
                      ii] = tf.select(tf.equal(model['pxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['py_%i' % ii],
                                                     model['ph_%i' % ii])),
                                          [1]),
                                      name='pms_%i' % ii)
            model['pms'] = tf.reduce_sum(tf.add_n(
                [model['pms_%i' % ii] for ii in xrange(dim_t)]),
                                         name='pms')
            model['spms'] = tf.scalar_summary(model['pms'].name, model['pms'])

    # Memory: the first network's cell states of the last dim_m steps, per
    # layer, transposed so that the batch dimension comes first.
    # NOTE(review): the list holds dim_m tensors but is reshaped with dim_t
    # as its leading dimension; these only agree when dim_m == dim_t --
    # confirm the intended configuration.
    with tf.name_scope('memory'):
        for i in xrange(dim_d):
            model['hmi_%i' % i] = tf.reshape([
                model['pc_%i_%i' % (i, ii)]
                for ii in xrange(dim_t - dim_m, dim_t)
            ], [dim_t, dim_b, dim_i],
                                             name='hmi_%i' % i)
            model['hm_%i' % i] = tf.transpose(model['hmi_%i' % i], [1, 0, 2],
                                              name='hm_%i' % i)

    # Second network ("h" prefix): same gate structure as the first, plus
    # attention over the memory built above.
    with tf.name_scope('hlstm'):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['hxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='hxi_%i' % ii)
                model['hx_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['hxi_%i' % ii]),
                                             model['Be'],
                                             name='hx_%i' % ii)

        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['hyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b],
                                                      name='hyi_%i' % ii)
                model['hy_%i' % ii] = tf.add(tf.nn.embedding_lookup(
                    model['We'], model['hyi_%i' % ii]),
                                             model['Be'],
                                             name='hy_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['hx_%i_%i' %
                          (i,
                           ii)] = model['hx_%i' %
                                        ii] if i == 0 else model['hh_%i_%i' %
                                                                 (i - 1, ii)]

            with tf.name_scope('inputgate_%i' % i):
                model['hWi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWi_%i' % i)
                model['hBi_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='hBi_%i' % i)
                for ii in xrange(dim_t):
                    model['hi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hx_%i_%i' % (i, ii)],
                                      model['hWi_%i' % i]),
                            model['hBi_%i' % i]),
                        name='hi_%i_%i' % (i, ii))

            with tf.name_scope('forgetgate_%i' % i):
                model['hWf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWf_%i' % i)
                model['hBf_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='hBf_%i' % i)
                for ii in xrange(dim_t):
                    model['hf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hx_%i_%i' % (i, ii)],
                                      model['hWf_%i' % i]),
                            model['hBf_%i' % i]),
                        name='hf_%i_%i' % (i, ii))

            with tf.name_scope('outputgate_%i' % i):
                model['hWo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWo_%i' % i)
                model['hBo_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='hBo_%i' % i)
                for ii in xrange(dim_t):
                    model['ho_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(
                            tf.matmul(model['hx_%i_%i' % (i, ii)],
                                      model['hWo_%i' % i]),
                            model['hBo_%i' % i]),
                        name='ho_%i_%i' % (i, ii))

            with tf.name_scope('cellstate_%i' % i):
                model['hWc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWc_' + str(i))
                model['hBc_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='hBc_' + str(i))
                for ii in xrange(dim_t):
                    # The initial cell state is the first network's final
                    # cell state for the same layer.
                    model['hcc_%i_%i' % (i, ii)] = model[
                        'pc_%i_%i' %
                        (i, dim_t - 1)] if ii == 0 else model['hc_%i_%i' %
                                                              (i, ii - 1)]
                    model['hc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['hxi_%i' % ii],
                                 tf.zeros([dim_b], tf.int32)),
                        model['hcc_%i_%i' % (i, ii)],
                        tf.add(
                            tf.mul(model['hf_%i_%i' % (i, ii)],
                                   model['hcc_%i_%i' % (i, ii)]),
                            tf.mul(
                                model['hi_%i_%i' % (i, ii)],
                                tf.nn.tanh(
                                    tf.add(
                                        tf.matmul(model['hx_%i_%i' % (i, ii)],
                                                  model['hWc_%i' % i]),
                                        model['hBc_%i' % i])))),
                        name='hc_%i_%i' % (i, ii))

            # Attention over the memory: the current cell state is projected
            # by hWa, multiplied against every memory slot, reshaped to
            # [dim_t, dim_b], biased by hBa and passed through softmax
            # ('hat'); 'hcx' is the resulting weighted combination of memory
            # slots, reshaped to [dim_b, dim_i].
            # NOTE(review): softmax is applied to a [dim_t, dim_b] tensor; if
            # tf.nn.softmax normalizes over the last axis, the weights are
            # normalized across the batch rather than across time steps --
            # confirm this is intended.
            with tf.name_scope('attention_%i' % i):
                model['hWa_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWa_%i' % i)
                model['hBa_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_t, 1], stddev=1.0 / dim_t),
                                                  name='hBa_%i' % i)
                for ii in xrange(dim_t):
                    model['hat_%i_%i' % (i, ii)] = tf.nn.softmax(
                        tf.add(
                            tf.reshape(
                                tf.transpose(
                                    tf.batch_matmul(
                                        model['hm_%i' % i],
                                        tf.reshape(
                                            tf.transpose(
                                                tf.matmul(
                                                    model['hWa_%i' % i],
                                                    tf.transpose(
                                                        model['hc_%i_%i' %
                                                              (i, ii)]))),
                                            [dim_b, dim_i, 1]))),
                                [dim_t, dim_b]), model['hBa_%i' % i]),
                        name='hat_%i_%i' % (i, ii))
                    model['hcx_%i_%i' %
                          (i, ii)] = tf.reshape(tf.batch_matmul(
                              tf.reshape(
                                  tf.transpose(model['hat_%i_%i' % (i, ii)]),
                                  [dim_b, 1, dim_t]), model['hm_%i' % i]),
                                                [dim_b, dim_i],
                                                name='hcx_%i_%i' % (i, ii))

            # The pre-activation combines the attention context (hWx) and
            # the cell state (hWz).
            with tf.name_scope('hidden_%i' % i):
                model['hWx_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWx_%i' % i)
                model['hWz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [dim_i, dim_i], stddev=1.0 / dim_i),
                                                  name='hWz_%i' % i)
                model['hBz_%i' % i] = tf.Variable(tf.truncated_normal(
                    [1, dim_i], stddev=1.0 / dim_i),
                                                  name='hBz_%i' % i)
                for ii in xrange(dim_t):
                    model['hz_%i_%i' % (i, ii)] = tf.add(tf.add(
                        tf.matmul(model['hcx_%i_%i' % (i, ii)],
                                  model['hWx_%i' % i]),
                        tf.matmul(model['hc_%i_%i' % (i, ii)],
                                  model['hWz_%i' % i])),
                                                         model['hBz_%i' % i],
                                                         name='hz_%i_%i' %
                                                         (i, ii))

            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['hh_%i_%i' % (i, ii)] = tf.mul(
                        model['ho_%i_%i' % (i, ii)],
                        tf.nn.tanh(model['hz_%i_%i' % (i, ii)]),
                        name='hh_%i_%i' % (i, ii))

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['hh_%i' % ii] = model['hh_%i_%i' % (dim_d - 1, ii)]

        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['hms_%i' %
                      ii] = tf.select(tf.equal(model['hxi_%i' % ii],
                                               tf.zeros([dim_b], tf.int32)),
                                      tf.zeros([dim_b], tf.float32),
                                      tf.reduce_sum(
                                          tf.square(
                                              tf.sub(model['hy_%i' % ii],
                                                     model['hh_%i' % ii])),
                                          [1]),
                                      name='hms_%i' % ii)
            model['hms'] = tf.reduce_sum(tf.add_n(
                [model['hms_%i' % ii] for ii in xrange(dim_t)]),
                                         name='hms')
            model['shms'] = tf.scalar_summary(model['hms'].name, model['hms'])

    # Classifier: dim_n affine layers (no nonlinearity between them) over the
    # concatenation of both networks' last-step outputs; the final layer
    # maps to dim_c classes.
    with tf.name_scope('classification'):
        with tf.name_scope('label'):
            model['clabel'] = tf.placeholder(tf.float32, [dim_b, dim_c],
                                             name='clabel')

        for i in xrange(dim_n):
            with tf.name_scope('layer_%i' % i):
                model['cW_%i' % i] = tf.Variable(
                    tf.truncated_normal([2 * dim_i, 2 * dim_i],
                                        stddev=0.5 / dim_i),
                    name='cW_%i' %
                    i) if i != dim_n - 1 else tf.Variable(tf.truncated_normal(
                        [2 * dim_i, dim_c], stddev=1.0 / dim_c),
                                                          name='cW_%i' % i)
                model['cB_%i' % i] = tf.Variable(
                    tf.truncated_normal([1, 2 * dim_i], stddev=0.5 / dim_i),
                    name='cB_%i' % i) if i != dim_n - 1 else tf.Variable(
                        tf.truncated_normal([1, dim_c], stddev=1.0 / dim_c),
                        name='cB_%i' % i)
                model['cx_%i' %
                      i] = tf.concat(1, [
                          model['ph_%i' % (dim_t - 1)], model['hh_%i' %
                                                              (dim_t - 1)]
                      ],
                                     name='cx_%i' %
                                     i) if i == 0 else model['cy_%i' % (i - 1)]
                model['cy_%i' % i] = tf.add(tf.matmul(model['cx_%i' % i],
                                                      model['cW_%i' % i]),
                                            model['cB_%i' % i],
                                            name='cy_%i' % i)

        with tf.name_scope('output'):
            model['output'] = tf.nn.softmax(model['cy_%i' % (dim_n - 1)],
                                            name='output')

        # Cross-entropy summed over the batch; the log is taken of the
        # softmax output directly, with no clipping.
        with tf.name_scope('crossentropy'):
            model['cce'] = tf.reduce_sum(
                -tf.mul(model['clabel'], tf.log(model['output'])), name='cce')
            model['scce'] = tf.scalar_summary(model['cce'].name, model['cce'])

    # Training op for the two squared-error losses, with its own global step
    # and exponentially decayed learning rate.
    model['gsms'] = tf.Variable(0, trainable=False, name='gsms')
    model['lrms'] = tf.train.exponential_decay(lrate_ms,
                                               model['gsms'],
                                               dstep_ms,
                                               drate_ms,
                                               staircase=False,
                                               name='lrms')
    model['tms'] = optim_ms(model['lrms']).minimize(model['pms'] +
                                                    model['hms'],
                                                    global_step=model['gsms'],
                                                    name='tms')

    # Separate training op, global step and learning-rate schedule for the
    # classification loss.
    model['gsce'] = tf.Variable(0, trainable=False, name='gsce')
    model['lrce'] = tf.train.exponential_decay(lrate_ce,
                                               model['gsce'],
                                               dstep_ce,
                                               drate_ce,
                                               staircase=False,
                                               name='lrce')
    model['tce'] = optim_ce(model['lrce']).minimize(model['cce'],
                                                    global_step=model['gsce'],
                                                    name='tce')

    return model
Ejemplo n.º 37
0
'''
# Encode the query input with the 'encode_x' network.
scope = 'encode_x'
x_hat_encode = make_conv_net(x_hat, scope)
#x_hat_inv_mag = tf.rsqrt(tf.clip_by_value(tf.reduce_sum(tf.square(x_hat_encode),1,keep_dims=True),eps,float("inf")))
cos_sim_list = []
# When the encoders are not tied, the samples get their own variable scope;
# otherwise they are encoded under the query encoder's scope.
if not tie:
    scope = 'encode_x_i'
for i in range(n_samples):
    # The third argument is true when tied or for every sample after the
    # first (presumably a variable-reuse flag -- confirm against
    # make_conv_net).
    x_i_encode = make_conv_net(x_i[:, i, :, :, :], scope, tie or i > 0,
                               not x_i_learn)
    # Inverse L2 norm of the sample encoding; the squared norm is clipped
    # from below at eps before the reciprocal square root.
    x_i_inv_mag = tf.rsqrt(
        tf.clip_by_value(
            tf.reduce_sum(tf.square(x_i_encode), 1, keep_dims=True), eps,
            float("inf")))
    # Per-example dot product of the query and sample encodings, computed as
    # a (1 x d) by (d x 1) batched matrix product.
    dotted = tf.squeeze(
        tf.batch_matmul(tf.expand_dims(x_hat_encode, 1),
                        tf.expand_dims(x_i_encode, 2)), [
                            1,
                        ])
    # Only the sample-side norm is applied; the query-side factor is
    # commented out.
    cos_sim_list.append(dotted * x_i_inv_mag)
    #*x_hat_inv_mag
# One similarity column per sample, turned into attention weights by softmax.
cos_sim = tf.concat(1, cos_sim_list)
tf.histogram_summary('cos sim', cos_sim)
weighting = tf.nn.softmax(cos_sim)
# Attention-weighted combination of the sample labels y_i.
label_prob = tf.squeeze(tf.batch_matmul(tf.expand_dims(weighting, 1), y_i))
tf.histogram_summary('label prob', label_prob)

# Top-1 accuracy of label_prob against the target indices y_hat_ind.
top_k = tf.nn.in_top_k(label_prob, y_hat_ind, 1)
acc = tf.reduce_mean(tf.to_float(top_k))
tf.scalar_summary('train avg accuracy', acc)
# Log-probability weighted by y_hat and summed over axis 1; probabilities
# are clipped to [eps, 1.0] before the log.
correct_prob = tf.reduce_sum(
    tf.log(tf.clip_by_value(label_prob, eps, 1.0)) * y_hat, 1)
Ejemplo n.º 38
0
    def __call__(self, inputs, state, scope=None):
        """Compute one LSTM step and return the new output and state.

        Args:
            inputs: input tensor for this step; its second dimension is used
                as the input size (presumably shaped (batch, input_size) --
                confirm against the caller).
            state: indexable pair where ``state[0]`` is the previous hidden
                state and ``state[1]`` is the previous cell state.
            scope: optional variable-scope name; defaults to the class name.

        Returns:
            A tuple ``(h_t, [h_t, c_t])``: the new hidden state, and the new
            state as a two-element list of hidden state and cell state.
        """
        scope = scope or type(self).__name__

        input_size = inputs.get_shape()[1]
        h_prev, c_prev = state[0], state[1]

        # Variables are created inside the scope so they cannot clash with
        # same-named variables defined elsewhere.
        with tf.variable_scope(scope):

            def gate_params(suffix):
                # Input weights, recurrent weights and bias for one gate.
                W = tf.get_variable(
                    "W_" + suffix, (input_size, self.state_size),
                    initializer=tf.contrib.layers.xavier_initializer())
                U = tf.get_variable(
                    "U_" + suffix, (self.state_size, self.state_size),
                    initializer=tf.contrib.layers.xavier_initializer())
                b = tf.get_variable("b_" + suffix, (self.state_size,),
                                    initializer=tf.constant_initializer(0))
                return W, U, b

            def pre_activation(params):
                # inputs * W + h_{t-1} * U + b
                W, U, b = params
                return (tf.batch_matmul(inputs, W) +
                        tf.batch_matmul(h_prev, U) + b)

            # Creation order (c, o, i, f) matches the original layout.
            candidate_params = gate_params("c")
            output_params = gate_params("o")
            input_params = gate_params("i")
            forget_params = gate_params("f")

            # The output gate uses its own bias b_o; previously it added the
            # candidate bias b_c, which left b_o created but never used.
            o_t = tf.sigmoid(pre_activation(output_params))
            f_t = tf.sigmoid(pre_activation(forget_params))
            i_t = tf.sigmoid(pre_activation(input_params))
            c_t_tilde = tf.tanh(pre_activation(candidate_params))

            # New cell state: keep part of the old cell, add gated candidate.
            c_t = c_prev * f_t + i_t * c_t_tilde
            h_t = o_t * tf.tanh(c_t)

            new_state = [h_t, c_t]

        # The step output is the hidden state; the state carries both the
        # hidden and the cell state.
        return h_t, new_state
Ejemplo n.º 39
0
    def __init__(self,
                 hidden_num,
                 inputs,
                 seq_len=None,
                 cell=None,
                 optimizer=None,
                 reverse=True,
                 decode_without_input=True):
        """Build the encoder, decoder, reconstruction loss and training op.

        Args:
          hidden_num: number of hidden elements of each LSTM unit.
          inputs: a list of input tensors, each of size
              (batch_num x elem_num); batch_num and elem_num are read from
              the static shape of the first element.
          seq_len: passed as `sequence_length` to the encoder RNN and, when
              `decode_without_input` is true, to the decoder RNN.
          cell: an rnn cell object used for both the encoder and the
              decoder; when omitted, two separate `LSTMCell(hidden_num)`
              instances are created.
          optimizer: optimizer whose `minimize` builds the training op
              (the default is `tf.train.AdamOptimizer()`).
          reverse: if true, the decoder outputs are reversed in time before
              being compared with the inputs.
          decode_without_input: if true, the decoder is fed all-zero inputs;
              otherwise each projected decoder output is fed back as the
              next decoder input.
        """

        self.batch_num = inputs[0].get_shape().as_list()[0]
        self.elem_num = inputs[0].get_shape().as_list()[1]

        if cell is None:
            self._enc_cell = LSTMCell(hidden_num)
            self._dec_cell = LSTMCell(hidden_num)
        else:
            # The same cell object serves both the encoder and the decoder.
            self._enc_cell = cell
            self._dec_cell = cell

        # NOTE(review): the docstring describes `inputs` as a list of
        # per-step tensors, but it is passed unchanged to tf.nn.dynamic_rnn
        # -- confirm that call accepts this input layout.
        with tf.variable_scope('encoder'):
            self.z_codes, self.enc_state = tf.nn.dynamic_rnn(
                self._enc_cell,
                inputs,
                sequence_length=seq_len,
                dtype=tf.float32)
            # Gives the final encoder state a fixed name in the graph.
            self.enc_state = tf.identity(self.enc_state, name='enc_state')

        with tf.variable_scope('decoder') as vs:
            # Output projection from hidden_num to elem_num.
            dec_weight_ = tf.Variable(tf.truncated_normal(
                [hidden_num, self.elem_num], dtype=tf.float32),
                                      name="dec_weight")
            dec_bias_ = tf.Variable(tf.constant(0.1,
                                                shape=[self.elem_num],
                                                dtype=tf.float32),
                                    name="dec_bias")

            if decode_without_input:
                # The decoder sees only zeros; all information comes from
                # the encoder state used as its initial state.
                dec_inputs = [
                    tf.zeros(tf.shape(inputs[0]), dtype=tf.float32)
                    for _ in range(len(inputs))
                ]
                dec_outputs, dec_state = tf.nn.rnn(
                    self._dec_cell,
                    dec_inputs,
                    initial_state=self.enc_state,
                    sequence_length=seq_len,
                    dtype=tf.float32)
                """the shape of each tensor
          dec_output_ : (step_num x hidden_num)
          dec_weight_ : (hidden_num x elem_num)
          dec_bias_ : (elem_num)
          output_ : (step_num x elem_num)
          input_ : (step_num x elem_num)
        """
                if reverse:
                    dec_outputs = dec_outputs[::-1]
                # Stack the per-step outputs and swap the first two axes
                # (presumably giving batch-major order -- confirm), then
                # tile the projection once per batch element so that
                # batch_matmul can apply it.
                dec_output_ = tf.transpose(tf.pack(dec_outputs), [1, 0, 2])
                dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0),
                                      [self.batch_num, 1, 1])
                self.output_ = tf.batch_matmul(dec_output_,
                                               dec_weight_) + dec_bias_

            else:
                # Step-by-step decoding: start from a zero input and feed
                # each projected output back in as the next input.
                dec_state = self.enc_state
                dec_input_ = tf.zeros(tf.shape(inputs[0]), dtype=tf.float32)
                dec_outputs = []
                for step in range(len(inputs)):
                    # Variables created on the first step are reused on all
                    # later steps.
                    if step > 0: vs.reuse_variables()
                    dec_input_, dec_state = self._dec_cell(
                        dec_input_, dec_state)
                    dec_input_ = tf.matmul(dec_input_, dec_weight_) + dec_bias_
                    dec_outputs.append(dec_input_)
                if reverse:
                    dec_outputs = dec_outputs[::-1]
                self.output_ = tf.transpose(tf.pack(dec_outputs), [1, 0, 2])

        # Reconstruction target in the same layout as output_, and the mean
        # squared reconstruction error.
        self.input_ = tf.transpose(tf.pack(inputs), [1, 0, 2])
        self.loss = tf.reduce_mean(tf.square(self.input_ - self.output_))

        if optimizer is None:
            self.train = tf.train.AdamOptimizer().minimize(self.loss)
        else:
            self.train = optimizer.minimize(self.loss)
Ejemplo n.º 40
0
# Bias vector of the output layer, one entry per class.
biases = {
    'out': tf.Variable(tf.random_normal([n_classes],dtype=tf.float32))
}

# need to get a prediction for each sentence

# get the vector representation of each word
#pred1 = np.mean(x1, axis=1)#conv_net(x1, weights, biases, keep_prob)
#pred2 = np.mean(x2, axis=1)#conv_net(x2, weights, biases, keep_prob)

# concatenate both representations
out = tf.concat(1, [x1, x2])#[pred1, pred2])

# predict the relation class
pred = tf.add(tf.batch_matmul(out, weights['out']), biases['out'])
# NOTE(review): tf.shape returns a tensor op, so this prints the op rather
# than concrete dimensions -- confirm whether the static shape was intended.
print(tf.shape(pred))

# define loss and optimizer
# `pred` is passed as unnormalized logits; the loss is averaged over examples.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# An example counts as correct when the argmax of the logits matches the
# argmax of the label vector.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# initializing all variables
init = tf.global_variables_initializer()

# launch the graph
# The saver covers all global variables and keeps only the latest checkpoint.
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
Ejemplo n.º 41
0
def batch_dot(x, y, axes=None):
    """Contract `x` and `y` sample-by-sample along the given axes.

    Both tensors carry the batch on their first axis. Two code paths exist:

    * Both inputs are rank 2: the element-wise product (with `x` transposed
      when the two axes differ) is summed along the requested axis. `axes`
      is indexed unconditionally on this path, so it must be supplied.
    * Otherwise: a batched matrix product is used, with each operand
      adjointed as needed so that the contracted axes line up. With
      `axes=None` neither operand is adjointed.

    A rank-1 result is passed through `expand_dims(out, 1)` before being
    returned.

    # Arguments
        x, y: Keras tensors or variables with `ndim >= 2`.
        axes: a single int (used for both tensors) or a pair of ints giving
            the axis of `x` and the axis of `y` to contract.

    # Returns
        The contracted tensor, with rank at least 2.

    # Examples
        With `x = [[1, 2], [3, 4]]`, `y = [[5, 6], [7, 8]]` and `axes=1`
        the per-row sums are 17 and 53, i.e. the main diagonal of
        `x.dot(y.T)` without ever computing the off-diagonal entries.

        Shape walk-through for `x` of shape `(100, 20)`, `y` of shape
        `(100, 30, 20)` and `axes=(1, 2)`:

        * `x.shape[0]` = 100: kept (batch axis)
        * `x.shape[1]` = 20: dropped, contracted axis of `x`
        * `y.shape[0]` = 100: dropped, the batch axis of `y` is never repeated
        * `y.shape[1]` = 30: kept
        * `y.shape[2]` = 20: dropped, contracted axis of `y`

        giving an output shape of `(100, 30)`.

    ```python
        >>> x_batch = K.ones(shape=(32, 20, 1))
        >>> y_batch = K.ones(shape=(32, 30, 20))
        >>> xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=[1, 2])
        >>> K.int_shape(xy_batch_dot)
        (32, 1, 30)
    ```
    """
    # A bare int means "the same axis on both operands".
    if isinstance(axes, int):
        axes = (axes, axes)

    if ndim(x) == 2 and ndim(y) == 2:
        # The element-wise product was renamed from tf.mul to tf.multiply in
        # TensorFlow 1.x; only the one that exists is ever looked up.
        elementwise = tf.multiply if tf_major_version >= 1 else tf.mul
        if axes[0] == axes[1]:
            out = tf.reduce_sum(elementwise(x, y), axes[0])
        else:
            out = tf.reduce_sum(elementwise(tf.transpose(x, [1, 0]), y),
                                axes[1])
    else:
        # Adjoint flags stay None unless the contracted axes require a flip.
        adj_x = adj_y = None
        if axes is not None:
            if axes[0] != ndim(x) - 1:
                adj_x = True
            if axes[1] == ndim(y) - 1:
                adj_y = True

        # TODO: remove the batch_matmul path later.
        if not hasattr(tf, 'batch_matmul'):
            out = tf.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y)
        else:
            # The keyword names of batch_matmul differ between releases, so
            # fall back to the other spelling when the first is rejected.
            try:
                out = tf.batch_matmul(x, y, adj_a=adj_x, adj_b=adj_y)
            except TypeError:
                out = tf.batch_matmul(x, y, adj_x=adj_x, adj_y=adj_y)

    if ndim(out) == 1:
        out = expand_dims(out, 1)
    return out
Ejemplo n.º 42
0
    def __init__(self, is_training, word_embeddings, settings):
        """Build the bidirectional-GRU relation-extraction graph.

        Creates the input placeholders, the embedding and attention
        variables, the forward and backward GRU stacks, a per-bag
        sentence-level attention, and the per-bag probability, prediction,
        loss and accuracy ops. Scalar summaries are registered for the
        summed loss, the L2 penalty and their total.

        Args:
            is_training: when true and `settings.keep_prob < 1`, both GRU
                cells are wrapped in a `DropoutWrapper`.
            word_embeddings: initial value for the `word_embedding` variable.
            settings: configuration object; this method reads `num_steps`,
                `vocab_size`, `num_classes`, `gru_size`, `big_num`,
                `pos_num`, `pos_size`, `keep_prob` and `num_layers`.
        """

        self.num_steps = num_steps = settings.num_steps
        self.vocab_size = vocab_size = settings.vocab_size
        self.num_classes = num_classes = settings.num_classes
        self.gru_size = gru_size = settings.gru_size
        self.big_num = big_num = settings.big_num

        # Input placeholders: one row per sentence, num_steps tokens each.
        self.input_word = tf.placeholder(dtype=tf.int32,
                                         shape=[None, num_steps],
                                         name='input_word')
        self.input_pos1 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, num_steps],
                                         name='input_pos1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, num_steps],
                                         name='input_pos2')
        self.absolute_pos1 = tf.placeholder(dtype=tf.int32,
                                            shape=[None, num_steps],
                                            name='absolute_pos1')
        self.absolute_pos2 = tf.placeholder(dtype=tf.int32,
                                            shape=[None, num_steps],
                                            name='absolute_pos2')
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, num_classes],
                                      name='input_y')
        # Row offsets of the bags: bag i covers rows
        # total_shape[i]:total_shape[i + 1]; the last entry is the row count.
        self.total_shape = tf.placeholder(dtype=tf.int32,
                                          shape=[big_num + 1],
                                          name='total_shape')
        total_num = self.total_shape[-1]

        word_embedding = tf.get_variable(initializer=word_embeddings,
                                         name='word_embedding')
        pos1_embedding = tf.get_variable('pos1_embedding',
                                         [settings.pos_num, settings.pos_size])
        pos2_embedding = tf.get_variable('pos2_embedding',
                                         [settings.pos_num, settings.pos_size])

        # attention_w is not used by the active word-level attention below,
        # but the variable is still created so the variable set is unchanged.
        attention_w = tf.get_variable('attention_omega', [gru_size, 1])
        sen_a = tf.get_variable('attention_A', [gru_size])
        sen_r = tf.get_variable('query_r', [gru_size, 1])
        relation_embedding = tf.get_variable('relation_embedding',
                                             [self.num_classes, gru_size])
        sen_d = tf.get_variable('bias_d', [self.num_classes])

        gru_cell_forward = tf.nn.rnn_cell.GRUCell(gru_size)
        gru_cell_backward = tf.nn.rnn_cell.GRUCell(gru_size)

        if is_training and settings.keep_prob < 1:
            gru_cell_forward = tf.nn.rnn_cell.DropoutWrapper(
                gru_cell_forward, output_keep_prob=settings.keep_prob)
            gru_cell_backward = tf.nn.rnn_cell.DropoutWrapper(
                gru_cell_backward, output_keep_prob=settings.keep_prob)

        cell_forward = tf.nn.rnn_cell.MultiRNNCell([gru_cell_forward] *
                                                   settings.num_layers)
        cell_backward = tf.nn.rnn_cell.MultiRNNCell([gru_cell_backward] *
                                                    settings.num_layers)

        sen_repre = []
        sen_alpha = []
        sen_s = []
        sen_out = []
        self.prob = []
        self.predictions = []
        self.loss = []
        self.accuracy = []
        self.total_loss = 0.0

        self._initial_state_forward = cell_forward.zero_state(
            total_num, tf.float32)
        self._initial_state_backward = cell_backward.zero_state(
            total_num, tf.float32)

        # Embedding layer: word and both position embeddings concatenated
        # along the feature axis.
        inputs_forward = tf.concat(2, [
            tf.nn.embedding_lookup(word_embedding, self.input_word),
            tf.nn.embedding_lookup(pos1_embedding, self.input_pos1),
            tf.nn.embedding_lookup(pos2_embedding, self.input_pos2)
        ])
        # Same features with the time axis reversed for the backward GRU.
        # Fix: input_pos2 is now looked up in pos2_embedding, matching the
        # forward input (it previously used pos1_embedding by mistake).
        inputs_backward = tf.concat(2, [
            tf.nn.embedding_lookup(word_embedding,
                                   tf.reverse(self.input_word, [False, True])),
            tf.nn.embedding_lookup(pos1_embedding,
                                   tf.reverse(self.input_pos1, [False, True])),
            tf.nn.embedding_lookup(pos2_embedding,
                                   tf.reverse(self.input_pos2, [False, True]))
        ])

        outputs_forward = []

        state_forward = self._initial_state_forward

        # Bi-GRU layer, unrolled manually; variables are shared after step 0.
        with tf.variable_scope('GRU_FORWARD'):
            for step in range(num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output_forward,
                 state_forward) = cell_forward(inputs_forward[:, step, :],
                                               state_forward)
                outputs_forward.append(cell_output_forward)

        outputs_backward = []

        state_backward = self._initial_state_backward
        with tf.variable_scope('GRU_BACKWARD'):
            for step in range(num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output_backward,
                 state_backward) = cell_backward(inputs_backward[:, step, :],
                                                 state_backward)
                outputs_backward.append(cell_output_backward)

        output_forward = tf.reshape(tf.concat(1, outputs_forward),
                                    [total_num, num_steps, gru_size])
        # Undo the time reversal so both directions are aligned per step.
        output_backward = tf.reverse(
            tf.reshape(tf.concat(1, outputs_backward),
                       [total_num, num_steps, gru_size]), [False, True, False])

        # Word-level attention layer. The learned attention (kept below for
        # reference) is disabled; the per-step weights are absolute_pos1
        # cast to float.
        output_h = tf.add(output_forward, output_backward)
        #attention_r = tf.reshape(tf.batch_matmul(tf.reshape(tf.nn.softmax(tf.reshape(tf.matmul(tf.reshape(tf.tanh(output_h),[total_num*num_steps,gru_size]),attention_w),[total_num,num_steps])),[total_num,1,num_steps]),output_h),[total_num,gru_size])
        attention_r = tf.reshape(
            tf.batch_matmul(
                tf.reshape(tf.cast(self.absolute_pos1, tf.float32),
                           [total_num, 1, num_steps]), output_h),
            [total_num, gru_size])

        # Sentence-level attention layer: one pass per bag.
        for i in range(big_num):

            bag_start = self.total_shape[i]
            bag_end = self.total_shape[i + 1]
            sen_repre.append(tf.tanh(attention_r[bag_start:bag_end]))
            batch_size = bag_end - bag_start

            # Softmax over the sentences of the bag.
            sen_alpha.append(
                tf.reshape(
                    tf.nn.softmax(
                        tf.reshape(
                            tf.matmul(tf.mul(sen_repre[i], sen_a), sen_r),
                            [batch_size])), [1, batch_size]))

            # Attention-weighted bag representation as a column vector.
            sen_s.append(
                tf.reshape(tf.matmul(sen_alpha[i], sen_repre[i]),
                           [gru_size, 1]))
            sen_out.append(
                tf.add(
                    tf.reshape(tf.matmul(relation_embedding, sen_s[i]),
                               [self.num_classes]), sen_d))

            self.prob.append(tf.nn.softmax(sen_out[i]))

            with tf.name_scope("output"):
                self.predictions.append(
                    tf.argmax(self.prob[i], 0, name="predictions"))

            with tf.name_scope("loss"):
                self.loss.append(
                    tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(
                            sen_out[i], self.input_y[i])))
                if i == 0:
                    self.total_loss = self.loss[i]
                else:
                    self.total_loss += self.loss[i]

            with tf.name_scope("accuracy"):
                self.accuracy.append(
                    tf.reduce_mean(tf.cast(
                        tf.equal(self.predictions[i],
                                 tf.argmax(self.input_y[i], 0)), "float"),
                                   name="accuracy"))

        tf.scalar_summary('loss', self.total_loss)
        # L2 regularisation over every trainable variable.
        self.l2_loss = tf.contrib.layers.apply_regularization(
            regularizer=tf.contrib.layers.l2_regularizer(0.0001),
            weights_list=tf.trainable_variables())
        self.final_loss = self.total_loss + self.l2_loss
        tf.scalar_summary('l2_loss', self.l2_loss)
        tf.scalar_summary('final_loss', self.final_loss)
Ejemplo n.º 43
0
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False):
    """
    Given f, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about f as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N(0, I)
        f = L v
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case `f` represents the values taken by v.

    The method can return either the diagonals of the covariance matrix for
    each output, or the full covariance matrix (full_cov).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

     - Xnew is a data matrix, size N x D
     - X are data points, size M x D
     - kern is a GPflow kernel
     - f is a data matrix, M x K, representing the function values at X, for
       K functions.
     - q_sqrt (optional) is a matrix of standard-deviations or Cholesky
       matrices, size M x K or M x M x K
     - whiten (optional) is a boolean: whether to whiten the representation
       as described above.

    Returns the pair (fmean, fvar).

    These functions are now considered deprecated, subsumed into this one:
        gp_predict
        gaussian_gp_predict
        gp_predict_whitened
        gaussian_gp_predict_whitened

    """

    # Kernel blocks; jitter is added to the diagonal of K(X, X) before the
    # Cholesky factorisation.
    n_points = tf.shape(X)[0]
    Kmn = kern.K(X, Xnew)
    Kmm = kern.K(X) + eye(n_points) * settings.numerics.jitter_level
    chol_mm = tf.cholesky(Kmm)

    # Projection matrix: solve chol_mm * proj = Kmn.
    proj = tf.matrix_triangular_solve(chol_mm, Kmn, lower=True)

    # Covariance reduction due to conditioning, then one copy per column of f.
    if full_cov:
        fvar = kern.K(Xnew) - tf.matmul(proj, proj, transpose_a=True)
        tile_multiples = tf.pack([tf.shape(f)[1], 1, 1])
    else:
        fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(proj), 0)
        tile_multiples = tf.pack([tf.shape(f)[1], 1])
    fvar = tf.tile(tf.expand_dims(fvar, 0), tile_multiples)  # D x N x N or D x N

    # The unwhitened representation needs a second back-substitution.
    if not whiten:
        proj = tf.matrix_triangular_solve(tf.transpose(chol_mm), proj,
                                          lower=False)

    # Conditional mean.
    fmean = tf.matmul(tf.transpose(proj), f)

    if q_sqrt is not None:
        q_rank = q_sqrt.get_shape().ndims
        if q_rank == 2:
            # Per-point standard deviations scale the projection directly.
            scaled = proj * tf.expand_dims(tf.transpose(q_sqrt), 2)  # D x M x N
        elif q_rank == 3:
            # Keep only the lower triangle of each per-function factor.
            q_chol = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1,
                                         0)  # D x M x M
            proj_tiled = tf.tile(tf.expand_dims(proj, 0),
                                 tf.pack([tf.shape(f)[1], 1, 1]))
            scaled = tf.batch_matmul(q_chol, proj_tiled,
                                     adj_x=True)  # D x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))

        if full_cov:
            extra = tf.batch_matmul(scaled, scaled, adj_x=True)  # D x N x N
        else:
            extra = tf.reduce_sum(tf.square(scaled), 1)  # D x N
        fvar = fvar + extra

    fvar = tf.transpose(fvar)  # N x D or N x N x D

    return fmean, fvar
Ejemplo n.º 44
0
    # NOTE(review): this is the tail of a graph-building function whose
    # header, and the definitions of q_out, V, Pl and a_w2i, are not visible
    # in this fragment.

    # Stack the list of per-step outputs into one tensor, then sum over the
    # leading axis after multiplying by the question mask.
    q_out = tf.pack(q_out)
    q_out = tf.reduce_sum(
        tf.mul(q_out, tf.to_float(tf.expand_dims(Pl['question_mask'], -1))), 0)
    Vatt = compute_attention(V, q_out)

    x = merge_modalities(Vatt, q_out)

    # 1.0 wherever a multiple-choice token differs from the '</s>' id;
    # norm_mask is the count of such tokens along axis 2.
    mc_mask = tf.to_float(tf.not_equal(Pl['mc'], a_w2i['</s>']))
    norm_mask = tf.expand_dims(tf.reduce_sum(mc_mask, reduction_indices=2), -1)
    with tf.variable_scope('multiple_choice'):
        # No shape is given, so 'W' must already exist in a reusing scope.
        W = tf.get_variable('W')
        mc_emb = tf.nn.embedding_lookup(W, Pl['mc'])
        # Masked mean of the token embeddings along axis 2.
        masked_mc_out = tf.mul(tf.expand_dims(mc_mask, -1), mc_emb)
        mc_out = tf.reduce_sum(masked_mc_out, reduction_indices=2) / norm_mask

    # Score each choice as its product with x (x gets a unit axis at
    # position 1 and is adjointed); the trailing unit axis is dropped.
    out_scores = tf.batch_matmul(mc_out, tf.expand_dims(x, 1),
                                 adj_y=True)[:, :, 0]
    out_probas = tf.nn.softmax(out_scores)

    # Rescale the answers so each row sums to one along axis 1.
    normalized_ans = Pl['answers'] / tf.expand_dims(
        tf.reduce_sum(Pl['answers'], reduction_indices=1), -1)

    # Legacy positional argument order: (logits, labels).
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        out_scores, normalized_ans)
    cost = tf.reduce_mean(cross_entropy)

    optimizer = tf.train.AdamOptimizer()
    #optimizer = tf.train.GradientDescentOptimizer(0.01)
    gvs = optimizer.compute_gradients(cost)
    # with tf.device('/cpu:0'):
    cost_s = tf.scalar_summary('train loss', cost, name='train_loss')
    # Clip every gradient element-wise to [-1, 1].
    # NOTE(review): tf.clip_by_value would fail on a None gradient --
    # confirm every variable receives one.
    capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
 def _random_pd_matrix(self, shape):
     """Return an evaluated batch of matrices of the form `A * A^T`.

     `A` is drawn from `self._rng.randn(*shape)`; the original author notes
     that the product is positive definite with probability 1.
     """
     factor = self._rng.randn(*shape)
     # adj_y=True adjoints the second operand, giving factor * factor^T.
     return tf.batch_matmul(factor, factor, adj_y=True).eval()
Ejemplo n.º 46
0
    def __init__(self, config):
        """Build the graph of a relation-projected translation model.

        Entities are embedded with size `hidden_sizeE`, relations with size
        `hidden_sizeR`, and every relation owns a `sizeR x sizeE` matrix
        that projects head and tail entities into relation space. The loss
        is a margin ranking loss between positive and negative triples.

        Args:
            config: configuration object; this method reads `entity`,
                `relation`, `batch_size`, `hidden_sizeE`, `hidden_sizeR`,
                `margin` and `L1_flag`.
        """

        entity_total = config.entity
        relation_total = config.relation
        batch_size = config.batch_size
        sizeE = config.hidden_sizeE
        sizeR = config.hidden_sizeR
        margin = config.margin

        # Index placeholders for the positive and negative (h, t, r) triples.
        with tf.name_scope("read_inputs"):
            self.pos_h = tf.placeholder(tf.int32, [batch_size])
            self.pos_t = tf.placeholder(tf.int32, [batch_size])
            self.pos_r = tf.placeholder(tf.int32, [batch_size])
            self.neg_h = tf.placeholder(tf.int32, [batch_size])
            self.neg_t = tf.placeholder(tf.int32, [batch_size])
            self.neg_r = tf.placeholder(tf.int32, [batch_size])

        with tf.name_scope("embedding"):
            self.ent_embeddings = tf.get_variable(
                name="ent_embedding",
                shape=[entity_total, sizeE],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))
            self.rel_embeddings = tf.get_variable(
                name="rel_embedding",
                shape=[relation_total, sizeR],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))
            # One flattened sizeR x sizeE projection matrix per relation.
            self.rel_matrix = tf.get_variable(
                name="rel_matrix",
                shape=[relation_total, sizeE * sizeR],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))

        with tf.name_scope('lookup_embeddings'):
            # Entities become column vectors so they can be left-multiplied
            # by the projection matrices.
            pos_h_e = tf.reshape(
                tf.nn.embedding_lookup(self.ent_embeddings, self.pos_h),
                [-1, sizeE, 1])
            pos_t_e = tf.reshape(
                tf.nn.embedding_lookup(self.ent_embeddings, self.pos_t),
                [-1, sizeE, 1])
            pos_r_e = tf.reshape(
                tf.nn.embedding_lookup(self.rel_embeddings, self.pos_r),
                [-1, sizeR])
            neg_h_e = tf.reshape(
                tf.nn.embedding_lookup(self.ent_embeddings, self.neg_h),
                [-1, sizeE, 1])
            neg_t_e = tf.reshape(
                tf.nn.embedding_lookup(self.ent_embeddings, self.neg_t),
                [-1, sizeE, 1])
            neg_r_e = tf.reshape(
                tf.nn.embedding_lookup(self.rel_embeddings, self.neg_r),
                [-1, sizeR])

            # Fix: each triple is projected with the matrix of its OWN
            # relation. The positive triples previously reused the matrix
            # looked up from neg_r, which is only right when
            # pos_r == neg_r (in which case the result is unchanged).
            pos_matrix = tf.reshape(
                tf.nn.embedding_lookup(self.rel_matrix, self.pos_r),
                [-1, sizeR, sizeE])
            neg_matrix = tf.reshape(
                tf.nn.embedding_lookup(self.rel_matrix, self.neg_r),
                [-1, sizeR, sizeE])

            pos_h_e = tf.reshape(tf.batch_matmul(pos_matrix, pos_h_e),
                                 [-1, sizeR])
            pos_t_e = tf.reshape(tf.batch_matmul(pos_matrix, pos_t_e),
                                 [-1, sizeR])
            neg_h_e = tf.reshape(tf.batch_matmul(neg_matrix, neg_h_e),
                                 [-1, sizeR])
            neg_t_e = tf.reshape(tf.batch_matmul(neg_matrix, neg_t_e),
                                 [-1, sizeR])

        # Triple score: L1 or squared L2 norm of h + r - t, per example.
        if config.L1_flag:
            pos = tf.reduce_sum(abs(pos_h_e + pos_r_e - pos_t_e),
                                1,
                                keep_dims=True)
            neg = tf.reduce_sum(abs(neg_h_e + neg_r_e - neg_t_e),
                                1,
                                keep_dims=True)
        else:
            pos = tf.reduce_sum((pos_h_e + pos_r_e - pos_t_e)**2,
                                1,
                                keep_dims=True)
            neg = tf.reduce_sum((neg_h_e + neg_r_e - neg_t_e)**2,
                                1,
                                keep_dims=True)

        # Margin ranking loss summed over the batch.
        with tf.name_scope("output"):
            self.loss = tf.reduce_sum(tf.maximum(pos - neg + margin, 0))
Ejemplo n.º 47
0
def batch_timesteps_linear(
        input,
        output_size,
        bias,
        bias_start=0.0,
        use_l2_loss=False,
        use_weight_normalization=use_weight_normalization_default,
        scope=None,
        tranpose_input=True,
        timestep=-1):
    """Apply one shared linear map to every leading-axis slice of a 3D tensor.

    The weight matrix is tiled along the leading axis and applied with a
    single batched matrix product.

  Args:
    input: a 3D Tensor with a statically known rank. When `tranpose_input`
      is true its first two axes are swapped before the product and swapped
      back afterwards.
    output_size: int, second dimension of the weight matrix.
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
    use_l2_loss: when true the weight matrix gets an L2 regularizer (1e-5).
    use_weight_normalization: when true the matrix is passed through
      `weight_normalization` before use.
    scope: VariableScope for the created subgraph; defaults to "Linear".
    tranpose_input: whether to swap the first two axes (see `input`).
    timestep: not referenced by this function.
  Returns:
    A 3D Tensor whose last dimension is `output_size`.
  Raises:
    ValueError: if the (possibly transposed) input is not of rank 3.
  """
    if tranpose_input:
        input = tf.transpose(input, [1, 0, 2])

    dims = input.get_shape().as_list()
    if len(dims) != 3:
        raise ValueError(
            'shape must be of size 3, you have inputted shape size of:',
            len(dims))

    # After the optional transpose the leading axis is the one the matrix is
    # tiled over; the last axis is the input feature size.
    num_timesteps, _, total_arg_size = dims

    l_regularizer = (tf.contrib.layers.l2_regularizer(1e-5)
                     if use_l2_loss else None)

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        weights = tf.get_variable(
            "Matrix", [total_arg_size, output_size],
            initializer=tf.uniform_unit_scaling_initializer(),
            regularizer=l_regularizer)
        if use_weight_normalization:
            weights = weight_normalization(weights)
        # One copy of the matrix per leading-axis slice.
        tiled_weights = tf.tile(tf.expand_dims(weights, 0),
                                [num_timesteps, 1, 1])

        projected = tf.batch_matmul(input, tiled_weights)

        if bias:
            bias_term = tf.get_variable(
                "Bias", [output_size],
                initializer=tf.constant_initializer(bias_start))
            projected = projected + bias_term

    if tranpose_input:
        projected = tf.transpose(projected, [1, 0, 2])

    return projected
Ejemplo n.º 48
0
def diagonal_bilinear(inputs1, inputs2, output_size, add_bias1=True, add_bias2=True, initializer=None, scope=None, moving_params=None, add_bias=False):
  """Bilinear map between two inputs using a diagonal weight per output.

  Each of the `output_size` outputs has one weight vector that scales the
  features of `inputs1` element-wise; the scaled features are then
  multiplied against the features of `inputs2`.

  Args:
    inputs1, inputs2: tensors of the same static rank; the last axis is the
      feature axis and the one before it is the bucket axis. The batched
      product requires both to have the same feature size.
    output_size: number of bilinear outputs.
    add_bias1: when true, add a bias-free linear function of `inputs1`.
    add_bias2: when true, add a bias-free linear function of `inputs2`.
    initializer: initializer for 'Weights'; defaults to `tf.ones_initializer`
      when both it and `moving_params` are None.
    scope: variable scope name; defaults to 'DiagonalBilinear'.
    moving_params: optional object whose `average(var)` replaces each
      variable with its averaged value.
    add_bias: when true, add a per-output constant bias. The body has always
      read this flag but it was missing from the signature, so the function
      raised NameError unless a global of that name existed; it is now a
      trailing keyword parameter that defaults to False.

  Returns:
    A tensor reshaped to the leading dims of `inputs1` followed by
    (bucket size of inputs1, output_size, bucket size of inputs2).
  """

  with tf.variable_scope(scope or 'DiagonalBilinear'):
    # Reformat the inputs: collapse all leading axes into one batch axis.
    ndims = len(inputs1.get_shape().as_list())
    inputs1_shape = tf.shape(inputs1)
    inputs1_bucket_size = inputs1_shape[ndims-2]
    inputs1_size = inputs1.get_shape().as_list()[-1]

    inputs2_shape = tf.shape(inputs2)
    inputs2_bucket_size = inputs2_shape[ndims-2]
    inputs2_size = inputs2.get_shape().as_list()[-1]
    output_shape = []
    batch_size = 1
    for i in xrange(ndims-2):
      batch_size *= inputs1_shape[i]
      output_shape.append(inputs1_shape[i])
    output_shape.append(inputs1_bucket_size)
    output_shape.append(output_size)
    output_shape.append(inputs2_bucket_size)
    output_shape = tf.pack(output_shape)
    inputs1 = tf.reshape(inputs1, tf.pack([batch_size, inputs1_bucket_size, inputs1_size]))
    inputs2 = tf.reshape(inputs2, tf.pack([batch_size, inputs2_bucket_size, inputs2_size]))

    # Get the matrix: one weight vector per output.
    if initializer is None and moving_params is None:
      initializer = tf.ones_initializer
    weights = tf.get_variable('Weights', [output_size, inputs1_size], initializer=initializer)
    if moving_params is not None:
      weights = moving_params.average(weights)
    else:
      tf.add_to_collection('Weights', weights)

    # Get the bias: a column so it broadcasts over the last axis.
    if add_bias:
      bias = tf.get_variable('Biases', [output_size], initializer=tf.zeros_initializer)
      if moving_params is not None:
        bias = moving_params.average(bias)
      bias = tf.reshape(bias, [-1,1])
    else:
      bias = 0

    # Do the multiplications (shape notes are the original author's).
    # (bn x 1 x d) (r x d) -> (bn x r x d)
    lin = tf.reshape(inputs1, [-1, 1, inputs1_size]) * weights
    # (b x nr x d) (b x n x d)T -> (b x nr x n)
    bilin = tf.batch_matmul(tf.reshape(lin, tf.pack([batch_size, inputs1_bucket_size*output_size, inputs2_size])),
                                   inputs2, adj_y=True)
    # (bn x r x n)
    bilin = tf.reshape(bilin, tf.pack([-1, output_size, inputs2_bucket_size])) + bias
    # (b x n x r x n)
    bilin = tf.reshape(bilin, output_shape)

    if add_bias1:
      with tf.variable_scope('Input1_Biases'):
        # Restore the static feature size lost by the dynamic reshape.
        inputs1.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(inputs1_size)])
        bilin += tf.expand_dims(linear(inputs1, output_size, add_bias=False, moving_params=moving_params), 3)
    if add_bias2:
      with tf.variable_scope('Input2_Biases'):
        inputs2.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(inputs2_size)])
        bilin += tf.expand_dims(tf.transpose(linear(inputs2, output_size, add_bias=False, moving_params=moving_params), [0, 2, 1]), 1)

    return bilin
Ejemplo n.º 49
0
# Report the number of rows in the training and test sets.
print("train count:\t%d"%(trainSet.shape[0]))
print("test count:\t%d"%(testSet.shape[0]))
print("="*20)

# Embedding layer: each example is one user id, one item id and one rating.
u = tf.placeholder(tf.int32,   [None, 1])
v = tf.placeholder(tf.int32,   [None, 1])
r = tf.placeholder(tf.float32, [None, 1])

# k-dimensional factor tables, uniformly initialised in [-0.05, 0.05].
U = tf.Variable(tf.random_uniform([userCount, k], -0.05, 0.05))
V = tf.Variable(tf.random_uniform([itemCount, k], -0.05, 0.05))

# Looking up [None, 1] ids in a [*, k] table yields [None, 1, k] factors.
uFactor = tf.nn.embedding_lookup(U, u)
vFactor = tf.nn.embedding_lookup(V, v)

# adj_x=True turns each user factor into a k x 1 column, so the product with
# the 1 x k item factor is a k x k outer product, flattened to k*k features.
matmul = tf.reshape(tf.batch_matmul(uFactor, vFactor, adj_x=True, adj_y=False), [-1, k*k])
# Feature vector: user factor, item factor and their flattened outer product
# (legacy tf.concat argument order: axis first).
merge = tf.concat(1, [tf.reshape(uFactor, [-1, k]), tf.reshape(vFactor, [-1, k]), matmul])

# Fully connected layers.
import math
layer1 = k * k + 2 * k
layer2 = k
# Uniform initialisation bounds of the form sqrt(6 / (fan_in + fan_out)).
scale1 = math.sqrt(6.0 / (layer1 + layer2))
scale2 = math.sqrt(6.0 / (layer2 + 1))

# Hidden layer with a sigmoid activation.
W1 = tf.Variable(tf.random_uniform([layer1, layer2], -scale1, scale1))
b1 = tf.Variable(tf.random_uniform([layer2], -scale1, scale1))
y1 = tf.sigmoid(tf.matmul(merge, W1) + b1)

# Parameters of the single-unit output layer.
W2 = tf.Variable(tf.random_uniform([layer2, 1], -scale2, scale2))
b2 = tf.Variable(tf.random_uniform([1], -scale2, scale2))
Ejemplo n.º 50
0
    def add_model(self):
        """Build a factorization-machine style graph and return its handles.

        The score of an example is a global bias, plus the sum of the
        first-order weights of its features, plus a learned linear
        combination of the pairwise dot products of its feature embeddings.
        The score is clipped, squashed with a logistic function and trained
        with log-loss plus L2 penalties using Adagrad.

        Reads `num_features`, `max_features`, `dim`, `regular` and `lr`
        from `self.args`.

        Returns:
            dict with the feature placeholder ('x'), the label placeholder
            ('y'), the clipped probability ('p'), the cost tensor ('cost')
            and the training op ('opt').
        """
        num_features = self.args.num_features

        with tf.device('/cpu:0'):
            x = tf.placeholder(tf.int32, [None, num_features])
            y = tf.placeholder(tf.float32, [None])

            b = tf.Variable(tf.random_uniform([1], -.1, .1))

            # Row 0 of both tables is a constant zero row placed in front
            # of the trainable rows.
            embedding_w = tf.concat(0, [
                tf.constant([[0.] * 1], dtype=tf.float32),
                tf.Variable(
                    tf.random_uniform([self.args.max_features, 1], -.1, .1))
            ])

            embedding_v = tf.concat(0, [
                tf.constant([[0.] * self.args.dim], dtype=tf.float32),
                tf.Variable(
                    tf.random_uniform([self.args.max_features, self.args.dim],
                                      -.1, .1))
            ])

        with tf.device('/gpu:0'):
            embed_w = tf.nn.embedding_lookup(embedding_w, x)
            embed_v = tf.nn.embedding_lookup(embedding_v, x)

            # First-order term: sum of the looked-up scalar weights.
            w_x = tf.reduce_sum(embed_w, [1, 2])

            # Gram matrix of the feature embeddings of every example.
            m = tf.batch_matmul(embed_v, tf.transpose(embed_v, perm=[0, 2, 1]))

            # Keep the strictly upper-triangular entries (i < j), one column
            # per feature pair.
            m_l = [tf.expand_dims(m[:, i, j], 1)
                   for i in range(num_features)
                   for j in range(i + 1, num_features)]

            mm = tf.concat(1, m_l)

            # C(n, 2) pairs. Floor division keeps this an int on Python 3 as
            # well (n**2 - n is always even), which the shape below needs.
            w_mm_dim = (num_features**2 - num_features) // 2

            # Pair weights start at exactly 1 and the bias at exactly 0
            # (degenerate uniform ranges).
            w1_mm = tf.Variable(tf.random_uniform([w_mm_dim, 1], 1.0, 1.0))
            b1_mm = tf.Variable(tf.random_uniform([1], 0.0, 0.0))

            # Second-order term: linear combination of the pair products.
            a1_mm = tf.matmul(mm, w1_mm) + b1_mm
            vv_x = tf.squeeze(a1_mm, [1])

            all_x = b + w_x + vv_x

            # Clip the logit and the probability so exp/log stay finite.
            clip_all_x = tf.clip_by_value(all_x, -35., 35.)
            p = 1.0 / (1.0 + tf.exp(-clip_all_x))
            clip_p = tf.clip_by_value(p, 10e-8, 1.0 - 10e-8)

            # cost: logloss plus L2 penalties on the looked-up embeddings and
            # on the pair weights.
            cost = -tf.reduce_sum(y * tf.log(clip_p) + (1.0-y) * tf.log(1.0-clip_p)) \
             + self.args.regular * (tf.nn.l2_loss(embed_w) \
              + tf.nn.l2_loss(embed_v) + tf.nn.l2_loss(w1_mm) )

            opt = tf.train.AdagradOptimizer(self.args.lr).minimize(cost)

            return {'x': x, 'y': y, 'p': clip_p, 'cost': cost, 'opt': opt}
    def build_model(self):
        """Build the training graph for image/question/answer scoring.

        The question words are attended with the image embedding, the
        attended question is fused multiplicatively with the image and then
        with the summed answer embedding, and the fused vector is scored
        against a two-column label with softmax cross-entropy.

        Returns:
            The tuple (loss, image, question, answer, question_length,
            answer_length, label): the mean cross-entropy followed by the
            six input placeholders.
        """
        image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
        question = tf.placeholder(tf.int32, [self.batch_size, self.max_words_q])
        answer = tf.placeholder(tf.int32, [self.batch_size, self.max_words_q])
        question_length = tf.placeholder(tf.int32, [self.batch_size])
        answer_length = tf.placeholder(tf.int32, [self.batch_size])
        label = tf.placeholder(tf.float32, [self.batch_size, 2])

        # Zero tensors created by the original graph; both names are rebound
        # further down before they are read.
        state_que = tf.zeros([self.batch_size, self.input_embedding_size])
        state_ans = tf.zeros([self.batch_size, self.input_embedding_size])

        # sign() of the word ids: 1.0 for positive ids, 0.0 for id 0.
        question_mask = tf.cast(tf.sign(question), tf.float32)

        # Word embeddings of the question, with dropout.
        question_vecs = tf.nn.embedding_lookup(self.embed_ques_W, question)
        question_vecs_drop = tf.nn.dropout(question_vecs,
                                           1-self.drop_out_rate)

        # Project every word into the attention space.
        flat_question = tf.reshape(question_vecs_drop,
                                   [-1, self.input_embedding_size])
        question_hidden = tf.tanh(
            tf.nn.xw_plus_b(flat_question, self.att_Q_W, self.att_Q_b))
        question_hidden = tf.reshape(
            question_hidden,
            [self.batch_size, self.max_words_q, self.dim_hidden])

        # Project the image into the same space, as a column vector.
        image_for_att = tf.nn.dropout(image, 1-self.drop_out_rate)
        image_hidden = tf.tanh(
            tf.nn.xw_plus_b(image_for_att, self.att_image_W,
                            self.att_image_b))
        image_column = tf.reshape(image_hidden,
                                  (self.batch_size, self.dim_hidden, 1))

        # One attention score per question word.
        word_scores = tf.batch_matmul(question_hidden, image_column)
        word_scores = tf.reshape(word_scores,
                                 (self.batch_size, self.max_words_q))

        # Softmax via exp(log_softmax), masked and then renormalised so the
        # weights of the unmasked words sum to one.
        word_weights = tf.mul(tf.exp(tf.nn.log_softmax(word_scores)),
                              question_mask)
        weight_totals = tf.expand_dims(tf.reduce_sum(word_weights, 1), 1)
        word_weights = word_weights/weight_totals
        word_weights = tf.reshape(
            word_weights, (self.batch_size, 1, self.max_words_q))

        # Attention-weighted sum of the (dropped-out) word embeddings.
        attended = tf.batch_matmul(word_weights, question_vecs_drop)
        state_que = tf.reshape(attended,
                               (self.batch_size, self.input_embedding_size))

        # The answer is the plain sum of its word embeddings; it shares the
        # question embedding table.
        answer_vecs = tf.nn.embedding_lookup(self.embed_ques_W, answer)
        state_ans = tf.reduce_sum(answer_vecs, 1)

        # multimodal (fusing question & image)
        question_in = tf.nn.dropout(state_que, 1-self.drop_out_rate)
        Q_emb = tf.tanh(
            tf.nn.xw_plus_b(question_in, self.embed_Q_W, self.embed_Q_b))

        image_in = tf.nn.dropout(image, 1-self.drop_out_rate)
        image_emb = tf.tanh(
            tf.nn.xw_plus_b(image_in, self.embed_image_W,
                            self.embed_image_b))

        answer_in = tf.nn.dropout(state_ans, 1-self.drop_out_rate)
        A_emb = tf.tanh(
            tf.nn.xw_plus_b(answer_in, self.embed_A_W, self.embed_A_b))

        # Element-wise fusion of question and image, then of that with the
        # answer.
        fused_qi = tf.mul(Q_emb, image_emb)
        fused_qi_in = tf.nn.dropout(fused_qi, 1-self.drop_out_rate)
        QI_emb = tf.tanh(
            tf.nn.xw_plus_b(fused_qi_in, self.embed_QI_W, self.embed_QI_b))

        fused_qia = tf.mul(QI_emb, A_emb)
        scores = tf.nn.xw_plus_b(fused_qia, self.embed_scor_W,
                                 self.embed_scor_b)
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=scores, labels=label)

        # Calculate loss
        loss = tf.reduce_mean(per_example_loss)
        return loss, image, question, answer, question_length, answer_length, label
Ejemplo n.º 52
0
    def encode(self, inputs, masks, encoder_state_input):
        """
        Encode a (question, paragraph) pair into an attention-scaled
        paragraph representation.

        The question and the paragraph are each run through their own
        bidirectional LSTM. The question summary is projected by a learned
        square matrix, scored against every paragraph position, and the
        scores (divided by their L2 norm over axis 1) rescale the paragraph
        outputs.

        Side effects: sets ``self.q_rep``, ``self.p_rep``,
        ``self.attention_weights``, ``self.attention`` and
        ``self.knowledge_rep``.

        :param inputs: pair ``(question, paragraph)`` of tensors fed to the
                       two biLSTMs
        :param masks: pair ``(q_mask, p_mask)``; each one is handed to
                      tf.nn.bidirectional_dynamic_rnn as ``sequence_length``
        :param encoder_state_input: accepted for interface compatibility;
                                    this implementation does not read it
        :return: tuple ``(knowledge_rep, attention)``
        """
        question, paragraph = inputs
        q_mask, p_mask = masks

        # Question encoder. Only the final states are needed; the per-step
        # outputs are discarded.
        with tf.variable_scope('enc_q') as q_scope:
            q_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.size)
            q_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.size)
            _, q_final_states = tf.nn.bidirectional_dynamic_rnn(
                q_cell_fw,
                q_cell_bw,
                question,
                sequence_length=q_mask,
                dtype=tf.float32)
            q_scope.reuse_variables()

        # Element 0 of the forward and backward final states, joined along
        # axis 1, forms the question representation.
        q_final_fw, q_final_bw = q_final_states
        self.q_rep = tf.concat(1, (q_final_fw[0], q_final_bw[0]))

        # Paragraph encoder. Here the per-step outputs are kept and the final
        # states are discarded.
        with tf.variable_scope('enc_p') as p_scope:
            p_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.size)
            p_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.size)
            p_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                p_cell_fw,
                p_cell_bw,
                paragraph,
                sequence_length=p_mask,
                dtype=tf.float32)
            p_scope.reuse_variables()
        # Forward and backward outputs are joined along axis 2.
        self.p_rep = tf.concat(2, p_outputs)

        # Bilinear scoring: project the question summary, then take its dot
        # product with every paragraph position.
        self.attention_weights = tf.get_variable(
            "attent_weights",
            shape=[2 * self.size, 2 * self.size],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        projected_q = tf.matmul(self.q_rep, self.attention_weights)
        raw_scores = tf.batch_matmul(
            self.p_rep, tf.expand_dims(projected_q, axis=-1))

        # Divide the scores by their L2 norm taken over axis 1.
        score_norm = tf.sqrt(
            tf.reduce_sum(tf.square(raw_scores), axis=1, keep_dims=True))
        self.attention = raw_scores / score_norm
        self.knowledge_rep = tf.multiply(self.p_rep, self.attention)

        return self.knowledge_rep, self.attention
Ejemplo n.º 53
0
# Toy dimensions for the batch_matmul demo below.
batch_size, memory_size, n_memory_slots = 2, 3, 4
embedding_dim, hidden_size = 5, 6
depth = 2

# Shape of each named tensor, keyed by the name passed to ones_variable().
shapes = dict(
    gru_state=(batch_size, embedding_dim),
    h=(batch_size, hidden_size),
    M=(batch_size, memory_size, n_memory_slots),
    w=(batch_size, n_memory_slots, 1),
    input=(batch_size, hidden_size),
)


def ones_variable(name):
    """Create a float32 variable filled with ones, shaped as ``shapes[name]``.

    ``name`` selects the shape from the module-level ``shapes`` table and is
    also used as the name of the created variable.
    """
    initial_value = np.ones(shapes[name])
    return tf.Variable(initial_value, dtype=tf.float32, name=name)


with tf.Session() as sess:
    # Two all-ones operands shaped by the `shapes` table entries 'M' and 'w'.
    M, w = ones_variable('M'), ones_variable('w')
    # Variables must be initialised before the product can be evaluated.
    sess.run(tf.initialize_all_variables())
    # Per-example matrix product of M and w.
    print(sess.run(tf.batch_matmul(M, w)))

# x = tf.zeros([2, 2])
# m = tf.zeros([2, 2])
# g, _ = tf.nn.rnn_cell.BasicRNNCell(2)(x, x)
Ejemplo n.º 54
0
def extend_vector(input, r, batch_size):
    """
    Stack ``r`` copies of every row of ``input`` along a new axis 1.

    For each example the row vector is left-multiplied by an ``r x 1`` column
    of ones, so with r=3 a row [a,b,c] becomes [[a,b,c],[a,b,c],[a,b,c]].
    ``input`` has to be 2-D with ``batch_size`` rows for the batched product
    to be well-formed.
    """
    ones_column = tf.ones([batch_size, r, 1])
    as_row = tf.expand_dims(input, 1)
    return tf.batch_matmul(ones_column, as_row)
Ejemplo n.º 55
0
    def key_addressing_and_value_reading(self, q_b, k_b, v_b):
        """Run ``FLAGS.hops`` rounds of key addressing and value reading.

        On every hop the query is scored against the keys, the scores are
        turned into a distribution over memory slots (softmax, masked by
        ``self.mem_wts_b`` and renormalised), the values are averaged under
        that distribution, and the query is updated according to
        ``FLAGS.value_reading``.

        Per-hop logits, probabilities, read vectors and updated queries are
        stored in ``self.debug_dict`` under ``'logits'``, ``'probs'``,
        ``'o_list'`` and ``'q_list'``.

        Args:
            q_b: query batch; per the shape comments below,
                [batch_sz, embedding_sz].
            k_b: key memory; batch-multiplied with the query to give
                [batch_sz, mem_sz, 1] logits.
            v_b: value memory; weighted by the slot probabilities and summed
                over axis 1.

        Returns:
            The query tensor after the last hop.

        Raises:
            AssertionError: if ``FLAGS.value_reading`` is not one of the
                supported modes.
        """
        # Debugging
        logits_list = [None] * FLAGS.hops
        probs_list = [None] * FLAGS.hops
        o_list = [None] * FLAGS.hops
        q_list = [None] * FLAGS.hops
        self.debug_dict['logits'] = logits_list
        self.debug_dict['probs'] = probs_list
        self.debug_dict['o_list'] = o_list
        self.debug_dict['q_list'] = q_list
        for h in range(FLAGS.hops):

            #
            # Key Addressing
            #

            # [batch_sz, embedding_sz, 1]
            q_temp = tf.expand_dims(q_b, -1)

            # [batch_sz, mem_sz, 1]
            logits = tf.batch_matmul(k_b, q_temp)
            # [batch_sz, mem_sz]
            # Squeeze only the trailing singleton axis: an unrestricted
            # squeeze would also drop the batch or memory axis whenever its
            # size happens to be 1.
            logits = tf.squeeze(logits, [2])
            probs = tf.nn.softmax(logits)
            # Ignore memory padding
            probs = probs * self.mem_wts_b
            # [batch_sz, 1]
            z = tf.expand_dims(tf.reduce_sum(probs, 1), -1)
            # [batch_sz, mem_sz]
            probs = probs / z

            #
            # Value Reading
            #

            # [batch_sz, mem_sz, 1]
            probs = tf.expand_dims(probs, -1)
            # [batch_sz, embedding_sz]
            o = tf.reduce_sum(probs * v_b, 1)
            R = self.R_list[h]
            R_b = self.Rb_list[h]

            if FLAGS.value_reading == 'o':
                q_b = o
            elif FLAGS.value_reading == 'o.R':
                q_b = tf.matmul(o, R)
            elif FLAGS.value_reading == 'q + o':
                q_b = q_b + o
            elif FLAGS.value_reading == 'q + o.R':
                q_b = q_b + tf.matmul(o, R)
            elif FLAGS.value_reading == 'q.R + o':
                q_b = tf.matmul(q_b, R) + o
            elif FLAGS.value_reading == '(q + o).R':
                # The projection (with bias) is skipped on the last hop.
                if h < FLAGS.hops - 1:
                    q_b = tf.matmul(q_b + o, R) + R_b
                else:
                    q_b = q_b + o
                    # q_b = tf.matmul(q_b + o, R) + R_b
            elif FLAGS.value_reading is None or \
                 FLAGS.value_reading == '(q + o).R & o.R':
                # Default mode: the last hop projects the read vector alone.
                if h < FLAGS.hops - 1:
                    q_b = tf.matmul(q_b + o, R)
                else:
                    q_b = tf.matmul(o, R)
            else:
                # Raised explicitly so the check survives `python -O`, which
                # strips assert statements.
                raise AssertionError(
                    'unsupported FLAGS.value_reading: %r'
                    % (FLAGS.value_reading,))

            # Debugging
            logits_list[h] = logits
            probs_list[h] = probs
            o_list[h] = o
            q_list[h] = q_b
            # q_b = tf.tanh(q_b)

        return q_b
Ejemplo n.º 56
0
# Script fragment: builds the prediction and loss ops of a two-layer model.
# All inputs (W1, W2, Z, b1, b2, b2g, the index tensors, y_val, the lambda_*
# weights, ...) are defined outside this fragment.

# Alternative activations that were tried for the first layer:
# h1         = tf.nn.elu(tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner = "sum") + b1)
# h1         = tf.nn.relu6(tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner = "sum") + b1)
# First layer pre-activation: sum of the W1 rows selected by sp_ids, plus bias.
l1 = tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner="sum") + b1
# Rows of Z selected by z_idx.
Ze = tf.nn.embedding_lookup(Z, z_idx)
# non_linear_z decides whether Ze is added before or after the tanh.
if non_linear_z:
    h1 = tf.tanh(l1 + Ze)
else:
    h1 = tf.tanh(l1) + Ze

## batch normalization doesn't work that well in comparison to Torch
# h1         = batch_norm_wrapper(l1, tr_ind)

# Gather, for every target value, the hidden row (y_idx_comp) and the
# second-layer weight row and bias (y_idx_prot) it pairs with.
h1e = tf.nn.embedding_lookup(h1, y_idx_comp)
W2e = tf.nn.embedding_lookup(W2, y_idx_prot)
b2e = tf.nn.embedding_lookup(b2, tf.squeeze(y_idx_prot, [1]))
# adj_y=True multiplies by the adjoint of W2e; the product is squeezed on
# axes 1 and 2, i.e. it is one scalar per gathered pair.
l2 = tf.squeeze(tf.batch_matmul(h1e, W2e, adj_y=True), [1, 2]) + b2e
# b2g is added to every prediction.
y_pred = l2 + b2g

## batch normalization doesn't work that well in comparison to Torch
# scale2e    = tf.nn.embedding_lookup(scale2, tf.squeeze(y_idx_prot, [1]))
# beta2e     = tf.nn.embedding_lookup(beta2, tf.squeeze(y_idx_prot, [1]))
# batch_mean2, batch_var2 = tf.nn.moments(l2,[0])
# z2         = (l2 - batch_mean2) / tf.sqrt(batch_var2 + epsilon)
# y_pred     = scale2e * l2 + b2g

# NOTE(review): within this fragment b_ratio is referenced only by the
# commented-out l2_reg variant below.
b_ratio = np.float32(Ncmpd) / np.float32(batch_size)

# Sum (not mean) of squared errors over the batch.
y_loss = tf.reduce_sum(tf.square(y_val - y_pred))
#l2_reg     = lambda_reg * tf.global_norm((W1, W2))**2 + lambda_zreg * b_ratio * tf.nn.l2_loss(Ze)
# Regulariser: squared global norm of (W1, W2) plus an L2 penalty on all of Z
# (not only on the rows gathered into Ze).
l2_reg = lambda_reg * tf.global_norm(
    (W1, W2))**2 + lambda_zreg * tf.nn.l2_loss(Z)
Ejemplo n.º 57
0
    def build_generator(self):
        """Build the greedy caption-generation graph with attention over frames.

        Video frame features are embedded once. Then, for each of
        ``n_caption_step`` steps, attention weights over the frames are
        computed from the previous LSTM output, the attended feature is fed
        to ``self.lstm3`` together with the embedding of the previously
        chosen word, and the arg-max word of the resulting logits is emitted.

        Returns:
            A tuple ``(video, video_mask, generated_words, lstm3_variables)``:
            the two input placeholders, the chosen word indices (stacked over
            the steps and transposed), and the variables whose names start
            with the "LSTM3" scope name.
        """
        video = tf.placeholder(
            tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
        video_mask = tf.placeholder(tf.float32,
                                    [self.batch_size, self.n_lstm_steps])

        # Embed every frame with one affine layer, then switch to a
        # step-major layout (n_lstm_steps, batch_size, dim_hidden).
        video_flat = tf.reshape(video, [-1, self.dim_image])
        image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W,
                                    self.encode_image_b)
        image_emb = tf.reshape(
            image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
        image_emb = tf.transpose(image_emb, [1, 0, 2])

        # Zero initial LSTM state and zero "previous output".
        state1 = tf.zeros([self.batch_size, self.lstm3.state_size])
        h_prev = tf.zeros([self.batch_size, self.dim_hidden])

        generated_words = []

        # The first step is fed a zero word embedding.
        current_embed = tf.zeros([self.batch_size, self.dim_hidden])
        # The attention parameters are tiled once per frame so they can be
        # applied with batch_matmul along the leading frame axis.
        brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0),
                          [self.n_lstm_steps, 1, 1])  # n x h x 1
        # Frame-dependent part of the attention energy; it does not change
        # between decoding steps, so it is computed once before the loop.
        image_part = tf.batch_matmul(
            image_emb,
            tf.tile(
                tf.expand_dims(self.embed_att_Ua, 0),
                [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h
        # NOTE(review): n_caption_step is a free name, not an attribute of
        # self; it has to be defined in an enclosing scope.
        for i in range(n_caption_step):
            # Attention energies from the previous output and the frames.
            e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) +
                        image_part)  # n x b x h
            e = tf.batch_matmul(e, brcst_w)
            e = tf.reduce_sum(e, 2)  # n x b
            # Masked softmax over the frame axis: exponentiate, zero the
            # masked frames, normalise by the per-example sum.
            e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
            denomin = tf.reduce_sum(e_hat_exp, 0)  # b
            # Add 1 where the sum is exactly 0 to avoid dividing by zero.
            denomin = denomin + tf.to_float(tf.equal(denomin, 0))
            alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                             [1, 1, self.dim_hidden])  # n x b x h
            attention_list = tf.mul(alphas, image_emb)  # n x b x h
            atten = tf.reduce_sum(attention_list, 0)  # b x h

            # From the second step on, reuse the variables created during
            # the first step instead of creating new ones.
            if i > 0: tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM3") as vs:
                output1, state1 = self.lstm3(
                    tf.concat(1, [atten, current_embed]), state1)  # b x h
                # Only bound inside the loop; the value returned is the one
                # from the last iteration.
                lstm3_variables = [
                    v for v in tf.all_variables() if v.name.startswith(vs.name)
                ]

            output2 = tf.tanh(
                tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]),
                                self.embed_nn_Wp, self.embed_nn_bp))  # b x h
            h_prev = output1
            logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W,
                                          self.embed_word_b)  # b x w
            # Greedy decoding: pick the highest-scoring word and feed its
            # embedding into the next step.
            max_prob_index = tf.argmax(logit_words, 1)  # b
            generated_words.append(max_prob_index)  # b
            with tf.device("/cpu:0"):
                current_embed = tf.nn.embedding_lookup(self.Wemb,
                                                       max_prob_index)

        # Stack the per-step indices and put the batch axis first.
        generated_words = tf.transpose(tf.pack(generated_words))
        return video, video_mask, generated_words, lstm3_variables
Ejemplo n.º 58
0
 def _transfer(self, transfer_matrix, embeddings):
     """Return the batched matrix product ``transfer_matrix @ embeddings``."""
     projected = tf.batch_matmul(transfer_matrix, embeddings)
     return projected
Ejemplo n.º 59
0
def cumsum_weights(input, W, r=D):
    """Batch-multiply the masked input by a stack of triangular matrices.

    ``mask(input, W, r)`` is multiplied, example by example, with
    ``batch_size`` identical copies of ``ones_triangular(NUM_NOTES)``.
    Judging by the function name this yields cumulative sums of the masked
    weights -- TODO confirm against ``mask`` and ``ones_triangular``.
    """
    masked_input = mask(input, W, r)
    triangle = ones_triangular(NUM_NOTES)
    # One copy of the triangular matrix per example in the batch.
    stacked_triangles = np.array([triangle for _ in range(batch_size)])
    return tf.batch_matmul(masked_input, stacked_triangles)
Ejemplo n.º 60
0
    def build_network(self):
        """Build the encoder, the latent sampling step, the decoder and the train op.

        The encoder RNN state is projected to ``z_mean`` and ``z_log_var``;
        a latent sample ``z`` is drawn as
        ``z_mean + sqrt(exp(z_log_var)) * eps`` with standard-normal ``eps``.
        The decoder is run twice with shared variables: once from ``z`` on
        ``self.inputs`` (reconstruction, ``self.output``) and once from the
        ``self.z_gen`` placeholder on constant inputs (generation,
        ``self.gen_output``).

        Side effects: sets ``self.z_mean``, ``self.z_log_var``, ``self.z``,
        ``self.output``, ``self.z_gen``, ``self.gen_output``, ``self.inp``,
        ``self.train_loss`` and ``self.train``.
        """
        with tf.variable_scope('encoder'):
            # Affine projections from the encoder state to the mean and
            # log-variance of the latent distribution.
            z_mean_w = tf.Variable(
                self.initializer([self._enc_cell.state_size, self.n_latent]))
            z_mean_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))
            z_logvar_w = tf.Variable(
                self.initializer([self._enc_cell.state_size, self.n_latent]))
            z_logvar_b = tf.Variable(
                tf.zeros([self.n_latent], dtype=tf.float32))

            # Only the final encoder state is used; per-step outputs are
            # discarded.
            _, enc_state = rnn.rnn(self._enc_cell,
                                   self.inputs,
                                   dtype=tf.float32)
            self.z_mean = tf.add(tf.matmul(enc_state, z_mean_w), z_mean_b)
            self.z_log_var = tf.add(tf.matmul(enc_state, z_logvar_w),
                                    z_logvar_b)
            # Standard-normal noise (mean 0, stddev 1) for the latent sample.
            eps = tf.random_normal((self.batch_size, self.n_latent),
                                   0,
                                   1,
                                   dtype=tf.float32)
            # sqrt(exp(log_var)) is the standard deviation.
            self.z = tf.add(self.z_mean,
                            tf.mul(tf.sqrt(tf.exp(self.z_log_var)), eps))

        with tf.variable_scope('decoder') as scope:
            # dec_in_*: latent vector -> initial decoder state.
            dec_in_w = tf.Variable(
                self.initializer([self.n_latent, self._dec_cell.state_size],
                                 dtype=tf.float32))
            dec_in_b = tf.Variable(
                tf.zeros([self._dec_cell.state_size], dtype=tf.float32))
            # dec_out_*: decoder output -> one value per input element.
            dec_out_w = tf.Variable(
                self.initializer([self.n_hidden, self.elem_num],
                                 dtype=tf.float32))
            dec_out_b = tf.Variable(tf.zeros([self.elem_num],
                                             dtype=tf.float32))

            # Reconstruction path: decode from the sampled z, feeding the
            # original inputs to the decoder.
            initial_dec_state = self.transfer_func(
                tf.add(tf.matmul(self.z, dec_in_w), dec_in_b))
            dec_out, _ = seq2seq.rnn_decoder(self.inputs, initial_dec_state,
                                             self._dec_cell)
            # Optionally reverse the order of the per-step outputs.
            if self.reverse:
                dec_out = dec_out[::-1]
            # Stack the per-step outputs and move the batch axis first.
            dec_output = tf.transpose(tf.pack(dec_out), [1, 0, 2])
            # batch_matmul needs one weight matrix per example, so the output
            # projection is tiled batch_size times.
            batch_dec_out_w = tf.tile(tf.expand_dims(dec_out_w, 0),
                                      [self.batch_size, 1, 1])
            self.output = tf.nn.sigmoid(
                tf.batch_matmul(dec_output, batch_dec_out_w) + dec_out_b)

            # Generation path: rerun the decoder with the variables created
            # above, starting from an externally fed latent vector and fed a
            # constant 0.5 input at each of the step_num steps.
            scope.reuse_variables()
            dec_gen_input = [
                0.5 *
                tf.ones([self.batch_size, self.elem_num], dtype=tf.float32)
                for _ in range(self.step_num)
            ]
            self.z_gen = tf.placeholder(tf.float32,
                                        [self.batch_size, self.n_latent])
            dec_gen_state = self.transfer_func(
                tf.add(tf.matmul(self.z_gen, dec_in_w), dec_in_b))
            dec_gen_out, _ = seq2seq.rnn_decoder(dec_gen_input, dec_gen_state,
                                                 self._dec_cell)
            if self.reverse:
                dec_gen_out = dec_gen_out[::-1]
            dec_gen_output = tf.transpose(tf.pack(dec_gen_out), [1, 0, 2])
            self.gen_output = tf.nn.sigmoid(
                tf.batch_matmul(dec_gen_output, batch_dec_out_w) + dec_out_b)

        # Inputs stacked with the batch axis first, like the decoder outputs.
        self.inp = tf.transpose(tf.pack(self.inputs), [1, 0, 2])
        # The loss itself is defined by self.get_loss().
        self.train_loss = self.get_loss()
        self.train = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.train_loss)