Example #1
0
 def invert(self):
     """
     Replace the stored unitary with its inverse.

     The inverse of a unitary operator is its conjugate transpose
     (adjoint), so ``self.unitary`` is overwritten with the conjugate
     transpose of its current value. Nothing is returned.

     NOTE(review): with no ``perm`` argument ``tf.transpose`` reverses all
     axes, so this is only the adjoint if ``self.unitary`` is a single
     rank-2 matrix -- confirm there is no leading batch dimension.
     """
     # `tf.tranpose` does not exist (AttributeError); the op is
     # `tf.transpose`.
     self.unitary = tf.transpose(self.unitary,
                                 conjugate=True,
                                 name="dagger_op")
Example #2
0
 def BilinearAttentionLayer(self,q,v):
     """
     Weight the features ``v`` by a bilinear attention score against ``q``.

     Both inputs are projected to ``num_hid`` units with ``self.fc``; the
     projected ``v`` is scaled element-wise by the learned ``h_mat`` and
     multiplied with the projected ``q``, then ``h_bias`` is added.

     :param q: query features; assumed [batch, q_dim] -- TODO confirm.
     :param v: features to attend over; assumed [batch, k, v_dim] --
         TODO confirm.
     :returns: ``v`` multiplied by the attention logits (broadcast over
         the last axis).
     """
     num_hid = 512
     # Size h_mat from the same local `num_hid` used by the projections.
     # The original read `self.num_hid`, which is not set here and could
     # disagree with (or be missing next to) the 512 used below.
     self.h_mat = self.add_weight(name='h_mat', shape=[1, 1, num_hid],
                                  initializer='normal', trainable=True)
     self.h_bias = self.add_weight(name='h_bias', shape=[1, 1, 1],
                                   initializer='normal', trainable=True)

     v_proj = self.fc(v, num_hid, activation_fn='relu')

     q_proj = self.fc(q, num_hid, activation_fn='relu')
     q_proj = tf.nn.dropout(q_proj, self.ph_dropout)
     # Insert a length-1 axis and swap it to the end so q_proj can be the
     # right-hand operand of the batched matmul below.
     # `tf.tranpose` was a typo for `tf.transpose`.
     q_proj = tf.transpose(tf.expand_dims(q_proj, 1), [0, 2, 1])

     v_proj = v_proj * self.h_mat
     logits = tf.matmul(v_proj, q_proj) + self.h_bias #[batch, k, 1]
     return v * logits
Example #3
0
    def call(self, inputs):
        '''
        Upscales the input tensor along its last axis.

        Every element of the last axis is repeated ``self.ratio`` times in
        place, e.g. ``[a, b]`` with ratio 2 becomes ``[a, a, b, b]``.

        :param inputs:
            A tensor of shape [x, y, z].
        :returns:
            A tensor with shape [x, y, z * c], where c is ``self.ratio``.

        NOTE(review): the previous body did not parse (stray ``]``), called
        the non-existent ``tf.tranpose``, broadcast to a rank-3 shape, and
        flattened the result to 1-D. This version implements the contract
        documented above; confirm that element-wise repetition (rather
        than tiling) is the intended upscaling.
        '''

        # Dynamic shape so an unknown batch dimension is handled.
        shape = tf.shape(inputs)
        x, y, z = shape[0], shape[1], shape[2]

        # [x, y, z] -> [x, y, 1, z] -> [x, y, c, z]
        repeated = tf.expand_dims(inputs, 2)
        repeated = tf.broadcast_to(repeated, [x, y, self.ratio, z])
        # Swap the last two axes so the c copies of each element become
        # adjacent: [x, y, z, c].
        repeated = tf.transpose(repeated, perm=(0, 1, 3, 2))
        return tf.reshape(repeated, [x, y, z * self.ratio])
Example #4
0
    def __init__(self, is_training=True):
        """
        Build the encoder / attention-decoder graph in ``self.graph``.

        :param is_training:
            When true, ``self.x``, ``self.y`` and ``self.num_batch`` come
            from ``get_batch_data()`` and a summed negative log-likelihood
            is stored in ``self.loss``. Otherwise ``self.x`` and ``self.y``
            are int32 placeholders of shape ``(None, hp.maxlen)``.

        Token id 0 is treated as padding throughout (lengths and masks are
        derived from ``tf.sign`` of the id tensors).
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                # TODO get_batch_data
                self.x, self.y, self.num_batch = get_batch_data()
            else:
                self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
                self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

            # TODO: define decoder input

            # TODO: encode vocab vs decode vocab
            encode2idx, idx2encode = load_encode_vocab()
            decode2idx, idx2decode = load_decode_vocab()

            # Number of non-padding ids per row.
            x_len = tf.reduce_sum(tf.sign(self.x), 1)
            y_len = tf.reduce_sum(tf.sign(self.y), 1)

            with tf.variable_scope("encoder"):
                self.enc = embedding(
                        self.x, vocab_size=len(encode2idx),
                        num_units=hp.embedding_dim,
                        scale=False,
                        scope="encode_embed")

                # bi-LSTM -> drop-out -> bi-LSTM
                # hidden unit size = 600;
                cell = lstm_stack(
                        hp.hidden_units,
                        hp.dropout_rate,
                        is_training)
                # NOTE(review): the same cell object is used for both
                # directions; depending on the TF version this either
                # errors or silently shares weights. Consider two cells.
                (fw_h, bw_h), _ = tf.nn.bidirectional_dynamic_rnn(
                        cell, cell, self.enc, x_len, dtype='float')

                # bt: use for decoder attention compute
                # shape: N, max_sent_size, 2 * hidden_size
                # The RNN outputs are rank 3 (batch, time, units), so the
                # old four-index slices could not work; concatenate the
                # full per-step outputs of both directions.
                bt = tf.concat([fw_h, bw_h], -1)

                # Last forward step and first backward step, N x 2*hidden.
                # tf.concat needs its tensors in a list.
                # NOTE(review): steps past x_len are zero in dynamic RNN
                # outputs, and a plain tensor probably does not match the
                # state structure of the decoder cell below -- the final
                # states returned by the RNN are likely what is wanted.
                self.enc = tf.concat(
                        [fw_h[:, -1, :], bw_h[:, 0, :]], -1)

            with tf.variable_scope("decoder"):
                self.dec = embedding(
                        self.y, vocab_size=len(decode2idx),
                        num_units=hp.embedding_dim,
                        scale=False,
                        scope="decode_embed")

                # LSTM
                cell = lstm_stack(
                        hp.hidden_units,
                        hp.dropout_rate,
                        is_training)

                # N, max_ques_size, hidden_units
                # Feed the embedded decoder input; the raw int32 ids in
                # self.y are not a valid RNN input.
                h, _ = tf.nn.dynamic_rnn(
                        cell, self.dec, y_len,
                        initial_state=self.enc, dtype='float')

            with tf.variable_scope("attention"):
                wb = tf.get_variable("wb",
                        [2 * hp.hidden_units, hp.hidden_units],
                        initializer=tf.truncated_normal_initializer(stddev=1.0))
                # att shape: N, max_ques_size, max_sent_size
                # tensordot contracts the last axis of bt with wb without
                # relying on batch broadcasting in tf.matmul.
                logits = tf.matmul(
                        h, tf.tensordot(bt, wb, axes=1),
                        transpose_b=True)
                logits_masks = tf.sign(tf.abs(logits))
                # construct negative infi..
                paddings = tf.ones_like(logits_masks) * (-2**32+1)
                logits = tf.where(tf.equal(logits_masks, 0), paddings, logits)

                att = tf.nn.softmax(logits)
                # self.y is rank 2, so add a trailing axis (N, T, 1)
                # instead of transposing with a rank-3 permutation.
                y_masks = tf.expand_dims(tf.sign(self.y), -1)
                att_masks = tf.tile(y_masks, [1, 1, tf.shape(self.x)[-1]])
                # N, max_ques_size, max_sent_size
                att = tf.where(
                        tf.equal(att_masks, 0), tf.zeros_like(att), att)

                # N, max_ques_size, 2 * hidden_size
                c = tf.matmul(att, bt)
                # The mask must match c's last axis, not the length of y.
                c_masks = tf.tile(y_masks, [1, 1, tf.shape(c)[-1]])
                c = tf.where(tf.equal(c_masks, 0), tf.zeros_like(c), c)

            with tf.variable_scope("prob"):
                combine = tf.concat([h, c], 2)
                # combine is hidden (h) + 2 * hidden (c) wide.
                # NOTE(review): assumes lstm_stack emits hp.hidden_units
                # features, as the shape of `wb` above also implies.
                wt = tf.get_variable("wt",
                        [3 * hp.hidden_units, hp.hidden_units],
                        initializer=tf.truncated_normal_initializer(stddev=1.0))

                # tanh(0) == 0 => so no masks..
                # N, max_ques_size, hidden_units
                logits = tf.tanh(tf.tensordot(combine, wt, axes=1))

                ws = tf.get_variable("ws",
                        [hp.hidden_units, len(decode2idx)],
                        initializer=tf.truncated_normal_initializer(stddev=1.0))
                logits = tf.tensordot(logits, ws, axes=1)

                # N, max_ques_size, len(decode2idx)
                probs = tf.nn.softmax(logits)

                preds = tf.argmax(probs, 2)
                if is_training:
                    flat_probs = tf.reshape(probs, [-1, len(decode2idx)])
                    flat_y = tf.reshape(self.y, [-1])
                    # One (row, target id) pair per position, so gather_nd
                    # picks the probability assigned to each target.
                    rows = tf.range(tf.shape(flat_probs)[0])
                    indices = tf.stack([rows, flat_y], axis=1)

                    y_probs = tf.gather_nd(flat_probs, indices)
                    # Padding positions get probability 1 so they add
                    # log(1) = 0 to the loss; the old value of 0 gave
                    # log(0) = -inf and an infinite loss.
                    y_probs = tf.where(
                            tf.equal(flat_y, 0),
                            tf.ones_like(y_probs), y_probs)
                    self.loss = -tf.reduce_sum(tf.log(y_probs))
                else:
Example #5
0
def tf_tranpose(inputs, perm=(0, 2, 3, 1, 4), name=None):
    """Permute the axes of ``inputs`` with ``tf.transpose``.

    The function name keeps its historical misspelling so existing
    callers continue to work.

    :param inputs: tensor to permute; the default ``perm`` requires rank 5.
    :param perm: new axis order; the default moves axis 1 to position 3.
    :param name: optional name for the op.
    :returns: the permuted tensor.
    """
    # `tf.tranpose` does not exist; the op is `tf.transpose`. Arguments are
    # passed by keyword because the third positional parameter differs
    # between TF1 (`name`) and TF2 (`conjugate`).
    return tf.transpose(inputs, perm=perm, name=name)
Example #6
0
		tf.truncated_normal([n_classes, n_hidden_1],
						stddev=1.0 / math.sqrt(n_hidden_1))		
)

# One bias per output class, initialised to zero.
nce_biases = tf.Variable(tf.zeros([n_classes]))

# nce_loss returns one loss value per example; average them over the batch.
loss = tf.reduce_mean(
    tf.nn.nce_loss(
        weights=nce_weights,
        biases=nce_biases,
        labels=y_batch,
        inputs=pred,
        num_sampled=10,
        num_classes=n_classes,
    )
)

# NOTE(review): `loss` is already a scalar batch mean, so this divides by
# batch_size a second time. Left unchanged because it only rescales the
# objective -- confirm whether that is intended.
cost = tf.reduce_sum(loss) / batch_size
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Scores over all classes, computed with the same weights and biases used
# by the NCE loss. `tf.tranpose` was a typo for `tf.transpose`.
out_layer = tf.matmul(pred, tf.transpose(nce_weights)) + nce_biases  # [batch_size, n_classes]

init = tf.global_variables_initializer()
with tf.Session() as sess:
	sess.run(init)

    # Training cycle
	start_time = time.time()
	total_batch = int(len(train_lst) / batch_size)
	print("total batch of training data: ", total_batch)
	for epoch in range(training_epochs):
		avg_cost = 0.0
		for i in range(total_batch):
			x, y, batch_mask, word_number = read_data(i*batch_size, batch_size, train_lst)
			_, c = sess.run([optimizer, cost], feed_dict={x_batch: x, emb_mask: batch_mask, word_num: word_number, y_batch: y})
			avg_cost += c / total_batch