def invert(self):
    """Computes the inverse of the unitary operator."""
    # The inverse of a unitary is its conjugate transpose (dagger).
    self.unitary = tf.transpose(self.unitary, conjugate=True, name="dagger_op")
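# Sanity check (illustrative, not part of the original): for any unitary U,
# U @ U^dagger should be the identity up to float error.
import tensorflow as tf

u = tf.constant([[0j, -1j], [1j, 0j]], dtype=tf.complex64)  # a Pauli-Y-like unitary
u_dagger = tf.transpose(u, conjugate=True)
print(tf.matmul(u, u_dagger))  # ~ [[1, 0], [0, 1]]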
def BilinearAttentionLayer(self, q, v):
    num_hid = 512
    # Bilinear weights: a per-channel scaling matrix and a scalar bias.
    self.h_mat = self.add_weight(name='h_mat', shape=(1, 1, num_hid),
                                 initializer='normal', trainable=True)
    self.h_bias = self.add_weight(name='h_bias', shape=(1, 1, 1),
                                  initializer='normal', trainable=True)
    v_proj = self.fc(v, num_hid, activation_fn='relu')               # [batch, k, num_hid]
    q_proj = tf.transpose(
        tf.expand_dims(
            tf.nn.dropout(self.fc(q, num_hid, activation_fn='relu'),
                          self.ph_dropout),
            1),
        [0, 2, 1])                                                   # [batch, num_hid, 1]
    v_proj = v_proj * self.h_mat
    logits = tf.matmul(v_proj, q_proj) + self.h_bias                 # [batch, k, 1]
    return v * logits
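# Standalone sketch of the bilinear attention above, with illustrative shapes and
# tf.keras.layers.Dense standing in for the class's self.fc helper (an assumption;
# the original helper is not shown):
import tensorflow as tf

batch, k, v_dim, q_dim, num_hid = 2, 5, 64, 32, 512
v = tf.random.normal([batch, k, v_dim])   # k visual features per example
q = tf.random.normal([batch, q_dim])      # one question vector per example

v_proj = tf.keras.layers.Dense(num_hid, activation='relu')(v)   # [batch, k, num_hid]
q_proj = tf.keras.layers.Dense(num_hid, activation='relu')(q)   # [batch, num_hid]
q_proj = tf.transpose(tf.expand_dims(q_proj, 1), [0, 2, 1])     # [batch, num_hid, 1]

h_mat = tf.Variable(tf.random.normal([1, 1, num_hid]))          # bilinear weight
logits = tf.matmul(v_proj * h_mat, q_proj)                      # [batch, k, 1]
attended = v * logits                                           # [batch, k, v_dim]
print(attended.shape)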
def call(self, inputs):
    '''
    Upscales the input tensor by repeating each element along the last axis.
    :param inputs: A tensor of shape [x, y, z].
    :returns: A tensor of shape [x, y, z * ratio].
    '''
    inputs = tf.expand_dims(inputs, 2)                                  # [x, y, 1, z]
    input_shape = tf.shape(inputs)
    inputs = tf.broadcast_to(
        inputs,
        (input_shape[0], input_shape[1], self.ratio, input_shape[3]))   # [x, y, ratio, z]
    inputs = tf.transpose(inputs, perm=(0, 1, 3, 2))                    # [x, y, z, ratio]
    return tf.reshape(inputs, (input_shape[0], input_shape[1], -1))     # [x, y, z * ratio]
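# One-op equivalent of the upscale above using tf.repeat (ratio assumed to be 3
# for illustration):
import tensorflow as tf

ratio = 3
x = tf.reshape(tf.range(2 * 2 * 4, dtype=tf.float32), [2, 2, 4])  # [x, y, z]
out = tf.repeat(x, ratio, axis=-1)                                # [2, 2, 12]
print(out.shape)  # each element repeated `ratio` times along the last axis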
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            # TODO get_batch_data
            self.x, self.y, self.num_batch = get_batch_data()
        else:
            self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
            self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

        # TODO: define decoder input
        # TODO: encode vocab vs decode vocab
        encode2idx, idx2encode = load_encode_vocab()
        decode2idx, idx2decode = load_decode_vocab()

        # Sequence lengths from the non-zero (non-padding) token ids.
        x_len = tf.reduce_sum(tf.sign(self.x), 1)
        y_len = tf.reduce_sum(tf.sign(self.y), 1)

        with tf.variable_scope("encoder"):
            self.enc = embedding(
                self.x,
                vocab_size=len(encode2idx),
                num_units=hp.embedding_dim,
                scale=False,
                scope="encode_embed")
            # bi-LSTM -> drop-out -> bi-LSTM; hidden unit size = 600
            cell = lstm_stack(hp.hidden_units, hp.dropout_rate, is_training)
            (fw_h, bw_h), _ = tf.nn.bidirectional_dynamic_rnn(
                cell, cell, self.enc, x_len, dtype=tf.float32)
            # bt: used for decoder attention; only the last layer
            # shape: N, max_sent_size, 2 * hidden_size
            bt = tf.concat([fw_h[-1, :, :, :], bw_h[-1, :, :, :]], -1)
            # last forward step and first backward step as the summary state
            self.enc = tf.concat([fw_h[:, :, -1, :], bw_h[:, :, 0, :]], -1)

        with tf.variable_scope("decoder"):
            self.dec = embedding(
                self.y,
                vocab_size=len(decode2idx),
                num_units=hp.embedding_dim,
                scale=False,
                scope="decode_embed")
            # LSTM
            cell = lstm_stack(hp.hidden_units, hp.dropout_rate, is_training)
            # N, max_ques_size, hidden_units
            h, _ = tf.nn.dynamic_rnn(
                cell, self.dec, y_len, initial_state=self.enc, dtype=tf.float32)

        with tf.variable_scope("attention"):
            wb = tf.get_variable(
                "wb", [2 * hp.hidden_units, hp.hidden_units],
                initializer=tf.truncated_normal_initializer(stddev=1.0))
            # att shape: N, max_ques_size, max_sent_size
            logits = tf.matmul(
                h, tf.matmul(bt, tf.expand_dims(wb, 0)), transpose_b=True)
            logits_masks = tf.sign(tf.abs(logits))
            # push padded positions toward negative infinity before softmax
            paddings = tf.ones_like(logits_masks) * (-2 ** 32 + 1)
            logits = tf.where(tf.equal(logits_masks, 0), paddings, logits)
            att = tf.nn.softmax(logits)

            # zero out attention rows for padded decoder steps
            att_masks = tf.expand_dims(tf.sign(self.y), -1)        # N, max_ques_size, 1
            att_masks = tf.tile(att_masks, [1, 1, tf.shape(self.x)[-1]])
            paddings = tf.zeros_like(att)
            # N, max_ques_size, max_sent_size
            att = tf.where(tf.equal(att_masks, 0), paddings, att)

            # N, max_ques_size, 2 * hidden_size
            c = tf.matmul(att, bt)
            c_masks = tf.expand_dims(tf.sign(self.y), -1)          # N, max_ques_size, 1
            c_masks = tf.tile(c_masks, [1, 1, 2 * hp.hidden_units])
            paddings = tf.zeros_like(c)
            c = tf.where(tf.equal(c_masks, 0), paddings, c)

        with tf.variable_scope("prob"):
            # h is hidden_units wide and c is 2 * hidden_units wide,
            # so the combined feature is 3 * hidden_units wide
            combine = tf.concat([h, c], 2)
            wt = tf.get_variable(
                "wt", [3 * hp.hidden_units, hp.hidden_units],
                initializer=tf.truncated_normal_initializer(stddev=1.0))
            logits = tf.matmul(combine, tf.expand_dims(wt, 0))
            # tanh(0) == 0, so padded positions stay zero and need no masks
            # N, max_ques_size, hidden_units
            logits = tf.tanh(logits)
            ws = tf.get_variable(
                "ws", [hp.hidden_units, len(decode2idx)],
                initializer=tf.truncated_normal_initializer(stddev=1.0))
            # N, max_ques_size, len(decode2idx)
            logits = tf.matmul(logits, tf.expand_dims(ws, 0))
            probs = tf.nn.softmax(logits)
            preds = tf.argmax(probs, 2)

        if is_training:
            # gather the probability assigned to each gold token
            flat_probs = tf.reshape(probs, [-1, len(decode2idx)])
            indices = tf.range(tf.shape(flat_probs)[0])
            indices = tf.concat(
                [tf.reshape(indices, [-1, 1]), tf.reshape(self.y, [-1, 1])], 1)
            y_probs = tf.gather_nd(flat_probs, indices)
            # padded positions contribute log(1) == 0 to the loss
            y_probs = tf.where(
                tf.equal(tf.reshape(self.y, [-1]), 0),
                tf.ones_like(y_probs), y_probs)
            self.loss = tf.log(y_probs)
            self.loss = -tf.reduce_sum(self.loss)
        else:
            pass  # TODO: inference-time decoding
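# Minimal sketch of the masking idiom used in the attention scope above: padded
# positions (logit exactly 0) are pushed to a large negative value before the
# softmax so they receive ~0 attention weight. Values here are made up.
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.0],    # third position is padding
                      [0.5, 0.0, 0.0]])   # second and third are padding
masks = tf.sign(tf.abs(logits))                        # 1 where real, 0 where padded
paddings = tf.ones_like(logits) * (-2.0 ** 32 + 1)
masked = tf.where(tf.equal(masks, 0), paddings, logits)
att = tf.nn.softmax(masked)
print(att.numpy())  # padded columns get (near-)zero weight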
def tf_transpose(inputs, perm=(0, 2, 3, 1, 4), name=None):
    """Transposes a 5-D tensor; the default perm moves axis 1 behind axes 2 and 3."""
    return tf.transpose(inputs, perm=perm, name=name)
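# Usage sketch for the wrapper above: on a 5-D tensor the default perm moves
# axis 1 behind axes 2 and 3.
import tensorflow as tf

x = tf.zeros([2, 3, 4, 5, 6])
print(tf_transpose(x).shape)  # (2, 4, 5, 3, 6)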
nce_weights = tf.Variable(
    tf.truncated_normal([n_classes, n_hidden_1],
                        stddev=1.0 / math.sqrt(n_hidden_1)))
nce_biases = tf.Variable(tf.zeros([n_classes]))

# Sampled NCE loss for training; 10 negative classes per example.
loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                     biases=nce_biases,
                                     labels=y_batch,
                                     inputs=pred,
                                     num_sampled=10,
                                     num_classes=n_classes))
cost = tf.reduce_sum(loss) / batch_size
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Full per-class scores for evaluation, reusing the NCE weights.
out_layer = tf.matmul(pred, tf.transpose(nce_weights)) + nce_biases  # [batch_size, n_classes]

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    start_time = time.time()
    total_batch = int(len(train_lst) / batch_size)
    print("total batch of training data: ", total_batch)
    for epoch in range(training_epochs):
        avg_cost = 0.0
        for i in range(total_batch):
            x, y, batch_mask, word_number = read_data(i * batch_size, batch_size, train_lst)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x_batch: x,
                                       emb_mask: batch_mask,
                                       word_num: word_number,
                                       y_batch: y})
            avg_cost += c / total_batch
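# Hedged sketch (not in the original script): since out_layer already holds full
# per-class scores, an accuracy op can be built on top of it. This assumes
# y_batch carries one true class id per row ([batch_size, 1]), as
# tf.nn.nce_loss expects.
correct = tf.equal(tf.argmax(out_layer, 1),
                   tf.cast(tf.reshape(y_batch, [-1]), tf.int64))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# e.g. sess.run(accuracy, feed_dict={...}) on a held-out batch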