def auto_regressive_model(input, target, weights, bias):
    """
    Builds the auto regressive model.
    For details on the model, refer to the written report.
    """
    hidden01 = tf.matmul(normalize(input), weights['M1'])  # V_d
    # V_d augmented to D across dimension 2
    hidden01 = tf.batch_matmul(tf.expand_dims(hidden01, 2),
                               tf.ones([batch_size, 1, NUM_NOTES]))
    hidden02 = cumsum_weights(normalize(target), weights['M2'], D)  # V_c
    hidden = hidden01 + hidden02

    y = tf.zeros([1], tf.float32)
    split = tf.split(0, batch_size, hidden)
    y = tf.batch_matmul(tf.expand_dims(tf.transpose(tf.squeeze(split[0])), 1),
                        tf.expand_dims(tf.transpose(weights['W']), 2))
    for i in range(1, len(split)):
        y = tf.concat(0, [y, tf.batch_matmul(
            tf.expand_dims(tf.transpose(tf.squeeze(split[i])), 1),
            tf.expand_dims(tf.transpose(weights['W']), 2))])
    y = tf.squeeze(y)
    output = tf.reshape(y, [batch_size, NUM_NOTES])
    return output
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        all_scores = []
        shard = tf.expand_dims(shard, 0)
        for c in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[c, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[c, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.pack([self._num_examples, 1, 1]))
            diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
            m_left = tf.batch_matmul(diff, inv_cov)
            all_scores.append(tf.sqrt(tf.batch_matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))))
        self._all_scores.append(tf.reshape(
            tf.concat(1, all_scores),
            tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
def test_lanczos_bidiag(self):
    np.random.seed(1)
    a_np = np.random.uniform(
        low=-1.0, high=1.0, size=np.prod(shape_)).reshape(shape_).astype(dtype_)
    tol = 1e-12 if dtype_ == np.float64 else 1e-5
    with self.test_session() as sess:
        if use_static_shape_:
            a = tf.constant(a_np)
        else:
            a = tf.placeholder(dtype_)
        operator = util.create_operator(a)
        lbd = lanczos.lanczos_bidiag(
            operator, steps_, orthogonalize=orthogonalize_)

        # The computed factorization should satisfy the equations
        #   A * V = U * B
        #   A' * U[:, :-1] = V * B[:-1, :]'
        av = tf.batch_matmul(a, lbd.v)
        ub = lanczos.bidiag_matmul(lbd.u, lbd.alpha, lbd.beta, adjoint_b=False)
        atu = tf.batch_matmul(a, lbd.u[:, :-1], adj_x=True)
        vbt = lanczos.bidiag_matmul(lbd.v, lbd.alpha, lbd.beta, adjoint_b=True)

        if use_static_shape_:
            av_val, ub_val, atu_val, vbt_val = sess.run([av, ub, atu, vbt])
        else:
            av_val, ub_val, atu_val, vbt_val = sess.run(
                [av, ub, atu, vbt], feed_dict={a: a_np})
        self.assertAllClose(av_val, ub_val, atol=tol, rtol=tol)
        self.assertAllClose(atu_val, vbt_val, atol=tol, rtol=tol)
def log_likelihood(batch):
    # batch is an NxD matrix, where N is the length of the batch and D is the
    # dimension of the samples
    # P(D|w) = prod( sum( pi*N(samp|k))
    # exp(-square(mean-samp))

    # multiplying by ones replicates the matrix, becomes (N,D,K)
    tmp1 = tf.batch_matmul(tf.reshape(batch, [N, D, 1]), tf.ones([N, 1, K]))
    # same but with the means matrix
    tmp2 = tf.batch_matmul(means, tf.ones([K, 1, N]))
    tmp2 = tf.transpose(tmp2, [2, 1, 0])
    # (x - mu)
    tmp3 = tmp1 - tmp2
    tmp4 = tmp1 - tmp2
    # (x - mu).T(x - mu)
    tmp3 = tf.batch_matmul(tf.transpose(tmp3, [0, 2, 1]), tmp3)
    tmp3 = tf.reduce_sum(tmp3, 2)
    # -(x - mu).T(x - mu)
    tmp3 = -tmp3
    # exp(-(x - mu).T(x - mu))
    tmp3 = tf.exp(tmp3)
    # multiply by mixture weights
    tmp3 = tf.matmul(tmp3, mixture_weights)
    # log
    tmp3 = tf.log(tmp3)
    # sum over all samples of the batch
    tmp3 = tf.reduce_sum(tmp3, 0)
    return tmp3
def build_node(self, x_in, c_in, h_in, scope="lstm_cell"):
    #print (x_in, c_in, h_in, scope)
    #print [type(thing) for thing in (x_in, c_in, h_in, scope)]
    #print [(item.name, item.dtype) for thing in (h_in, c_in) for item in thing]
    #print (x_in.name, x_in.dtype)
    with tf.variable_scope(scope):
        #print x.shape
        #print h_in.get_shape()
        x_with_h = tf.concat(2, [x_in, h_in])
        ones_for_bias = tf.constant(np.ones([batch_size, 1, 1]), name="b",
                                    dtype=tf.float32)
        x_h_concat = tf.concat(2, [ones_for_bias, x_with_h])

        # forget gate layer
        #print "w_f: ", self.w_f.get_shape()
        #print "x_h_concat: ", x_h_concat.get_shape()
        f = tf.sigmoid(tf.batch_matmul(x_h_concat, self.w_f))

        # candidate values
        i = tf.sigmoid(tf.batch_matmul(x_h_concat, self.w_i))
        candidate_c = tf.tanh(tf.batch_matmul(x_h_concat, self.w_c))

        # new cell state (hidden)
        # forget old values of c
        old_c_to_keep = tf.mul(f, c_in)
        # scaled candidate values of c
        new_c_to_keep = tf.mul(i, candidate_c)
        c = tf.add(old_c_to_keep, new_c_to_keep)

        # new scaled output
        o = tf.sigmoid(tf.batch_matmul(x_h_concat, self.w_o))
        h = tf.mul(o, tf.tanh(c))
        return (c, h)
def __init__(self, memory_cells, query, project_query=False):
    """Define Attention.

    Args:
        memory_cells (SequenceBatch): a SequenceBatch containing a Tensor of shape
            (batch_size, num_cells, cell_dim)
        query (Tensor): a tensor of shape (batch_size, query_dim).
        project_query (bool): defaults to False. If True, the query goes through an
            extra projection layer to coerce it to cell_dim.
    """
    cell_dim = memory_cells.values.get_shape().as_list()[2]
    if project_query:
        # project the query up/down to cell_dim
        self._projection_layer = Dense(cell_dim, activation='linear')
        query = self._projection_layer(query)  # (batch_size, cand_dim)

    memory_values, memory_mask = memory_cells.values, memory_cells.mask

    # batch matrix multiply to compute logit scores for all choices in all batches
    query = tf.expand_dims(query, 2)  # (batch_size, cell_dim, 1)
    logit_values = tf.batch_matmul(memory_values, query)  # (batch_size, num_cells, 1)
    logit_values = tf.squeeze(logit_values, [2])  # (batch_size, num_cells)

    # set all pad logits to negative infinity
    logits = SequenceBatch(logit_values, memory_mask)
    logits = logits.with_pad_value(-float('inf'))

    # normalize to get probs
    probs = tf.nn.softmax(logits.values)  # (batch_size, num_cells)

    retrieved = tf.batch_matmul(tf.expand_dims(probs, 1), memory_values)  # (batch_size, 1, cell_dim)
    retrieved = tf.squeeze(retrieved, [1])  # (batch_size, cell_dim)

    self._logits = logits.values
    self._probs = probs
    self._retrieved = retrieved
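# A minimal, self-contained sketch of the same masked dot-product attention pattern used in
# __init__ above, written against the TF 0.x-era API these snippets assume. The helper name,
# the masking constant, and all shapes are illustrative only, not part of the original source.
def dot_product_attention_sketch(memory_values, memory_mask, query):
    # memory_values: (batch, num_cells, cell_dim); memory_mask: (batch, num_cells); query: (batch, cell_dim)
    logits = tf.squeeze(tf.batch_matmul(memory_values, tf.expand_dims(query, 2)), [2])  # (batch, num_cells)
    # push padded cells toward probability zero before the softmax
    logits = tf.select(tf.cast(memory_mask, tf.bool), logits, -1e30 * tf.ones_like(logits))
    probs = tf.nn.softmax(logits)                                         # (batch, num_cells)
    retrieved = tf.batch_matmul(tf.expand_dims(probs, 1), memory_values)  # (batch, 1, cell_dim)
    return tf.squeeze(retrieved, [1]), probs                              # (batch, cell_dim), (batch, num_cells)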
def lstm_cell(i, o, state):
    """Create a LSTM cell. See e.g.: http://arxiv.org/pdf/1402.1128v1.pdf

    Note that in this formulation, we omit the various connections between the
    previous state and the gates.
    """
    i_list = tf.pack([i, i, i, i])
    #print i_list.get_shape().as_list()
    o_list = tf.pack([o, o, o, o])

    ins = tf.batch_matmul(i_list, fico_x)
    outs = tf.batch_matmul(o_list, fico_m)
    h_x = ins + outs + fico_b
    #print h_x.get_shape().as_list()

    #forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + fb)
    forget_gate = tf.sigmoid(h_x[0, :, :])
    #input_gate = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + ib)
    input_gate = tf.sigmoid(h_x[1, :, :])
    #update = tf.tanh(tf.matmul(i, cx) + tf.matmul(o, cm) + cb)
    update = tf.tanh(h_x[2, :, :])

    state = forget_gate * state + input_gate * update

    #output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + ob)
    output_gate = tf.sigmoid(h_x[3, :, :])
    h = output_gate * tf.tanh(state)
    #print 'h', h.get_shape().as_list()
    return h, state
def extract_patch(x, f_y, f_x, nchannels):
    """
    Args:
        x: [B, H, W, D]
        f_y: [B, H, FH]
        f_x: [B, W, FW]
        nchannels: D

    Returns:
        patch: [B, FH, FW, D]
    """
    patch = [None] * nchannels
    fsize_h = tf.shape(f_y)[2]
    fsize_w = tf.shape(f_x)[2]
    hh = tf.shape(x)[1]
    ww = tf.shape(x)[2]
    for dd in xrange(nchannels):
        # [B, H, W]
        x_ch = tf.reshape(
            tf.slice(x, [0, 0, 0, dd], [-1, -1, -1, 1]),
            tf.pack([-1, hh, ww]))
        patch[dd] = tf.reshape(
            tf.batch_matmul(tf.batch_matmul(f_y, x_ch, adj_x=True), f_x),
            tf.pack([-1, fsize_h, fsize_w, 1]))
    return tf.concat(3, patch)
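# Hedged usage sketch for extract_patch (the shapes and the *_demo names below are
# illustrative, not from the original source): each channel is reduced to a glimpse via
# patch = f_y^T * X * f_x, so 28x28 inputs with 12-tap filter banks give a [B, 12, 12, D] patch.
x_demo = tf.zeros([8, 28, 28, 3])    # [B, H, W, D]
f_y_demo = tf.zeros([8, 28, 12])     # [B, H, FH]
f_x_demo = tf.zeros([8, 28, 12])     # [B, W, FW]
patch_demo = extract_patch(x_demo, f_y_demo, f_x_demo, 3)  # -> [8, 12, 12, 3]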
def Test(self):
    np.random.seed(1)
    n = shape_[-1]
    batch_shape = shape_[:-2]
    a = np.random.uniform(
        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
    a += a.T
    a = np.tile(a, batch_shape + (1, 1))
    if dtype_ == np.float32:
        atol = 1e-4
    else:
        atol = 1e-12
    for compute_v in False, True:
        np_e, np_v = np.linalg.eig(a)
        with self.test_session():
            if compute_v:
                tf_e, tf_v = tf.self_adjoint_eig(tf.constant(a))

                # Check that V*diag(E)*V^T is close to A.
                a_ev = tf.batch_matmul(
                    tf.batch_matmul(tf_v, tf.batch_matrix_diag(tf_e)),
                    tf_v,
                    adj_y=True)
                self.assertAllClose(a_ev.eval(), a, atol=atol)

                # Compare to numpy.linalg.eig.
                CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(),
                                           tf_v.eval(), atol)
            else:
                tf_e = tf.self_adjoint_eigvals(tf.constant(a))
                self.assertAllClose(
                    np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
def build_memory(self):
    self.global_step = tf.Variable(0, name="global_step")

    # Linear Projection Layer
    self.T = tf.Variable(tf.random_normal([self.idim, self.edim],
                                          stddev=self.init_std,
                                          name="projection"))

    reshape = tf.reshape(self.story, [-1, self.idim])
    m = tf.matmul(reshape, self.T)  # [batch_size * nstory, edim]
    m = tf.reshape(m, [self.batch_size, self.nstory, -1])

    reshape = tf.reshape(self.query, [-1, self.idim])
    u = tf.matmul(reshape, self.T)  # [batch_size * 1, edim]
    u = tf.reshape(u, [self.batch_size, 1, -1])

    reshape = tf.reshape(self.answer, [-1, self.idim])
    g = tf.matmul(reshape, self.T)  # [batch_size * nanswer, edim]
    g = tf.reshape(g, [self.batch_size, self.nanswer, -1])

    for h in xrange(self.nhop):
        p = tf.batch_matmul(m, u, adj_y=True)  # [batch_size, nstory, 1]
        p = tf.reshape(p, [self.batch_size, -1])
        p = tf.nn.softmax(p)  # [batch_size, nstory]

        reshape = tf.reshape(p, [self.batch_size, -1, 1])
        o = tf.reduce_sum(tf.mul(m, reshape), 1)
        u = tf.add(o, u)

    logits = tf.batch_matmul(g, u, adj_y=True)  # [batch_size, nanswer, 1]
    logits = tf.reshape(logits, [self.batch_size, -1])
    self.logits = logits
    self.probs = tf.nn.softmax(logits)
def write(self, M0, write_w0s, write_heads):
    write_w1s = []
    for i in xrange(self.n_heads):
        head = write_heads[i]
        w0 = write_w0s[i]
        w1 = NTMCell.address(M0, w0, head)
        # For analysis
        #w1 = tf.Print(w1, [w1], "write", summarize=1000)
        write_w1s.append(w1)

    M1 = M0
    # Erases: each write weighting is paired with its own head's erase vector
    for w1, head in zip(write_w1s, write_heads):
        we = 1 - tf.batch_matmul(
            tf.expand_dims(w1, 2),
            tf.expand_dims(head["erase"], 1))
        M1 = M1 * we
    # Writes: each write weighting is paired with its own head's add vector
    for w1, head in zip(write_w1s, write_heads):
        add = tf.batch_matmul(
            tf.expand_dims(w1, 2),
            tf.expand_dims(head["add"], 1))
        M1 = M1 + add
    return M1, write_w1s
def write(self, lstm_h, Fx, Fy, gamma):
    with tf.variable_scope("writeW", reuse=self.share):
        w = self.linear(lstm_h, self.N * self.N)  # batch x (write_n*write_n)
    w = tf.reshape(w, [-1, self.N, self.N])
    Fyt = tf.transpose(Fy, perm=[0, 2, 1])
    wr = tf.batch_matmul(Fyt, tf.batch_matmul(w, Fx))
    return wr * tf.reshape(1.0 / gamma, [-1, 1, 1])
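# In equation form, the write operation above produces, per batch element,
#   w_written = (1 / gamma) * F_y^T * W * F_x
# where W is the N x N patch decoded from lstm_h and F_x, F_y are the attention filter banks.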
def copy_net(decoder_out):
    with tf.variable_scope('copy_net') as scope:
        decoder_out = tf.reshape(decoder_out, [-1, decoder_hidden, 1])
        source_prob = tf.batch_matmul(rnn_encoder_temp, decoder_out)
        source_prob = tf.reshape(
            source_prob, [-1, 1, source_prob.get_shape().as_list()[1]])
        voc_prob = tf.batch_matmul(source_prob, one_hot)
        voc_prob = tf.reshape(voc_prob, [-1, voc_prob.get_shape().as_list()[-1]])
        return voc_prob
def build_memory(self):
    self.global_step = tf.Variable(0, name="global_step")

    # embedding matrix A of dimension d*V,
    # converting x_i into memory vectors m_i
    self.A = tf.Variable(tf.random_normal([self.nwords, self.edim], stddev=self.init_std))
    # embedding matrix B with the same dimension as A,
    # converting q to obtain an internal state u
    self.B = tf.Variable(tf.random_normal([self.nwords, self.edim], stddev=self.init_std))
    # C converts x into o
    self.C = tf.Variable(tf.random_normal([self.edim, self.edim], stddev=self.init_std))

    # Temporal Encoding
    self.T_A = tf.Variable(tf.random_normal([self.mem_size, self.edim], stddev=self.init_std))
    self.T_B = tf.Variable(tf.random_normal([self.mem_size, self.edim], stddev=self.init_std))

    # m_i = sum A_ij * x_ij + T_A_i
    # embedding_lookup retrieves rows of self.A
    Ain_c = tf.nn.embedding_lookup(self.A, self.context)  # context holds the previous words
    Ain_t = tf.nn.embedding_lookup(self.T_A, self.time)   # time is the temporal encoding
    Ain = tf.add(Ain_c, Ain_t)

    # c_i = sum B_ij * u + T_B_i
    # ???? is it B or C; B looks correct, but the notation differs from the paper
    Bin_c = tf.nn.embedding_lookup(self.B, self.context)
    Bin_t = tf.nn.embedding_lookup(self.T_B, self.time)
    Bin = tf.add(Bin_c, Bin_t)

    # go through nhop hops
    for h in range(self.nhop):
        # reshape hid to be 3 dimensional; -1 is used to infer the shape
        self.hid3dim = tf.reshape(self.hid[-1], [-1, 1, self.edim])
        # inner product of the memory units and the input vector;
        # Ain stores the memory units (context plus temporal encoding),
        # hid holds the current hidden state
        Aout = tf.batch_matmul(self.hid3dim, Ain, adj_y=True)
        Aout2dim = tf.reshape(Aout, [-1, self.mem_size])
        P = tf.nn.softmax(Aout2dim)

        probs3dim = tf.reshape(P, [-1, 1, self.mem_size])
        Bout = tf.batch_matmul(probs3dim, Bin)  # the output vector
        Bout2dim = tf.reshape(Bout, [-1, self.edim])

        Cout = tf.matmul(self.hid[-1], self.C)
        Dout = tf.add(Cout, Bout2dim)  # W(o + u)

        self.share_list[0].append(Cout)

        if self.lindim == self.edim:
            self.hid.append(Dout)
        elif self.lindim == 0:
            self.hid.append(tf.nn.relu(Dout))
        else:
            F = tf.slice(Dout, [0, 0], [self.batch_size, self.lindim])
            G = tf.slice(Dout, [0, self.lindim], [self.batch_size, self.edim - self.lindim])
            K = tf.nn.relu(G)
            self.hid.append(tf.concat(1, [F, K]))
def read(self, x, Fx, Fy, gamma):
    Fxr = tf.reshape(Fx, [-1, 1, self.N, self.shape[1]])
    Fyr = tf.reshape(Fy, [-1, 1, self.N, self.shape[2]])
    Fxr3 = tf.concat(1, [Fxr, Fxr, Fxr])  # batch * 3 * N * A
    Fyr3 = tf.concat(1, [Fyr, Fyr, Fyr])
    Fxt3 = tf.transpose(Fxr3, perm=[0, 1, 3, 2])
    glimpse = tf.batch_matmul(Fyr3, tf.batch_matmul(x, Fxt3))
    glimpse = tf.reshape(glimpse, [-1, self.att_size])
    return glimpse * tf.reshape(gamma, [-1, 1])
def get_function(points, mu, sigma):
    # f_ik [n,k]
    div = coef * tf.rsqrt(tf.batch_matrix_determinant(sigma))  # ((2pi)^p*|S_k|)^-1/2 [k]
    div = tf.tile(tf.reshape(div, [1, k]), [n, 1])  # [n,k]
    diff = tf.sub(tf.tile(points, [k, 1, 1]), tf.tile(mu, [n, 1, 1]))  # x_i-u_k [n*k, p, 1]
    sigma = tf.tile(sigma, [n, 1, 1])  # [n*k,p,p]
    exp = tf.exp(-0.5 * tf.batch_matmul(
        tf.transpose(diff, perm=[0, 2, 1]),
        tf.batch_matmul(tf.batch_matrix_inverse(sigma), diff)))  # e^(d'*S^-1*d)_ik [n*k, 1, 1]
    exp = tf.reshape(exp, [n, k])
    # Multivariate normal distribution evaluated for each vector, for each cluster
    # parameter. Hence the [n,k] shape.
    return tf.mul(div, exp)
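# Worked form of the value returned above, assuming coef = (2*pi)**(-p/2) as the surrounding
# comments suggest: for each point x_i and component k,
#   f_ik = (2*pi)^(-p/2) * |Sigma_k|^(-1/2) * exp(-0.5 * (x_i - mu_k)^T * Sigma_k^(-1) * (x_i - mu_k))
# i.e. the multivariate normal density N(x_i | mu_k, Sigma_k), collected into an [n, k] matrix.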
def write(windows, N, center_x, center_y, delta, sigma, gamma):
    tol = 1e-5
    W = tf.reshape(windows, [-1, N, N])
    FX, FY = banks(center_x, center_y, sigma, delta, N, (28, 28))
    I = tf.batch_matmul(W, FY)
    I = tf.batch_matmul(tf.transpose(FX, [0, 2, 1]), I)
    return tf.expand_dims(1 / (gamma + tol), 1) * tf.reshape(I, [-1, 28 * 28])
def model(input1, gating_network):
    # return tf.nn.softmax(tf.matmul(tf.transpose(gating_network), (tf.reshape(tf.batch_matmul(w, input_aa), [L-1, n_aa]) + b)))
    input_times_w = tf.reshape(tf.batch_matmul(w, input1), [L, L, n_aa])
    input_times_w_plus_b = input_times_w + b
    activation_function = tf.nn.relu(input_times_w_plus_b)
    # activation_function = tf.sigmoid(input_times_w_plus_b)
    use_gate = tf.batch_matmul(
        tf.transpose(activation_function, perm=[0, 2, 1]),
        tf.transpose(gating_network, perm=[0, 1, 2]))  # perm=[1,0,2]
    softmax_output = tf.nn.softmax(tf.reshape(use_gate, [L, n_aa]))
    return softmax_output
def buildSimilarity(self):
    q_feature = self.tensors['q_feature']
    a_feature = self.tensors['a_feature']
    with tf.name_scope('similarity'):
        q_norm = tf.sqrt(tf.reduce_sum(q_feature ** 2, reduction_indices=[1], keep_dims=True))
        a_norm = tf.sqrt(tf.reduce_sum(a_feature ** 2, reduction_indices=[1], keep_dims=True))
        product = tf.batch_matmul(q_feature, a_feature, adj_x=True, adj_y=False, name="product")
        denominator = tf.batch_matmul(q_norm, a_norm, adj_x=False, adj_y=True, name="denominator")
        similarity = tf.squeeze(product / (denominator + EPSILON), [-1, -2], name='similarity')
        self.tensors['similarity'] = similarity
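# The tensor computed above is the cosine similarity between question and answer features,
#   similarity = <q, a> / (||q|| * ||a|| + EPSILON)
# assuming q_feature and a_feature are (batch, dim, 1) column vectors, which is what the
# adj_x/adj_y arguments of the batch_matmul calls imply.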
def read(images, N, delta, gamma, sigma, center_x, center_y):
    # TODO: Make the shape configurable
    FX, FY = banks(center_x, center_y, sigma, delta, N, (28, 28))
    I = tf.reshape(images, [-1, 28, 28])
    I = tf.batch_matmul(FY, I)
    I = tf.batch_matmul(I, tf.transpose(FX, [0, 2, 1]))
    return tf.expand_dims(gamma, 1) * tf.reshape(I, [-1, N * N])
def threee_tensor_mul(A, B, C, res):
    # for example:
    #   A = tf.ones([4, 3, 2], tf.int32)
    #   B = tf.ones([4, 2, 5], tf.int32)
    #   C = tf.ones([4, 5, 6], tf.int32)
    # returns a (4, 3, 6) tensor, i.e. the batched matrix product A @ B @ C
    c = B.get_shape().as_list()[-1]
    res += tf.batch_matmul(tf.batch_matmul(A, B), C)
    return res
def build_generator(self):
    video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])
    video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
    pos_mask = tf.placeholder(tf.float32, [self.batch_size])

    video_flat = tf.reshape(video, [-1, self.dim_image])
    image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)
    image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])
    image_emb = tf.mul(image_emb,
                       tf.tile(tf.expand_dims(tf.expand_dims(pos_mask, 1), 1),
                               [1, self.n_lstm_steps, self.dim_hidden]))
    image_emb = tf.concat(2, [image_emb,
                              tf.tile(tf.expand_dims(1 - tf.expand_dims(pos_mask, 1), 1),
                                      [1, self.n_lstm_steps, 1])])
    image_emb = tf.transpose(image_emb, [1, 0, 2])

    state1 = tf.zeros([self.batch_size, self.lstm3.state_size])
    h_prev = tf.zeros([self.batch_size, self.dim_hidden])

    generated_words = []
    current_embed = tf.zeros([self.batch_size, self.dim_hidden])
    brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1])  # n x h x 1
    image_part = tf.batch_matmul(
        image_emb,
        tf.tile(tf.expand_dims(self.embed_att_Ua, 0),
                [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h

    for i in range(16):
        e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part)  # n x b x h
        e = tf.batch_matmul(e, brcst_w)
        e = tf.reduce_sum(e, 2)  # n x b
        e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
        denomin = tf.reduce_sum(e_hat_exp, 0)  # b
        denomin = denomin + tf.to_float(tf.equal(denomin, 0))
        alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                         [1, 1, self.dim_hidden + 1])  # n x b x h
        attention_list = tf.mul(alphas, image_emb)  # n x b x h
        atten = tf.reduce_sum(attention_list, 0)  # b x h

        if i > 0:
            tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("LSTM3") as vs:
            output1, state1 = self.lstm3(tf.concat(1, [atten, current_embed]), state1)  # b x h
            lstm3_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]

        output2 = tf.tanh(tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]),
                                          self.embed_nn_Wp, self.embed_nn_bp))  # b x h
        #with tf.variable_scope("LSTM2"):
        #    output2, state2 = self.lstm2( tf.concat(1,[current_embed,output1]), state2 )
        h_prev = output1

        logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)  # b x w
        max_prob_index = tf.argmax(logit_words, 1)  # b
        generated_words.append(max_prob_index)  # b
        #current_embed = tf.matmul(logit_words,self.Wemb_W) + self.Wemb_b  # b x h
        #current_embed = tf.nn.xw_plus_b( logit_words, self.Wemb_W, self.Wemb_b)  # b x h
        with tf.device("/cpu:0"):
            current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index)
        # current_embed = tf.expand_dims(current_embed, 0)

    generated_words = tf.transpose(tf.pack(generated_words))
    return video, video_mask, generated_words, pos_mask, lstm3_variables
def get_function(points, mu, sigma):
    # f_ik [n,k]
    div = coef * tf.rsqrt(tf.batch_matrix_determinant(sigma))  # ((2pi)^p*|S_k|)^-1/2 [k]
    div = tf.tile(tf.reshape(div, [1, k]), [n, 1])  # [n,k]
    diff = tf.sub(tf.tile(points, [k, 1, 1]), tf.tile(mu, [n, 1, 1]))  # x_i-u_k [n*k, p, 1]
    sigma = tf.tile(sigma, [n, 1, 1])  # [n*k,p,p]
    exp = tf.exp(
        -0.5 * tf.batch_matmul(tf.transpose(diff, perm=[0, 2, 1]),
                               tf.batch_matmul(tf.batch_matrix_inverse(sigma), diff))
    )  # e^(d'*S^-1*d)_ik [n*k, 1, 1]
    exp = tf.reshape(exp, [n, k])
    return tf.mul(div, exp)
def write_attn(h_dec):
    with tf.variable_scope("writeW", reuse=DO_SHARE):
        w = linear(h_dec, write_size)  # batch x (write_n*write_n)
    N = write_n
    w = tf.reshape(w, [batch_size, N, N])
    Fx, Fy, gamma = attn_window("write", h_dec, write_n)
    Fyt = tf.transpose(Fy, perm=[0, 2, 1])
    wr = tf.batch_matmul(Fyt, tf.batch_matmul(w, Fx))
    wr = tf.reshape(wr, [batch_size, B * A])
    #gamma=tf.tile(gamma,[1,B*A])
    return wr * tf.reshape(1.0 / gamma, [-1, 1])
def write(h_dec):
    """Function to implement equation 29."""
    with tf.variable_scope("writeW", reuse=REUSE_T):
        w = linear(h_dec, write_size)  # batch x (patch_write*patch_write)
    N = patch_write
    w = tf.reshape(w, [batch_size, N, N])
    Fx, Fy, gamma = attn_window("write", h_dec, patch_write)
    Fyt = tf.transpose(Fy, perm=[0, 2, 1])
    wr = tf.batch_matmul(Fyt, tf.batch_matmul(w, Fx))
    wr = tf.reshape(wr, [batch_size, B * A])
    #gamma=tf.tile(gamma,[1,B*A])
    return wr * tf.reshape(1.0 / gamma, [-1, 1])
def train_graph(self, video, video_mask, caption, caption_mask):
    video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x n) x d
    image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)  # (b x n) x h
    image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])  # b x n x h
    image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

    state1 = tf.zeros([self.batch_size, self.lstm3.state_size])  # b x s
    h_prev = tf.zeros([self.batch_size, self.dim_hidden])  # b x h

    loss_caption = tf.zeros([self.batch_size])

    current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
    brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1])  # n x h x 1
    image_part = tf.batch_matmul(
        image_emb,
        tf.tile(tf.expand_dims(self.embed_att_Ua, 0),
                [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h

    for i in range(16):
        e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part)  # n x b x h
        e = tf.batch_matmul(e, brcst_w)
        e = tf.reduce_sum(e, 2)  # n x b
        e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
        denomin = tf.reduce_sum(e_hat_exp, 0)  # b
        denomin = denomin + tf.to_float(tf.equal(denomin, 0))
        alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                         [1, 1, self.dim_hidden])  # n x b x h
        attention_list = tf.mul(alphas, image_emb)  # n x b x h
        atten = tf.reduce_sum(attention_list, 0)  # b x h
        #current_embed = tf.nn.xw_plus_b( onehot_labels, self.Wemb_W, self.Wemb_b)  # b x h
        #with tf.device("/cpu:0"):
        #    current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:,i-1])

        if i > 0:
            tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("LSTM3"):
            output1, state1 = self.lstm3_dropout(tf.concat(1, [atten, current_embed]), state1)  # b x h

        output2 = tf.tanh(tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]),
                                          self.embed_nn_Wp, self.embed_nn_bp))  # b x h
        #with tf.variable_scope("LSTM2"):
        #    output2, state2 = self.lstm2_dropout( tf.concat(1,[current_embed, output1]), state2 )
        h_prev = output1  # b x h

        labels = tf.expand_dims(caption[:, i], 1)  # b x 1
        indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)  # b x 1
        concated = tf.concat(1, [indices, labels])  # b x 2
        onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.batch_size, self.n_words]), 1.0, 0.0)  # b x w
        #current_embed = tf.matmul(onehot_labels,self.Wemb_W) + self.Wemb_b  # b x h
        with tf.device("/cpu:0"):
            current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])

        logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)  # b x w
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words, onehot_labels)  # b x 1
        cross_entropy = cross_entropy * caption_mask[:, i]  # b x 1
        loss_caption += cross_entropy  # 1

    loss_caption = loss_caption / tf.reduce_sum(caption_mask, 1)
    return loss_caption
def build_model(self):
    video = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image])  # b x n x d
    video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])  # b x n
    caption = tf.placeholder(tf.int32, [self.batch_size, n_caption_step])  # b x 16
    caption_mask = tf.placeholder(tf.float32, [self.batch_size, n_caption_step])  # b x 16

    video_flat = tf.reshape(video, [-1, self.dim_image])  # (b x n) x d
    image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b)  # (b x n) x h
    image_emb = tf.reshape(image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden])  # b x n x h
    image_emb = tf.transpose(image_emb, [1, 0, 2])  # n x b x h

    state1 = tf.zeros([self.batch_size, self.lstm3.state_size])  # b x s
    h_prev = tf.zeros([self.batch_size, self.dim_hidden])  # b x h

    loss_caption = 0.0

    current_embed = tf.zeros([self.batch_size, self.dim_hidden])  # b x h
    brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1])  # n x h x 1
    image_part = tf.batch_matmul(
        image_emb,
        tf.tile(tf.expand_dims(self.embed_att_Ua, 0),
                [self.n_lstm_steps, 1, 1])) + self.embed_att_ba  # n x b x h

    for i in range(n_caption_step):
        e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part)  # n x b x h
        e = tf.batch_matmul(e, brcst_w)  # unnormalized relevance score
        e = tf.reduce_sum(e, 2)  # n x b
        e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e))  # n x b
        denomin = tf.reduce_sum(e_hat_exp, 0)  # b
        denomin = denomin + tf.to_float(tf.equal(denomin, 0))  # regularize denominator
        alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2),
                         [1, 1, self.dim_hidden])  # n x b x h; normalize to obtain alpha
        attention_list = tf.mul(alphas, image_emb)  # n x b x h
        atten = tf.reduce_sum(attention_list, 0)  # b x h; soft-attention weighted sum

        if i > 0:
            tf.get_variable_scope().reuse_variables()

        with tf.variable_scope("LSTM3"):
            output1, state1 = self.lstm3_dropout(tf.concat(1, [atten, current_embed]), state1)  # b x h

        output2 = tf.tanh(tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]),
                                          self.embed_nn_Wp, self.embed_nn_bp))  # b x h
        h_prev = output1  # b x h

        labels = tf.expand_dims(caption[:, i], 1)  # b x 1
        indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)  # b x 1
        concated = tf.concat(1, [indices, labels])  # b x 2
        onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.batch_size, self.n_words]), 1.0, 0.0)  # b x w
        with tf.device("/cpu:0"):
            current_embed = tf.nn.embedding_lookup(self.Wemb, caption[:, i])

        logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b)  # b x w
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words, onehot_labels)  # b x 1
        cross_entropy = cross_entropy * caption_mask[:, i]  # b x 1
        loss_caption += tf.reduce_sum(cross_entropy)  # 1

    loss_caption = loss_caption / tf.reduce_sum(caption_mask)
    loss = loss_caption
    return loss, video, video_mask, caption, caption_mask
def sampleQ_psi(z, u, Q_phi):
    A, B, o, v, r = transition(z)
    with tf.variable_scope("sampleQ_psi"):
        mu_t = tf.expand_dims(Q_phi.mu, -1)  # batch,z_dim,1
        Amu = tf.squeeze(tf.batch_matmul(A, mu_t), [-1])
        u = tf.expand_dims(u, -1)  # batch,u_dim,1
        Bu = tf.squeeze(tf.batch_matmul(B, u), [-1])
        Q_psi = NormalDistribution(Amu + Bu + o, Q_phi.sigma, Q_phi.logsigma, v, r)

        # the actual z_next sample is generated by deterministically transforming z_t
        z = tf.expand_dims(z, -1)
        Az = tf.squeeze(tf.batch_matmul(A, z), [-1])
        z_next = Az + Bu + o
        return z_next, Q_psi  #,(A,B,o,v,r) # debugging
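# The locally linear transition applied above, in equation form:
#   mu_next = A * mu + B * u + o    (mean of Q_psi)
#   z_next  = A * z  + B * u + o    (deterministic transform of the sampled z)
# with A, B, o produced per batch element by transition(z).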
def tensor(self, domain=None):
    if self.defined is not None:
        if domain is None:
            return self.defined(self.type_idx, self.domain.tensor)
        else:
            return self.defined(self.type_idx, domain.tensor)
    if domain is None:
        domain = self.domain
    X = domain.tensor
    XW = tf.batch_matmul(
        tf.tile(tf.expand_dims(X, 0), [self.number_of_layers, 1, 1]), self.W)
    XWX = tf.squeeze(tf.batch_matmul(tf.expand_dims(X, 1),
                                     tf.transpose(XW, [1, 2, 0])))
    XV = tf.matmul(X, tf.transpose(self.V))
    gX = tf.matmul(tf.tanh(XWX + XV + self.b), self.u)
    return tf.sigmoid(gX)
def model(input1, gating_network):
    input_times_w_plus_b = tf.reshape(tf.batch_matmul(w, input1), [L, L, n_aa]) + b

    # tf.nn.softmax expects a matrix, so compute the softmax over the last axis manually
    exp_ = tf.exp(input_times_w_plus_b)
    sums = tf.reshape(tf.reduce_sum(exp_, 2), [L, L, 1])
    try1 = tf.tile(sums, [1, 1, n_aa])
    activation_function = exp_ / try1
    # activation_function = tf.sigmoid(input_times_w_plus_b)
    # activation_function = tf.nn.relu(softmaxed)

    use_gate = tf.batch_matmul(
        tf.transpose(activation_function, perm=[0, 2, 1]),
        tf.transpose(gating_network, perm=[0, 1, 2]))  # perm=[1,0,2]
    # output = tf.nn.softmax(tf.reshape(use_gate, [L, n_aa]))
    output = tf.reshape(use_gate, [L, n_aa])
    return output
def build_graph(self):
    with self.graph.as_default():
        # placeholders
        self.X = tf.placeholder(tf.int64, [None, self.n_features], name='X')
        self.Y = tf.placeholder(tf.float32, (None), name='Y')

        # list of TT-cores
        self.G = [None] * self.n_features
        # list of TT-cores used for penalty
        self.G_exp = [None] * self.n_features

        for i in range(self.n_features):
            shape = [self.s_features[i] + 1, self.rank, self.rank]
            if i == 0:
                shape = [self.s_features[i] + 1, 1, self.rank]
            if i == (self.n_features - 1):
                shape = [self.s_features[i] + 1, self.rank, 1]

            content = None
            if self.init_vals is None:
                content = tf.random_normal(shape, stddev=self.init_std)
            else:
                assert(self.init_vals[i].shape == tuple(shape))
                content = self.init_vals[i] + tf.random_normal(shape, stddev=self.init_std)
            self.G[i] = tf.Variable(content, trainable=True, name='G_{}'.format(i))

            exp_weights = tf.constant([1] + [self.exp_reg] * self.s_features[i],
                                      shape=(self.s_features[i] + 1, 1, 1))
            self.G_exp[i] = self.G[i] * exp_weights

        # main computation part
        cur_col = self.X[:, 0]
        tower = tf.gather(self.G[0], cur_col)
        self.outputs = tf.add(self.G[0][0], tower)
        for i in range(1, self.n_features):
            cur_col = self.X[:, i]
            cur_tower = tf.gather(self.G[i], cur_col)
            cur_A = tf.add(self.G[i][0], cur_tower)
            self.outputs = tf.batch_matmul(self.outputs, cur_A)
        self.outputs = tf.squeeze(self.outputs, [1, 2])

        # regularization penalty
        self.penalty = tf.reshape(
            tensor=tf.einsum('nip,njq->ijpq', self.G_exp[0], self.G_exp[0]),
            shape=(1, self.rank**2))
        for i in range(1, self.n_features):
            last_dim = 1 if i == self.n_features - 1 else self.rank**2
            summed_kron_prod = tf.reshape(
                tensor=tf.einsum('nip,njq->ijpq', self.G_exp[i], self.G_exp[i]),
                shape=(self.rank**2, last_dim))
            self.penalty = tf.matmul(self.penalty, summed_kron_prod)

        # MSE loss
        self.loss = tf.reduce_mean((self.outputs - self.Y)**2)

        # # LogLoss
        # self.margins = -self.Y * self.outputs
        # self.raw_loss = tf.log(tf.add(1.0, tf.exp(self.margins)))
        # self.loss = tf.reduce_mean(tf.minimum(self.raw_loss, 100, name='truncated_log_loss'))

        self.penalized_loss = self.loss + self.reg * tf.squeeze(self.penalty)

        # others
        self.trainer = tf.train.AdamOptimizer(0.001).minimize(self.penalized_loss)
        self.init_all_vars = tf.initialize_all_variables()
        self.saver = tf.train.Saver()
def build(self):
    tf.reset_default_graph()
    with tf.variable_scope("graph", initializer=orthogonal_initializer()):
        # Variables (matrix of embeddings/transformations)
        self._ht = ht = tf.get_variable(
            name='ht',  # for t AND h
            shape=[self.num_cons, self.dim],
            dtype=tf.float32)
        self._r = r = tf.get_variable(
            name='r', shape=[self.num_rels, self.dim], dtype=tf.float32)
        # Mh has |r| number of matrices, each dedicated to a relation
        self._Mh = Mh = tf.get_variable(
            name='Mh', shape=[self.num_rels, self.dim * self.dim], dtype=tf.float32)

        self._ht_assign = ht_assign = tf.placeholder(
            name='ht_assign', shape=[self.num_cons, self.dim], dtype=tf.float32)
        self._r_assign = r_assign = tf.placeholder(
            name='r_assign', shape=[self.num_rels, self.dim], dtype=tf.float32)
        self._m_assign = m_assign = tf.placeholder(
            name='r_assign', shape=[self.num_rels, self.dim * self.dim], dtype=tf.float32)

        # Type A loss : [|| M_hr h + r - M_tr t ||_2 + m1 - || M_hr h' + r - M_tr t' ||_2]+
        # here [.]+ means max(., 0)
        self._A_h_index = A_h_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='A_h_index')
        self._A_r_index = A_r_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='A_r_index')
        self._A_t_index = A_t_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='A_t_index')
        self._A_hn_index = A_hn_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='A_hn_index')
        self._A_tn_index = A_tn_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='A_tn_index')

        '''
        A_loss_matrix = tf.subtract(
            tf.add(
                tf.batch_matmul(A_h_con_batch, tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])),
                A_rel_batch),
            tf.batch_matmul(A_t_con_batch, tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])))
        '''
        # a batch of vectors multiply a batch of matrices.
        A_h_con_batch = tf.nn.embedding_lookup(ht, A_h_index)
        A_t_con_batch = tf.nn.embedding_lookup(ht, A_t_index)
        A_rel_batch = tf.nn.embedding_lookup(r, A_r_index)
        A_mat_h_batch = tf.nn.embedding_lookup(Mh, A_r_index)
        #A_mat_t_batch = tf.nn.embedding_lookup(Mt, A_r_index)
        A_hn_con_batch = tf.nn.embedding_lookup(ht, A_hn_index)
        A_tn_con_batch = tf.nn.embedding_lookup(ht, A_tn_index)

        # This is a batch of h * M_hr given a batch of (h, r, t)
        A_h_batch_mul = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(A_h_con_batch, 1),
                            tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])), [1])
        # This is a batch of t * M_hr given a batch of (h, r, t)
        A_t_batch_mul = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(A_t_con_batch, 1),
                            tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])), [1])
        # negative sampled h and t
        A_hn_batch_mul = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(A_hn_con_batch, 1),
                            tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])), [1])
        A_tn_batch_mul = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(A_tn_con_batch, 1),
                            tf.reshape(A_mat_h_batch, [-1, self.dim, self.dim])), [1])

        # This stores h M_hr + r - t M_tr
        A_loss_matrix = tf.subtract(tf.add(A_h_batch_mul, A_rel_batch), A_t_batch_mul)
        # This stores h' M_hr + r - t' M_tr for negative samples
        A_neg_matrix = tf.subtract(tf.add(A_hn_batch_mul, A_rel_batch), A_tn_batch_mul)

        # L-2 norm
        # [||h M_hr + r - t M_tr|| + m1 - ||h' M_hr + r - t' M_tr||]+ here [.]+ means max(., 0)
        if self.L1:
            self._A_loss = A_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.reduce_sum(tf.abs(A_loss_matrix), 1), self._m1),
                        tf.reduce_sum(tf.abs(A_neg_matrix), 1)), 0.))
        else:
            self._A_loss = A_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.sqrt(tf.reduce_sum(tf.square(A_loss_matrix), 1)), self._m1),
                        tf.sqrt(tf.reduce_sum(tf.square(A_neg_matrix), 1))), 0.))

        # soft-constraint on vector norms for both positive and negative sampled h and t
        # [||h|| - 1]+ + [||t|| - 1]+ + [||h'|| - 1]+ + [||t'|| - 1]+
        #A_vec_restraint = tf.concat(0, [tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_h_con_batch), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_t_con_batch), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_hn_con_batch), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_tn_con_batch), 1)), 1.), 0.)])
        A_vec_restraint = tf.concat([
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_h_con_batch), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_t_con_batch), 1)), 1.), 0.)
        ], 0)
        # soft-constraint on projected vectors for both positive and negative sampled h and t
        #A_proj_restraint = tf.concat(0, [tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_h_batch_mul), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_t_batch_mul), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_hn_batch_mul), 1)), 1.), 0.), tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_tn_batch_mul), 1)), 1.), 0.)])
        A_proj_restraint = tf.concat([
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_h_batch_mul), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_t_batch_mul), 1)), 1.), 0.)
        ], 0)
        A_rel_restraint = tf.maximum(
            tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(A_rel_batch), 1)), 2.), 0.)

        # Type B loss :
        # 2 losses: t-related <- omega(M_t o1, M_t o2) and h-related <- omega(M_h o1, M_h o2)
        # Let's use || a M_hr + r - b M_tr ||_2 as omega(a,b)
        # They share the same input placeholders
        # Negative sampling samples only the "many" end
        self._B_h_index = B_h_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='B_h_index')
        self._B_r_index = B_r_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='B_r_index')
        self._B_t_index = B_t_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='B_t_index')
        # negative sampled h and t
        self._B_hn_index = B_hn_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='B_hn_index')
        self._B_tn_index = B_tn_index = tf.placeholder(
            dtype=tf.int64, shape=[self.batch_size], name='B_tn_index')

        B_con_h_batch = tf.nn.embedding_lookup(ht, B_h_index)
        B_con_t_batch = tf.nn.embedding_lookup(ht, B_t_index)
        B_mat_h_batch = tf.nn.embedding_lookup(Mh, B_r_index)
        #B_mat_t_batch = tf.nn.embedding_lookup(Mt, B_r_index)
        B_rel_batch = tf.nn.embedding_lookup(r, B_r_index)
        B_con_hn_batch = tf.nn.embedding_lookup(ht, B_hn_index)
        B_con_tn_batch = tf.nn.embedding_lookup(ht, B_tn_index)

        # multiplication of a batch of vectors and a batch of matrices
        B_t_batch_mul_head = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_h_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        B_t_batch_mul_tail = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_t_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        # multiplication of a batch of vectors and a batch of matrices for negative samples
        B_tn_batch_mul_head = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_hn_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        B_tn_batch_mul_tail = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_tn_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])

        # t*M_hr + r ~ t*M_tr
        # This stores h M_hr + r - t M_tr for more t's of the singular h's.
        # Below it is the one for negative samples
        B_t_loss_matrix = tf.subtract(tf.add(B_t_batch_mul_head, B_rel_batch), B_t_batch_mul_tail)
        B_tn_loss_matrix = tf.subtract(tf.add(B_tn_batch_mul_head, B_rel_batch), B_tn_batch_mul_tail)
        # [||h M_hr + r - t M_tr|| + m2 - ||h M_hr + r - t' M_tr||]+
        # Actually only t is corrupted for B_t related batches
        if self.L1:
            self._B_t_loss = B_t_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.reduce_sum(tf.abs(B_t_loss_matrix), 1), self._m2),
                        tf.reduce_sum(tf.abs(B_tn_loss_matrix), 1)), 0.))
        else:
            self._B_t_loss = B_t_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.sqrt(tf.reduce_sum(tf.square(B_t_loss_matrix), 1)), self._m2),
                        tf.sqrt(tf.reduce_sum(tf.square(B_tn_loss_matrix), 1))), 0.))

        # multiplication of a batch of vectors and a batch of matrices
        B_h_batch_mul_head = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_h_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        B_h_batch_mul_tail = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_t_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        # multiplication of a batch of vectors and a batch of matrices for negative samples
        B_hn_batch_mul_head = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_hn_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])
        B_hn_batch_mul_tail = tf.squeeze(
            tf.batch_matmul(tf.expand_dims(B_con_tn_batch, 1),
                            tf.reshape(B_mat_h_batch, [-1, self.dim, self.dim])), [1])

        # t*M_tr - r ~ h*M_hr
        # This stores t M_tr - r - h M_hr for more h's of the singular t's.
        # Below it is the one for negative samples
        B_h_loss_matrix = tf.subtract(tf.subtract(B_h_batch_mul_tail, B_rel_batch), B_h_batch_mul_head)
        B_hn_loss_matrix = tf.subtract(tf.subtract(B_hn_batch_mul_tail, B_rel_batch), B_hn_batch_mul_head)
        # [||t M_tr - r - h M_hr|| + m2 - ||t M_tr - r - h' M_hr||]+
        # Actually only h is corrupted for B_h related batches
        if self.L1:
            self._B_h_loss = B_h_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.reduce_sum(tf.abs(B_h_loss_matrix), 1), self._m2),
                        tf.reduce_sum(tf.abs(B_hn_loss_matrix), 1)), 0.))
        else:
            self._B_h_loss = B_h_loss = tf.reduce_sum(
                tf.maximum(
                    tf.subtract(
                        tf.add(tf.sqrt(tf.reduce_sum(tf.square(B_h_loss_matrix), 1)), self._m2),
                        tf.sqrt(tf.reduce_sum(tf.square(B_hn_loss_matrix), 1))), 0.))

        # penalize pre- and post-projected vectors whose norm exceeds 1
        B_vec_restraint = tf.concat([
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_con_h_batch), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_con_t_batch), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_con_hn_batch), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_con_tn_batch), 1)), 1.), 0.)
        ], 0)
        B_t_proj_restraint = tf.concat([
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_t_batch_mul_head), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_t_batch_mul_tail), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_tn_batch_mul_head), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_tn_batch_mul_tail), 1)), 1.), 0.)
        ], 0)
        B_h_proj_restraint = tf.concat([
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_h_batch_mul_head), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_h_batch_mul_tail), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_hn_batch_mul_head), 1)), 1.), 0.),
            tf.maximum(tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_hn_batch_mul_tail), 1)), 1.), 0.)
        ], 0)
        B_rel_restraint = tf.maximum(
            tf.subtract(tf.sqrt(tf.reduce_sum(tf.square(B_rel_batch), 1)), 2.), 0.)

        # Type C loss : Soft-constraint on vector norms
        #self._C_loss = C_loss = tf.reduce_sum(tf.concat(0, [A_vec_restraint, B_vec_restraint, A_proj_restraint, B_t_proj_restraint, B_h_proj_restraint, A_rel_restraint, B_rel_restraint]))
        #self._C_loss = C_loss = tf.reduce_sum(tf.concat(0, [A_vec_restraint, B_vec_restraint, A_proj_restraint, B_t_proj_restraint, B_h_proj_restraint]))
        self._C_loss_A = C_loss_A = tf.reduce_sum(
            tf.concat([A_vec_restraint, A_proj_restraint, A_rel_restraint], 0))
        self._C_loss_B1 = C_loss_B1 = tf.reduce_sum(
            tf.concat([B_vec_restraint, B_t_proj_restraint, B_rel_restraint], 0))
        self._C_loss_B2 = C_loss_B2 = tf.reduce_sum(
            tf.concat([B_vec_restraint, B_h_proj_restraint, B_rel_restraint], 0))

        # Force normalize pre-projected vecs

        # Optimizer
        self._lr = lr = tf.placeholder(tf.float32)
        self._opt = opt = tf.train.GradientDescentOptimizer(lr)
        self._train_op_A = train_op_A = opt.minimize(A_loss)
        self._train_op_B_t = train_op_B_t = opt.minimize(B_t_loss)
        self._train_op_B_h = train_op_B_h = opt.minimize(B_h_loss)
        #self._train_op_C = train_op_C = opt.minimize(C_loss)
        self._train_op_C_A = train_op_C_A = opt.minimize(C_loss_A)
        self._train_op_C_B1 = train_op_C_B1 = opt.minimize(C_loss_B1)
        self._train_op_C_B2 = train_op_C_B2 = opt.minimize(C_loss_B2)

        self._assign_ht_op = assign_ht_op = ht.assign(ht_assign)
        self._assign_r_op = assign_r_op = self._r.assign(r_assign)
        self._assign_m_op = assign_m_op = self._Mh.assign(m_assign)

        # Saver
        self._saver = tf.train.Saver()
def _build_encoder(self):
    """Builds coattention encoder."""
    # most used variables
    params = self._params
    batch_size = params.batch_size
    hidden_size = params.hidden_size
    min_timesteps = params.q_timesteps
    max_timesteps = params.c_timesteps

    with tf.variable_scope('embedding') as vs, tf.device(self._next_device()):
        # fixed embedding
        embedding = tf.get_variable(
            'embedding', [self._vsize, params.emb_size],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4),
            trainable=False)
        # embed c_inputs and q_inputs.
        fn = lambda x: tf.nn.embedding_lookup(embedding, x)
        c_vector = tf.map_fn(lambda x: fn(x), self._contexts, dtype=tf.float32)
        c_embedding = tf.transpose(c_vector, perm=[1, 0, 2])
        q_vector = tf.map_fn(lambda x: fn(x), self._questions, dtype=tf.float32)
        q_embedding = tf.transpose(q_vector, perm=[1, 0, 2])
        # shared lstm encoder
        lstm_enc = tf.nn.rnn_cell.LSTMCell(hidden_size)

    with tf.variable_scope('c_embedding'), tf.device(self._next_device()):
        # compute context embedding
        c, _ = tf.nn.dynamic_rnn(lstm_enc, c_embedding, dtype=tf.float32)
        # append sentinel
        fn = lambda x: tf.concat(0, [x, tf.zeros([1, hidden_size], dtype=tf.float32)])
        c_encoding = tf.map_fn(lambda x: fn(x), c, dtype=tf.float32)

    with tf.variable_scope('q_embedding'), tf.device(self._next_device()):
        # compute question embedding
        q, _ = tf.nn.dynamic_rnn(lstm_enc, q_embedding, dtype=tf.float32)
        # append sentinel
        fn = lambda x: tf.concat(0, [x, tf.zeros([1, hidden_size], dtype=tf.float32)])
        q_encoding = tf.map_fn(lambda x: fn(x), q, dtype=tf.float32)
        # allow variation between c_embedding and q_embedding
        q_encoding = tf.tanh(batch_linear(q_encoding, min_timesteps + 1, True))
        q_variation = tf.transpose(q_encoding, perm=[0, 2, 1])

    with tf.variable_scope('coattention'), tf.device(self._next_device()):
        # compute affinity matrix, (batch_size, context+1, question+1)
        L = tf.batch_matmul(c_encoding, q_variation)
        # shape = (batch_size, question+1, context+1)
        L_t = tf.transpose(L, perm=[0, 2, 1])
        # normalize with respect to question
        a_q = tf.map_fn(lambda x: tf.nn.softmax(x), L_t, dtype=tf.float32)
        # normalize with respect to context
        a_c = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)
        # summaries with respect to question, (batch_size, question+1, hidden_size)
        c_q = tf.batch_matmul(a_q, c_encoding)
        c_q_emb = tf.concat(1, [q_variation, tf.transpose(c_q, perm=[0, 2, 1])])
        # summaries of previous attention with respect to context
        c_d = tf.batch_matmul(c_q_emb, a_c, adj_y=True)
        # final coattention context, (batch_size, context+1, 3*hidden_size)
        co_att = tf.concat(2, [c_encoding, tf.transpose(c_d, perm=[0, 2, 1])])

    with tf.variable_scope('encoder'), tf.device(self._next_device()):
        # LSTM for coattention encoding
        cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size)
        # compute coattention encoding
        u, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, co_att,
            sequence_length=tf.to_int64([max_timesteps] * batch_size),
            dtype=tf.float32)
        self._u = tf.concat(2, u)
def test_BatchMatMul(self):
    t = tf.batch_matmul(*self.random((2, 4, 3, 4), (2, 4, 3, 5)), adj_x=True)
    self.check(t)
def create(model, config):
    dim_v, dim_i, dim_d, dim_t, dim_m, dim_b, dim_n, dim_c = (
        config.getint('vocabsize'), config.getint('wvecsize'), config.getint('depth'),
        config.getint('steps'), config.getint('memory'), config.getint('batch'),
        config.getint('deepness'), config.getint('classes'))
    lrate_ms, dstep_ms, drate_ms, optim_ms = (
        config.getfloat('mslrate'), config.getint('msdstep'),
        config.getfloat('msdrate'), getattr(tf.train, config.get('msoptim')))
    lrate_ce, dstep_ce, drate_ce, optim_ce = (
        config.getfloat('celrate'), config.getint('cedstep'),
        config.getfloat('cedrate'), getattr(tf.train, config.get('ceoptim')))

    with tf.name_scope('embedding'):
        model['We'] = tf.Variable(tf.truncated_normal([dim_v, dim_i], stddev=1.0 / dim_i), name='We')
        model['Be'] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='Be')

    with tf.name_scope('plstm'):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['pxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='pxi_%i' % ii)
                model['px_%i' % ii] = tf.add(tf.nn.embedding_lookup(model['We'], model['pxi_%i' % ii]),
                                             model['Be'], name='px_%i' % ii)
        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['pyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='pyi_%i' % ii)
                model['py_%i' % ii] = tf.add(tf.nn.embedding_lookup(model['We'], model['pyi_%i' % ii]),
                                             model['Be'], name='py_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['px_%i_%i' % (i, ii)] = model['px_%i' % ii] if i == 0 else model['ph_%i_%i' % (i - 1, ii)]
            with tf.name_scope('inputgate_%i' % i):
                model['pWi_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='pWi_%i' % i)
                model['pBi_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='pBi_%i' % i)
                for ii in xrange(dim_t):
                    model['pi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['px_%i_%i' % (i, ii)], model['pWi_%i' % i]), model['pBi_%i' % i]),
                        name='pi_%i_%i' % (i, ii))
            with tf.name_scope('forgetgate_%i' % i):
                model['pWf_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='pWf_%i' % i)
                model['pBf_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='pBf_%i' % i)
                for ii in xrange(dim_t):
                    model['pf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['px_%i_%i' % (i, ii)], model['pWf_%i' % i]), model['pBf_%i' % i]),
                        name='pf_%i_%i' % (i, ii))
            with tf.name_scope('outputgate_%i' % i):
                model['pWo_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='pWo_%i' % i)
                model['pBo_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='pBo_%i' % i)
                for ii in xrange(dim_t):
                    model['po_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['px_%i_%i' % (i, ii)], model['pWo_%i' % i]), model['pBo_%i' % i]),
                        name='po_%i_%i' % (i, ii))
            with tf.name_scope('cellstate_%i' % i):
                model['pWc_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='pWc_' + str(i))
                model['pBc_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='pBc_' + str(i))
                for ii in xrange(dim_t):
                    model['pcc_%i_%i' % (i, ii)] = tf.Variable(
                        tf.truncated_normal([dim_b, dim_i], stddev=1.0 / dim_i),
                        name='pcc_%i_%i' % (i, ii)) if ii == 0 else model['pc_%i_%i' % (i, ii - 1)]  # consider starting with all zeros
                    model['pc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)),
                        model['pcc_%i_%i' % (i, ii)],
                        tf.add(tf.mul(model['pf_%i_%i' % (i, ii)], model['pcc_%i_%i' % (i, ii)]),
                               tf.mul(model['pi_%i_%i' % (i, ii)],
                                      tf.nn.tanh(tf.add(tf.matmul(model['px_%i_%i' % (i, ii)], model['pWc_%i' % i]),
                                                        model['pBc_%i' % i])))),
                        name='pc_%i_%i' % (i, ii))
            with tf.name_scope('hidden_%i' % i):
                model['pWz_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='pWz_%i' % i)
                model['pBz_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='pBz_%i' % i)
                for ii in xrange(dim_t):
                    model['pz_%i_%i' % (i, ii)] = tf.add(tf.matmul(model['pc_%i_%i' % (i, ii)], model['pWz_%i' % i]),
                                                         model['pBz_%i' % i], name='pz_%i_%i' % (i, ii))
            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['ph_%i_%i' % (i, ii)] = tf.mul(model['po_%i_%i' % (i, ii)],
                                                         tf.nn.tanh(model['pz_%i_%i' % (i, ii)]),
                                                         name='ph_%i_%i' % (i, ii))

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['ph_%i' % ii] = model['ph_%i_%i' % (dim_d - 1, ii)]
        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['pms_%i' % ii] = tf.select(
                    tf.equal(model['pxi_%i' % ii], tf.zeros([dim_b], tf.int32)),
                    tf.zeros([dim_b], tf.float32),
                    tf.reduce_sum(tf.square(tf.sub(model['py_%i' % ii], model['ph_%i' % ii])), [1]),
                    name='pms_%i' % ii)
            model['pms'] = tf.reduce_sum(tf.add_n([model['pms_%i' % ii] for ii in xrange(dim_t)]), name='pms')
            model['spms'] = tf.scalar_summary(model['pms'].name, model['pms'])

    with tf.name_scope('memory'):
        for i in xrange(dim_d):
            model['hmi_%i' % i] = tf.reshape(
                [model['pc_%i_%i' % (i, ii)] for ii in xrange(dim_t - dim_m, dim_t)],
                [dim_t, dim_b, dim_i], name='hmi_%i' % i)
            model['hm_%i' % i] = tf.transpose(model['hmi_%i' % i], [1, 0, 2], name='hm_%i' % i)

    with tf.name_scope('hlstm'):
        with tf.name_scope('input'):
            for ii in xrange(dim_t):
                model['hxi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='hxi_%i' % ii)
                model['hx_%i' % ii] = tf.add(tf.nn.embedding_lookup(model['We'], model['hxi_%i' % ii]),
                                             model['Be'], name='hx_%i' % ii)
        with tf.name_scope('label'):
            for ii in xrange(dim_t):
                model['hyi_%i' % ii] = tf.placeholder(tf.int32, [dim_b], name='hyi_%i' % ii)
                model['hy_%i' % ii] = tf.add(tf.nn.embedding_lookup(model['We'], model['hyi_%i' % ii]),
                                             model['Be'], name='hy_%i' % ii)

        for i in xrange(dim_d):
            with tf.name_scope('input_%i' % i):
                for ii in xrange(dim_t):
                    model['hx_%i_%i' % (i, ii)] = model['hx_%i' % ii] if i == 0 else model['hh_%i_%i' % (i - 1, ii)]
            with tf.name_scope('inputgate_%i' % i):
                model['hWi_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWi_%i' % i)
                model['hBi_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='hBi_%i' % i)
                for ii in xrange(dim_t):
                    model['hi_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['hx_%i_%i' % (i, ii)], model['hWi_%i' % i]), model['hBi_%i' % i]),
                        name='hi_%i_%i' % (i, ii))
            with tf.name_scope('forgetgate_%i' % i):
                model['hWf_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWf_%i' % i)
                model['hBf_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='hBf_%i' % i)
                for ii in xrange(dim_t):
                    model['hf_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['hx_%i_%i' % (i, ii)], model['hWf_%i' % i]), model['hBf_%i' % i]),
                        name='hf_%i_%i' % (i, ii))
            with tf.name_scope('outputgate_%i' % i):
                model['hWo_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWo_%i' % i)
                model['hBo_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='hBo_%i' % i)
                for ii in xrange(dim_t):
                    model['ho_%i_%i' % (i, ii)] = tf.nn.sigmoid(
                        tf.add(tf.matmul(model['hx_%i_%i' % (i, ii)], model['hWo_%i' % i]), model['hBo_%i' % i]),
                        name='ho_%i_%i' % (i, ii))
            with tf.name_scope('cellstate_%i' % i):
                model['hWc_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWc_' + str(i))
                model['hBc_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='hBc_' + str(i))
                for ii in xrange(dim_t):
                    model['hcc_%i_%i' % (i, ii)] = model['pc_%i_%i' % (i, dim_t - 1)] if ii == 0 else model['hc_%i_%i' % (i, ii - 1)]
                    model['hc_%i_%i' % (i, ii)] = tf.select(
                        tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)),
                        model['hcc_%i_%i' % (i, ii)],
                        tf.add(tf.mul(model['hf_%i_%i' % (i, ii)], model['hcc_%i_%i' % (i, ii)]),
                               tf.mul(model['hi_%i_%i' % (i, ii)],
                                      tf.nn.tanh(tf.add(tf.matmul(model['hx_%i_%i' % (i, ii)], model['hWc_%i' % i]),
                                                        model['hBc_%i' % i])))),
                        name='hc_%i_%i' % (i, ii))
            with tf.name_scope('attention_%i' % i):
                model['hWa_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWa_%i' % i)
                model['hBa_%i' % i] = tf.Variable(tf.truncated_normal([dim_t, 1], stddev=1.0 / dim_t), name='hBa_%i' % i)
                for ii in xrange(dim_t):
                    model['hat_%i_%i' % (i, ii)] = tf.nn.softmax(
                        tf.add(
                            tf.reshape(
                                tf.transpose(
                                    tf.batch_matmul(
                                        model['hm_%i' % i],
                                        tf.reshape(
                                            tf.transpose(tf.matmul(model['hWa_%i' % i],
                                                                   tf.transpose(model['hc_%i_%i' % (i, ii)]))),
                                            [dim_b, dim_i, 1]))),
                                [dim_t, dim_b]),
                            model['hBa_%i' % i]),
                        name='hat_%i_%i' % (i, ii))
                    model['hcx_%i_%i' % (i, ii)] = tf.reshape(
                        tf.batch_matmul(
                            tf.reshape(tf.transpose(model['hat_%i_%i' % (i, ii)]), [dim_b, 1, dim_t]),
                            model['hm_%i' % i]),
                        [dim_b, dim_i], name='hcx_%i_%i' % (i, ii))
            with tf.name_scope('hidden_%i' % i):
                model['hWx_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWx_%i' % i)
                model['hWz_%i' % i] = tf.Variable(tf.truncated_normal([dim_i, dim_i], stddev=1.0 / dim_i), name='hWz_%i' % i)
                model['hBz_%i' % i] = tf.Variable(tf.truncated_normal([1, dim_i], stddev=1.0 / dim_i), name='hBz_%i' % i)
                for ii in xrange(dim_t):
                    model['hz_%i_%i' % (i, ii)] = tf.add(
                        tf.add(tf.matmul(model['hcx_%i_%i' % (i, ii)], model['hWx_%i' % i]),
                               tf.matmul(model['hc_%i_%i' % (i, ii)], model['hWz_%i' % i])),
                        model['hBz_%i' % i], name='hz_%i_%i' % (i, ii))
            with tf.name_scope('output_%i' % i):
                for ii in xrange(dim_t):
                    model['hh_%i_%i' % (i, ii)] = tf.mul(model['ho_%i_%i' % (i, ii)],
                                                         tf.nn.tanh(model['hz_%i_%i' % (i, ii)]),
                                                         name='hh_%i_%i' % (i, ii))

        with tf.name_scope('output'):
            for ii in xrange(dim_t):
                model['hh_%i' % ii] = model['hh_%i_%i' % (dim_d - 1, ii)]
        with tf.name_scope('meansquared'):
            for ii in xrange(dim_t):
                model['hms_%i' % ii] = tf.select(
                    tf.equal(model['hxi_%i' % ii], tf.zeros([dim_b], tf.int32)),
                    tf.zeros([dim_b], tf.float32),
                    tf.reduce_sum(tf.square(tf.sub(model['hy_%i' % ii], model['hh_%i' % ii])), [1]),
                    name='hms_%i' % ii)
            model['hms'] = tf.reduce_sum(tf.add_n([model['hms_%i' % ii] for ii in xrange(dim_t)]), name='hms')
            model['shms'] = tf.scalar_summary(model['hms'].name, model['hms'])

    with tf.name_scope('classification'):
        with tf.name_scope('label'):
            model['clabel'] = tf.placeholder(tf.float32, [dim_b, dim_c], name='clabel')
        for i in xrange(dim_n):
            with tf.name_scope('layer_%i' % i):
                model['cW_%i' % i] = tf.Variable(
                    tf.truncated_normal([2 * dim_i, 2 * dim_i], stddev=0.5 / dim_i),
                    name='cW_%i' % i) if i != dim_n - 1 else tf.Variable(
                    tf.truncated_normal([2 * dim_i, dim_c], stddev=1.0 / dim_c), name='cW_%i' % i)
                model['cB_%i' % i] = tf.Variable(
                    tf.truncated_normal([1, 2 * dim_i], stddev=0.5 / dim_i),
                    name='cB_%i' % i) if i != dim_n - 1 else tf.Variable(
                    tf.truncated_normal([1, dim_c], stddev=1.0 / dim_c), name='cB_%i' % i)
                model['cx_%i' % i] = tf.concat(1, [model['ph_%i' % (dim_t - 1)], model['hh_%i' % (dim_t - 1)]],
                                               name='cx_%i' % i) if i == 0 else model['cy_%i' % (i - 1)]
                model['cy_%i' % i] = tf.add(tf.matmul(model['cx_%i' % i], model['cW_%i' % i]),
                                            model['cB_%i' % i], name='cy_%i' % i)
        with tf.name_scope('output'):
            model['output'] = tf.nn.softmax(model['cy_%i' % (dim_n - 1)], name='output')
        with tf.name_scope('crossentropy'):
            model['cce'] = tf.reduce_sum(-tf.mul(model['clabel'], tf.log(model['output'])), name='cce')
            model['scce'] = tf.scalar_summary(model['cce'].name, model['cce'])

    model['gsms'] = tf.Variable(0, trainable=False, name='gsms')
    model['lrms'] = tf.train.exponential_decay(lrate_ms, model['gsms'], dstep_ms, drate_ms,
                                               staircase=False, name='lrms')
    model['tms'] = optim_ms(model['lrms']).minimize(model['pms'] + model['hms'],
                                                    global_step=model['gsms'], name='tms')
    model['gsce'] = tf.Variable(0, trainable=False, name='gsce')
    model['lrce'] = tf.train.exponential_decay(lrate_ce, model['gsce'], dstep_ce, drate_ce,
                                               staircase=False, name='lrce')
    model['tce'] = optim_ce(model['lrce']).minimize(model['cce'], global_step=model['gsce'], name='tce')

    return model
''' scope = 'encode_x' x_hat_encode = make_conv_net(x_hat, scope) #x_hat_inv_mag = tf.rsqrt(tf.clip_by_value(tf.reduce_sum(tf.square(x_hat_encode),1,keep_dims=True),eps,float("inf"))) cos_sim_list = [] if not tie: scope = 'encode_x_i' for i in range(n_samples): x_i_encode = make_conv_net(x_i[:, i, :, :, :], scope, tie or i > 0, not x_i_learn) x_i_inv_mag = tf.rsqrt( tf.clip_by_value( tf.reduce_sum(tf.square(x_i_encode), 1, keep_dims=True), eps, float("inf"))) dotted = tf.squeeze( tf.batch_matmul(tf.expand_dims(x_hat_encode, 1), tf.expand_dims(x_i_encode, 2)), [ 1, ]) cos_sim_list.append(dotted * x_i_inv_mag) #*x_hat_inv_mag cos_sim = tf.concat(1, cos_sim_list) tf.histogram_summary('cos sim', cos_sim) weighting = tf.nn.softmax(cos_sim) label_prob = tf.squeeze(tf.batch_matmul(tf.expand_dims(weighting, 1), y_i)) tf.histogram_summary('label prob', label_prob) top_k = tf.nn.in_top_k(label_prob, y_hat_ind, 1) acc = tf.reduce_mean(tf.to_float(top_k)) tf.scalar_summary('train avg accuracy', acc) correct_prob = tf.reduce_sum( tf.log(tf.clip_by_value(label_prob, eps, 1.0)) * y_hat, 1)
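# Minimal NumPy sketch of the similarity scoring used in the snippet above
# (illustrative only; array names here are hypothetical). Each support encoding
# is compared to the query with a dot product scaled by the support vector's
# inverse magnitude -- the query-norm term is commented out in the snippet, so
# this is not a full cosine similarity.
import numpy as np

eps = 1e-10
x_hat_encode = np.random.rand(8, 64)          # (batch, features) query encodings
x_i_encode = np.random.rand(8, 64)            # one support example's encodings
dotted = np.einsum('bf,bf->b', x_hat_encode, x_i_encode)
x_i_inv_mag = 1.0 / np.sqrt(np.clip((x_i_encode ** 2).sum(1), eps, np.inf))
cos_sim_column = dotted * x_i_inv_mag         # one column of the cos_sim matrix
print(cos_sim_column.shape)                   # (8,)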
def __call__(self, inputs, state, scope=None):
    scope = scope or type(self).__name__
    # It's always a good idea to scope variables in functions lest they
    # be defined elsewhere!
    input_size = inputs.get_shape()[1]
    # print('Input size: ', input_size)
    with tf.variable_scope(scope):
        ### YOUR CODE HERE (~20-30 lines)
        W_c = tf.get_variable("W_c", (input_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        U_c = tf.get_variable("U_c", (self.state_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        b_c = tf.get_variable("b_c", (self.state_size,),
                              initializer=tf.constant_initializer(0))
        W_o = tf.get_variable("W_o", (input_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        U_o = tf.get_variable("U_o", (self.state_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        b_o = tf.get_variable("b_o", (self.state_size,),
                              initializer=tf.constant_initializer(0))
        W_i = tf.get_variable("W_i", (input_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        U_i = tf.get_variable("U_i", (self.state_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        b_i = tf.get_variable("b_i", (self.state_size,),
                              initializer=tf.constant_initializer(0))
        W_f = tf.get_variable("W_f", (input_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        U_f = tf.get_variable("U_f", (self.state_size, self.state_size),
                              initializer=tf.contrib.layers.xavier_initializer())
        b_f = tf.get_variable("b_f", (self.state_size,),
                              initializer=tf.constant_initializer(0))

        # The inputs and previous hidden state are rank-2 (batch x size)
        # tensors, so a plain matmul is used for the gate pre-activations;
        # the output gate also takes its own bias b_o rather than b_c.
        o_t = tf.sigmoid(tf.matmul(inputs, W_o) + tf.matmul(state[0], U_o) + b_o)
        f_t = tf.sigmoid(tf.matmul(inputs, W_f) + tf.matmul(state[0], U_f) + b_f)
        i_t = tf.sigmoid(tf.matmul(inputs, W_i) + tf.matmul(state[0], U_i) + b_i)
        c_t_tilde = tf.tanh(tf.matmul(inputs, W_c) + tf.matmul(state[0], U_c) + b_c)

        c_t = state[1] * f_t + i_t * c_t_tilde
        h_t = o_t * tf.tanh(c_t)
        # o_t = tf.tanh(tf.matmul(inputs,U_o)+ r_t*tf.matmul(state,W_o) + b_o)
        new_state = [h_t, c_t]
        ### END YOUR CODE ###
    # Unlike a GRU, an LSTM's output (h_t) and state (h_t, c_t) are not the
    # same, so both are returned here.
    output = new_state
    return h_t, new_state
def __init__(self, hidden_num, inputs, seq_len=None, cell=None, optimizer=None, reverse=True, decode_without_input=True): """ Args: hidden_num : number of hidden elements of each LSTM unit. inputs : a list of input tensors with size (batch_num x elem_num) cell : an rnn cell object (the default option is `tf.python.ops.rnn_cell.LSTMCell`) optimizer : optimizer for rnn (the default option is `tf.train.AdamOptimizer`) reverse : Option to decode in reverse order. decode_without_input : Option to decode without input. """ self.batch_num = inputs[0].get_shape().as_list()[0] self.elem_num = inputs[0].get_shape().as_list()[1] if cell is None: self._enc_cell = LSTMCell(hidden_num) self._dec_cell = LSTMCell(hidden_num) else: self._enc_cell = cell self._dec_cell = cell with tf.variable_scope('encoder'): self.z_codes, self.enc_state = tf.nn.dynamic_rnn( self._enc_cell, inputs, sequence_length=seq_len, dtype=tf.float32) self.enc_state = tf.identity(self.enc_state, name='enc_state') with tf.variable_scope('decoder') as vs: dec_weight_ = tf.Variable(tf.truncated_normal( [hidden_num, self.elem_num], dtype=tf.float32), name="dec_weight") dec_bias_ = tf.Variable(tf.constant(0.1, shape=[self.elem_num], dtype=tf.float32), name="dec_bias") if decode_without_input: dec_inputs = [ tf.zeros(tf.shape(inputs[0]), dtype=tf.float32) for _ in range(len(inputs)) ] dec_outputs, dec_state = tf.nn.rnn( self._dec_cell, dec_inputs, initial_state=self.enc_state, sequence_length=seq_len, dtype=tf.float32) """the shape of each tensor dec_output_ : (step_num x hidden_num) dec_weight_ : (hidden_num x elem_num) dec_bias_ : (elem_num) output_ : (step_num x elem_num) input_ : (step_num x elem_num) """ if reverse: dec_outputs = dec_outputs[::-1] dec_output_ = tf.transpose(tf.pack(dec_outputs), [1, 0, 2]) dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0), [self.batch_num, 1, 1]) self.output_ = tf.batch_matmul(dec_output_, dec_weight_) + dec_bias_ else: dec_state = self.enc_state dec_input_ = tf.zeros(tf.shape(inputs[0]), dtype=tf.float32) dec_outputs = [] for step in range(len(inputs)): if step > 0: vs.reuse_variables() dec_input_, dec_state = self._dec_cell( dec_input_, dec_state) dec_input_ = tf.matmul(dec_input_, dec_weight_) + dec_bias_ dec_outputs.append(dec_input_) if reverse: dec_outputs = dec_outputs[::-1] self.output_ = tf.transpose(tf.pack(dec_outputs), [1, 0, 2]) self.input_ = tf.transpose(tf.pack(inputs), [1, 0, 2]) self.loss = tf.reduce_mean(tf.square(self.input_ - self.output_)) if optimizer is None: self.train = tf.train.AdamOptimizer().minimize(self.loss) else: self.train = optimizer.minimize(self.loss)
biases = {
    'out': tf.Variable(tf.random_normal([n_classes], dtype=tf.float32))
}

# Need to get a prediction for each sentence:
# get the vector representation of each word.
# pred1 = np.mean(x1, axis=1)  # conv_net(x1, weights, biases, keep_prob)
# pred2 = np.mean(x2, axis=1)  # conv_net(x2, weights, biases, keep_prob)

# Concatenate both representations.
out = tf.concat(1, [x1, x2])  # [pred1, pred2]

# Predict the relation class. `out` is a rank-2 tensor, so a plain matmul is
# used here instead of tf.batch_matmul (which expects rank >= 3 inputs).
pred = tf.add(tf.matmul(out, weights['out']), biases['out'])
print(tf.shape(pred))

# Define loss and optimizer.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate the model.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize all variables.
init = tf.global_variables_initializer()

# Launch the graph.
saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
def batch_dot(x, y, axes=None): """Batchwise dot product. `batch_dot` is used to compute dot product of `x` and `y` when `x` and `y` are data in batch, i.e. in a shape of `(batch_size, :)`. `batch_dot` results in a tensor or variable with less dimensions than the input. If the number of dimensions is reduced to 1, we use `expand_dims` to make sure that ndim is at least 2. # Arguments x, y: Keras tensors or variables with `ndim >= 2` axes: list of (or single) int with target dimensions. The lengths of `axes[0]` and `axes[1]` should be the same. # Returns A tensor with shape equal to the concatenation of `x`'s shape (less the dimension that was summed over) and `y`'s shape (less the batch dimension and the dimension that was summed over). If the final rank is 1, we reshape it to `(batch_size, 1)`. # Examples Assume `x = [[1, 2], [3, 4]]` and `y = [[5, 6], [7, 8]]` `batch_dot(x, y, axes=1) = [[17, 53]]` which is the main diagonal of `x.dot(y.T)`, although we never have to calculate the off-diagonal elements. Shape inference: Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. If `axes` is (1, 2), to find the output shape of resultant tensor, loop through each dimension in `x`'s shape and `y`'s shape: * `x.shape[0]` : 100 : append to output shape * `x.shape[1]` : 20 : do not append to output shape, dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) * `y.shape[0]` : 100 : do not append to output shape, always ignore first dimension of `y` * `y.shape[1]` : 30 : append to output shape * `y.shape[2]` : 20 : do not append to output shape, dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) `output_shape` = `(100, 30)` ```python >>> x_batch = K.ones(shape=(32, 20, 1)) >>> y_batch = K.ones(shape=(32, 30, 20)) >>> xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=[1, 2]) >>> K.int_shape(xy_batch_dot) (32, 1, 30) ``` """ if isinstance(axes, int): axes = (axes, axes) #print('1') if ndim(x) == 2 and ndim(y) == 2: if tf_major_version >= 1: if axes[0] == axes[1]: out = tf.reduce_sum(tf.multiply(x, y), axes[0]) else: out = tf.reduce_sum(tf.multiply(tf.transpose(x, [1, 0]), y), axes[1]) else: if axes[0] == axes[1]: out = tf.reduce_sum(tf.mul(x, y), axes[0]) else: out = tf.reduce_sum(tf.mul(tf.transpose(x, [1, 0]), y), axes[1]) else: if axes is not None: #print('2') adj_x = None if axes[0] == ndim(x) - 1 else True adj_y = True if axes[1] == ndim(y) - 1 else None else: #print('3') adj_x = None adj_y = None # TODO: remove later. if hasattr(tf, 'batch_matmul'): try: out = tf.batch_matmul(x, y, adj_a=adj_x, adj_b=adj_y) #print('4') except TypeError: out = tf.batch_matmul(x, y, adj_x=adj_x, adj_y=adj_y) else: out = tf.matmul(x, y, adjoint_a=adj_x, adjoint_b=adj_y) if ndim(out) == 1: out = expand_dims(out, 1) return out
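# Quick NumPy check of the docstring's examples above (illustrative only).
import numpy as np

# 2-D case: the batchwise dot over axis 1 is the main diagonal of x.dot(y.T).
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])
print((x * y).sum(axis=1))                    # [17 53]
print(np.diag(x.dot(y.T)))                    # [17 53]

# Shape-inference case: x (100, 20), y (100, 30, 20), axes=(1, 2) -> (100, 30).
xb = np.ones((100, 20))
yb = np.ones((100, 30, 20))
print(np.einsum('bi,bji->bj', xb, yb).shape)  # (100, 30)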
def __init__(self, is_training, word_embeddings, settings): self.num_steps = num_steps = settings.num_steps self.vocab_size = vocab_size = settings.vocab_size self.num_classes = num_classes = settings.num_classes self.gru_size = gru_size = settings.gru_size self.big_num = big_num = settings.big_num self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_word') self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos1') self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='input_pos2') self.absolute_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='absolute_pos1') self.absolute_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, num_steps], name='absolute_pos2') self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='input_y') self.total_shape = tf.placeholder(dtype=tf.int32, shape=[big_num + 1], name='total_shape') total_num = self.total_shape[-1] word_embedding = tf.get_variable(initializer=word_embeddings, name='word_embedding') pos1_embedding = tf.get_variable('pos1_embedding', [settings.pos_num, settings.pos_size]) pos2_embedding = tf.get_variable('pos2_embedding', [settings.pos_num, settings.pos_size]) attention_w = tf.get_variable('attention_omega', [gru_size, 1]) sen_a = tf.get_variable('attention_A', [gru_size]) sen_r = tf.get_variable('query_r', [gru_size, 1]) relation_embedding = tf.get_variable('relation_embedding', [self.num_classes, gru_size]) sen_d = tf.get_variable('bias_d', [self.num_classes]) gru_cell_forward = tf.nn.rnn_cell.GRUCell(gru_size) gru_cell_backward = tf.nn.rnn_cell.GRUCell(gru_size) if is_training and settings.keep_prob < 1: gru_cell_forward = tf.nn.rnn_cell.DropoutWrapper( gru_cell_forward, output_keep_prob=settings.keep_prob) gru_cell_backward = tf.nn.rnn_cell.DropoutWrapper( gru_cell_backward, output_keep_prob=settings.keep_prob) cell_forward = tf.nn.rnn_cell.MultiRNNCell([gru_cell_forward] * settings.num_layers) cell_backward = tf.nn.rnn_cell.MultiRNNCell([gru_cell_backward] * settings.num_layers) sen_repre = [] sen_alpha = [] sen_s = [] sen_out = [] self.prob = [] self.predictions = [] self.loss = [] self.accuracy = [] self.total_loss = 0.0 self._initial_state_forward = cell_forward.zero_state( total_num, tf.float32) self._initial_state_backward = cell_backward.zero_state( total_num, tf.float32) # embedding layer inputs_forward = tf.concat(2, [ tf.nn.embedding_lookup(word_embedding, self.input_word), tf.nn.embedding_lookup(pos1_embedding, self.input_pos1), tf.nn.embedding_lookup(pos2_embedding, self.input_pos2) ]) inputs_backward = tf.concat(2, [ tf.nn.embedding_lookup(word_embedding, tf.reverse(self.input_word, [False, True])), tf.nn.embedding_lookup(pos1_embedding, tf.reverse(self.input_pos1, [False, True])), tf.nn.embedding_lookup(pos1_embedding, tf.reverse(self.input_pos2, [False, True])) ]) outputs_forward = [] state_forward = self._initial_state_forward # Bi-GRU layer with tf.variable_scope('GRU_FORWARD'): for step in range(num_steps): if step > 0: tf.get_variable_scope().reuse_variables() (cell_output_forward, state_forward) = cell_forward(inputs_forward[:, step, :], state_forward) outputs_forward.append(cell_output_forward) outputs_backward = [] state_backward = self._initial_state_backward with tf.variable_scope('GRU_BACKWARD'): for step in range(num_steps): if step > 0: tf.get_variable_scope().reuse_variables() (cell_output_backward, state_backward) = cell_backward(inputs_backward[:, step, :], state_backward) 
outputs_backward.append(cell_output_backward) output_forward = tf.reshape(tf.concat(1, outputs_forward), [total_num, num_steps, gru_size]) output_backward = tf.reverse( tf.reshape(tf.concat(1, outputs_backward), [total_num, num_steps, gru_size]), [False, True, False]) # word-level attention layer output_h = tf.add(output_forward, output_backward) #attention_r = tf.reshape(tf.batch_matmul(tf.reshape(tf.nn.softmax(tf.reshape(tf.matmul(tf.reshape(tf.tanh(output_h),[total_num*num_steps,gru_size]),attention_w),[total_num,num_steps])),[total_num,1,num_steps]),output_h),[total_num,gru_size]) attention_r = tf.reshape( tf.batch_matmul( tf.reshape(tf.cast(self.absolute_pos1, tf.float32), [total_num, 1, num_steps]), output_h), [total_num, gru_size]) # sentence-level attention layer for i in range(big_num): sen_repre.append( tf.tanh(attention_r[self.total_shape[i]:self.total_shape[i + 1]])) batch_size = self.total_shape[i + 1] - self.total_shape[i] sen_alpha.append( tf.reshape( tf.nn.softmax( tf.reshape( tf.matmul(tf.mul(sen_repre[i], sen_a), sen_r), [batch_size])), [1, batch_size])) sen_s.append( tf.reshape(tf.matmul(sen_alpha[i], sen_repre[i]), [gru_size, 1])) sen_out.append( tf.add( tf.reshape(tf.matmul(relation_embedding, sen_s[i]), [self.num_classes]), sen_d)) self.prob.append(tf.nn.softmax(sen_out[i])) with tf.name_scope("output"): self.predictions.append( tf.argmax(self.prob[i], 0, name="predictions")) with tf.name_scope("loss"): self.loss.append( tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( sen_out[i], self.input_y[i]))) if i == 0: self.total_loss = self.loss[i] else: self.total_loss += self.loss[i] #tf.summary.scalar('loss',self.total_loss) #tf.scalar_summary(['loss'],[self.total_loss]) with tf.name_scope("accuracy"): self.accuracy.append( tf.reduce_mean(tf.cast( tf.equal(self.predictions[i], tf.argmax(self.input_y[i], 0)), "float"), name="accuracy")) #tf.summary.scalar('loss',self.total_loss) tf.scalar_summary('loss', self.total_loss) #regularization self.l2_loss = tf.contrib.layers.apply_regularization( regularizer=tf.contrib.layers.l2_regularizer(0.0001), weights_list=tf.trainable_variables()) self.final_loss = self.total_loss + self.l2_loss tf.scalar_summary('l2_loss', self.l2_loss) tf.scalar_summary('final_loss', self.final_loss)
def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False): """ Given F, representing the GP at the points X, produce the mean and (co-)variance of the GP at the points Xnew. Additionally, there my be Gaussian uncertainty about F as represented by q_sqrt. In this case `f` represents the mean of the distribution and q_sqrt the square-root of the covariance. Additionally, the GP may have been centered (whitened) so that p(v) = N( 0, I) f = L v thus p(f) = N(0, LL^T) = N(0, K). In this case 'f' represents the values taken by v. The method can either return the diagonals of the covariance matrix for each output of the full covariance matrix (full_cov). We assume K independent GPs, represented by the columns of f (and the last dimension of q_sqrt). - Xnew is a data matrix, size N x D - X are data points, size M x D - kern is a GPflow kernel - f is a data matrix, M x K, representing the function values at X, for K functions. - q_sqrt (optional) is a matrix of standard-deviations or Cholesky matrices, size M x K or M x M x K - whiten (optional) is a boolean: whether to whiten the representation as described above. These functions are now considered deprecated, subsumed into this one: gp_predict gaussian_gp_predict gp_predict_whitened gaussian_gp_predict_whitened """ # compute kernel stuff num_data = tf.shape(X)[0] Kmn = kern.K(X, Xnew) Kmm = kern.K(X) + eye(num_data) * settings.numerics.jitter_level Lm = tf.cholesky(Kmm) # Compute the projection matrix A A = tf.matrix_triangular_solve(Lm, Kmn, lower=True) # compute the covariance due to the conditioning if full_cov: fvar = kern.K(Xnew) - tf.matmul(A, A, transpose_a=True) shape = tf.pack([tf.shape(f)[1], 1, 1]) else: fvar = kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0) shape = tf.pack([tf.shape(f)[1], 1]) fvar = tf.tile(tf.expand_dims(fvar, 0), shape) # D x N x N or D x N # another backsubstitution in the unwhitened case if not whiten: A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False) # construct the conditional mean fmean = tf.matmul(tf.transpose(A), f) if q_sqrt is not None: if q_sqrt.get_shape().ndims == 2: LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2) # D x M x N elif q_sqrt.get_shape().ndims == 3: L = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0) # D x M x M A_tiled = tf.tile(tf.expand_dims(A, 0), tf.pack([tf.shape(f)[1], 1, 1])) LTA = tf.batch_matmul(L, A_tiled, adj_x=True) # D x M x N else: # pragma: no cover raise ValueError("Bad dimension for q_sqrt: %s" % str(q_sqrt.get_shape().ndims)) if full_cov: fvar = fvar + tf.batch_matmul(LTA, LTA, adj_x=True) # D x N x N else: fvar = fvar + tf.reduce_sum(tf.square(LTA), 1) # D x N fvar = tf.transpose(fvar) # N x D or N x N x D return fmean, fvar
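# NumPy sketch of the unwhitened conditional mean and diagonal variance that the
# function above computes via Cholesky backsubstitution (illustrative only; the
# squared-exponential kernel below is a stand-in, not GPflow's kernel object).
import numpy as np
from scipy.linalg import cholesky, solve_triangular

def rbf(A, B):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2)

X = np.random.rand(6, 1); Xnew = np.random.rand(4, 1); f = np.random.rand(6, 1)
jitter = 1e-6
Kmn = rbf(X, Xnew)
Kmm = rbf(X, X) + jitter * np.eye(len(X))
Lm = cholesky(Kmm, lower=True)
A = solve_triangular(Lm, Kmn, lower=True)          # Lm^{-1} Kmn
fvar = np.diag(rbf(Xnew, Xnew)) - (A ** 2).sum(0)  # diagonal predictive variance
A = solve_triangular(Lm.T, A, lower=False)         # unwhitened case: Kmm^{-1} Kmn
fmean = A.T.dot(f)                                 # N x K conditional mean
print(fmean.shape, fvar.shape)                     # (4, 1) (4,)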
q_out = tf.pack(q_out) q_out = tf.reduce_sum( tf.mul(q_out, tf.to_float(tf.expand_dims(Pl['question_mask'], -1))), 0) Vatt = compute_attention(V, q_out) x = merge_modalities(Vatt, q_out) mc_mask = tf.to_float(tf.not_equal(Pl['mc'], a_w2i['</s>'])) norm_mask = tf.expand_dims(tf.reduce_sum(mc_mask, reduction_indices=2), -1) with tf.variable_scope('multiple_choice'): W = tf.get_variable('W') mc_emb = tf.nn.embedding_lookup(W, Pl['mc']) masked_mc_out = tf.mul(tf.expand_dims(mc_mask, -1), mc_emb) mc_out = tf.reduce_sum(masked_mc_out, reduction_indices=2) / norm_mask out_scores = tf.batch_matmul(mc_out, tf.expand_dims(x, 1), adj_y=True)[:, :, 0] out_probas = tf.nn.softmax(out_scores) normalized_ans = Pl['answers'] / tf.expand_dims( tf.reduce_sum(Pl['answers'], reduction_indices=1), -1) cross_entropy = tf.nn.softmax_cross_entropy_with_logits( out_scores, normalized_ans) cost = tf.reduce_mean(cross_entropy) optimizer = tf.train.AdamOptimizer() #optimizer = tf.train.GradientDescentOptimizer(0.01) gvs = optimizer.compute_gradients(cost) # with tf.device('/cpu:0'): cost_s = tf.scalar_summary('train loss', cost, name='train_loss') capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
def _random_pd_matrix(self, shape):
    # With probability 1 this is positive definite.
    sqrt = self._rng.randn(*shape)
    mat = tf.batch_matmul(sqrt, sqrt, adj_y=True)
    return mat.eval()
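# Illustrative NumPy check of why this works: for a random square S, the Gram
# matrix S @ S.T is symmetric positive semi-definite, and with continuous random
# entries it is almost surely positive definite. Names below are for
# demonstration only.
import numpy as np

rng = np.random.RandomState(0)
S = rng.randn(4, 4)
M = S @ S.T                      # what batch_matmul(sqrt, sqrt, adj_y=True) computes per slice
eigvals = np.linalg.eigvalsh(M)  # eigenvalues of the symmetric matrix
print(np.allclose(M, M.T), (eigvals > 0).all())  # True True (almost surely)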
def __init__(self, config): entity_total = config.entity relation_total = config.relation batch_size = config.batch_size sizeE = config.hidden_sizeE sizeR = config.hidden_sizeR margin = config.margin with tf.name_scope("read_inputs"): self.pos_h = tf.placeholder(tf.int32, [batch_size]) self.pos_t = tf.placeholder(tf.int32, [batch_size]) self.pos_r = tf.placeholder(tf.int32, [batch_size]) self.neg_h = tf.placeholder(tf.int32, [batch_size]) self.neg_t = tf.placeholder(tf.int32, [batch_size]) self.neg_r = tf.placeholder(tf.int32, [batch_size]) with tf.name_scope("embedding"): self.ent_embeddings = tf.get_variable( name="ent_embedding", shape=[entity_total, sizeE], initializer=tf.contrib.layers.xavier_initializer( uniform=False)) self.rel_embeddings = tf.get_variable( name="rel_embedding", shape=[relation_total, sizeR], initializer=tf.contrib.layers.xavier_initializer( uniform=False)) self.rel_matrix = tf.get_variable( name="rel_matrix", shape=[relation_total, sizeE * sizeR], initializer=tf.contrib.layers.xavier_initializer( uniform=False)) with tf.name_scope('lookup_embeddings'): pos_h_e = tf.reshape( tf.nn.embedding_lookup(self.ent_embeddings, self.pos_h), [-1, sizeE, 1]) pos_t_e = tf.reshape( tf.nn.embedding_lookup(self.ent_embeddings, self.pos_t), [-1, sizeE, 1]) pos_r_e = tf.reshape( tf.nn.embedding_lookup(self.rel_embeddings, self.pos_r), [-1, sizeR]) neg_h_e = tf.reshape( tf.nn.embedding_lookup(self.ent_embeddings, self.neg_h), [-1, sizeE, 1]) neg_t_e = tf.reshape( tf.nn.embedding_lookup(self.ent_embeddings, self.neg_t), [-1, sizeE, 1]) neg_r_e = tf.reshape( tf.nn.embedding_lookup(self.rel_embeddings, self.neg_r), [-1, sizeR]) matrix = tf.reshape( tf.nn.embedding_lookup(self.rel_matrix, self.neg_r), [-1, sizeR, sizeE]) pos_h_e = tf.reshape(tf.batch_matmul(matrix, pos_h_e), [-1, sizeR]) pos_t_e = tf.reshape(tf.batch_matmul(matrix, pos_t_e), [-1, sizeR]) neg_h_e = tf.reshape(tf.batch_matmul(matrix, neg_h_e), [-1, sizeR]) neg_t_e = tf.reshape(tf.batch_matmul(matrix, neg_t_e), [-1, sizeR]) if config.L1_flag: pos = tf.reduce_sum(abs(pos_h_e + pos_r_e - pos_t_e), 1, keep_dims=True) neg = tf.reduce_sum(abs(neg_h_e + neg_r_e - neg_t_e), 1, keep_dims=True) else: pos = tf.reduce_sum((pos_h_e + pos_r_e - pos_t_e)**2, 1, keep_dims=True) neg = tf.reduce_sum((neg_h_e + neg_r_e - neg_t_e)**2, 1, keep_dims=True) with tf.name_scope("output"): self.loss = tf.reduce_sum(tf.maximum(pos - neg + margin, 0))
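# NumPy shape check of the TransR-style projection above (illustrative only):
# each relation's flat matrix is reshaped to (sizeR, sizeE) and applied to the
# (sizeE, 1) entity embedding, giving a sizeR-dimensional vector in relation
# space, which is then compared via the translation loss.
import numpy as np

batch, sizeE, sizeR = 3, 5, 4
rel_matrix = np.random.rand(batch, sizeR * sizeE).reshape(batch, sizeR, sizeE)
h_e = np.random.rand(batch, sizeE, 1)
projected = (rel_matrix @ h_e).reshape(batch, sizeR)
print(projected.shape)  # (3, 4)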
def batch_timesteps_linear( input, output_size, bias, bias_start=0.0, use_l2_loss=False, use_weight_normalization=use_weight_normalization_default, scope=None, tranpose_input=True, timestep=-1): """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable. Args: args: a 3D Tensor [timesteps, batch_size, input_size] output_size: int, second dimension of W[i]. bias: boolean, whether to add a bias term or not. bias_start: starting value to initialize the bias; 0 by default. scope: VariableScope for the created subgraph; defaults to "Linear". Returns: A 2D Tensor with shape [batch x output_size] equal to sum_i(args[i] * W[i]), where W[i]s are newly created matrices. Raises: ValueError: if some of the arguments has unspecified or wrong shape. """ # Calculate the total size of arguments on dimension 2. if tranpose_input: input = tf.transpose(input, [1, 0, 2]) shape_list = input.get_shape().as_list() if len(shape_list) != 3: raise ValueError( 'shape must be of size 3, you have inputted shape size of:', len(shape_list)) num_timesteps = shape_list[0] batch_size = shape_list[1] total_arg_size = shape_list[2] if use_l2_loss: l_regularizer = tf.contrib.layers.l2_regularizer(1e-5) else: l_regularizer = None # Now the computation. with tf.variable_scope(scope or "Linear"): matrix = tf.get_variable( "Matrix", [total_arg_size, output_size], initializer=tf.uniform_unit_scaling_initializer(), regularizer=l_regularizer) if use_weight_normalization: matrix = weight_normalization(matrix) matrix = tf.tile(tf.expand_dims(matrix, 0), [num_timesteps, 1, 1]) res = tf.batch_matmul(input, matrix) if bias: bias_term = tf.get_variable( "Bias", [output_size], initializer=tf.constant_initializer(bias_start)) res = res + bias_term if tranpose_input: res = tf.transpose(res, [1, 0, 2]) return res
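# Illustrative NumPy equivalence for the tiled matmul above: tiling the shared
# weight matrix across timesteps and batch-multiplying is the same as applying
# one linear map at every (timestep, batch) position.
import numpy as np

T, B, I, O = 4, 2, 3, 5
x = np.random.rand(T, B, I)                     # [timesteps, batch, input_size]
W = np.random.rand(I, O)
tiled = np.tile(W[None, :, :], (T, 1, 1))       # what tf.tile + batch_matmul does
out_batched = x @ tiled                         # [T, B, O]
out_shared = np.einsum('tbi,io->tbo', x, W)
print(np.allclose(out_batched, out_shared))     # True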
def diagonal_bilinear(inputs1, inputs2, output_size, add_bias1=True, add_bias2=True, initializer=None, scope=None, moving_params=None): """""" with tf.variable_scope(scope or 'DiagonalBilinear'): # Reformat the inputs ndims = len(inputs1.get_shape().as_list()) inputs1_shape = tf.shape(inputs1) inputs1_bucket_size = inputs1_shape[ndims-2] inputs1_size = inputs1.get_shape().as_list()[-1] inputs2_shape = tf.shape(inputs2) inputs2_bucket_size = inputs2_shape[ndims-2] inputs2_size = inputs2.get_shape().as_list()[-1] output_shape = [] batch_size = 1 for i in xrange(ndims-2): batch_size *= inputs1_shape[i] output_shape.append(inputs1_shape[i]) output_shape.append(inputs1_bucket_size) output_shape.append(output_size) output_shape.append(inputs2_bucket_size) output_shape = tf.pack(output_shape) inputs1 = tf.reshape(inputs1, tf.pack([batch_size, inputs1_bucket_size, inputs1_size])) inputs2 = tf.reshape(inputs2, tf.pack([batch_size, inputs2_bucket_size, inputs2_size])) # Get the matrix if initializer is None and moving_params is None: initializer = tf.ones_initializer weights = tf.get_variable('Weights', [output_size, inputs1_size], initializer=initializer) if moving_params is not None: weights = moving_params.average(weights) else: tf.add_to_collection('Weights', weights) # Get the bias if add_bias: bias = tf.get_variable('Biases', [output_size], initializer=tf.zeros_initializer) if moving_params is not None: bias = moving_params.average(bias) bias = tf.reshape(bias, [-1,1]) else: bias = 0 # Do the multiplications # (bn x 1 x d) (r x d) -> (bn x r x d) lin = tf.reshape(inputs1, [-1, 1, inputs1_size]) * weights # (b x nr x d) (b x n x d)T -> (b x nr x n) bilin = tf.batch_matmul(tf.reshape(lin, tf.pack([batch_size, inputs1_bucket_size*output_size, inputs2_size])), inputs2, adj_y=True) # (bn x r x n) bilin = tf.reshape(bilin, tf.pack([-1, output_size, inputs2_bucket_size])) + bias # (b x n x r x n) bilin = tf.reshape(bilin, output_shape) if add_bias1: with tf.variable_scope('Input1_Biases'): inputs1.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(inputs1_size)]) bilin += tf.expand_dims(linear(inputs1, output_size, add_bias=False, moving_params=moving_params), 3) if add_bias2: with tf.variable_scope('Input2_Biases'): inputs2.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(inputs2_size)]) bilin += tf.expand_dims(tf.transpose(linear(inputs2, output_size, add_bias=False, moving_params=moving_params), [0, 2, 1]), 1) return bilin
print("train count:\t%d"%(trainSet.shape[0])) print("test count:\t%d"%(testSet.shape[0])) print("="*20) # embedding layer u = tf.placeholder(tf.int32, [None, 1]) v = tf.placeholder(tf.int32, [None, 1]) r = tf.placeholder(tf.float32, [None, 1]) U = tf.Variable(tf.random_uniform([userCount, k], -0.05, 0.05)) V = tf.Variable(tf.random_uniform([itemCount, k], -0.05, 0.05)) uFactor = tf.nn.embedding_lookup(U, u) vFactor = tf.nn.embedding_lookup(V, v) matmul = tf.reshape(tf.batch_matmul(uFactor, vFactor, adj_x=True, adj_y=False), [-1, k*k]) merge = tf.concat(1, [tf.reshape(uFactor, [-1, k]), tf.reshape(vFactor, [-1, k]), matmul]) # fully connection layer import math layer1 = k * k + 2 * k layer2 = k scale1 = math.sqrt(6.0 / (layer1 + layer2)) scale2 = math.sqrt(6.0 / (layer2 + 1)) W1 = tf.Variable(tf.random_uniform([layer1, layer2], -scale1, scale1)) b1 = tf.Variable(tf.random_uniform([layer2], -scale1, scale1)) y1 = tf.sigmoid(tf.matmul(merge, W1) + b1) W2 = tf.Variable(tf.random_uniform([layer2, 1], -scale2, scale2)) b2 = tf.Variable(tf.random_uniform([1], -scale2, scale2))
def add_model(self): with tf.device('/cpu:0'): x = tf.placeholder(tf.int32, [None, self.args.num_features]) y = tf.placeholder(tf.float32, [None]) b = tf.Variable(tf.random_uniform([1], -.1, .1)) embedding_w = tf.concat(0, [ tf.constant([[0.] * 1], dtype=tf.float32), tf.Variable( tf.random_uniform([self.args.max_features, 1], -.1, .1)) ]) embedding_v = tf.concat(0, [ tf.constant([[0.] * self.args.dim], dtype=tf.float32), tf.Variable( tf.random_uniform([self.args.max_features, self.args.dim], -.1, .1)) ]) with tf.device('/gpu:0'): embed_w = tf.nn.embedding_lookup(embedding_w, x) embed_v = tf.nn.embedding_lookup(embedding_v, x) w_x = tf.reduce_sum(embed_w, [1, 2]) # print w_x.get_shape() m = tf.batch_matmul(embed_v, tf.transpose(embed_v, perm=[0, 2, 1])) # mask = np.array([[1 if j > i else 0 for j in range(self.args.num_features) ] for i in range(self.args.num_features)]).astype(bool) m_l = [] for i in range(self.args.num_features): for j in range(i + 1, self.args.num_features): m_l.append(tf.expand_dims(m[:, i, j], 1)) mm = tf.concat(1, m_l) w_mm_dim = (self.args.num_features**2 - self.args.num_features) / 2 # C(n, 2) # w1_mm = tf.Variable(tf.random_uniform([w_mm_dim, 1], -.1, .1)) # b1_mm = tf.Variable(tf.random_uniform([1], -.1, .1)) w1_mm = tf.Variable(tf.random_uniform([w_mm_dim, 1], 1.0, 1.0)) b1_mm = tf.Variable(tf.random_uniform([1], 0.0, 0.0)) a1_mm = tf.matmul(mm, w1_mm) + b1_mm z1_mm = a1_mm # linear mmm = z1_mm vv_x = tf.squeeze(mmm, [1]) # w_mm = tf.Variable(tf.random_uniform([w_mm_dim], -.1, .1)) # w_mm = tf.Variable(tf.constant([2.0]*mm.get_shape()[1])) # mmm = mm * w_mm # vv_x = tf.reduce_sum(mmm, [1]) # mm = tf.matrix_band_part(m, 0, -1) - tf.matrix_band_part(m, 0, 0) # w_mm = tf.Variable(tf.random_uniform([self.args.num_features, self.args.num_features], -.1, .1)) # mmm = mm * w_mm # vv_x = tf.reduce_sum(mmm, [1, 2]) # vv_x = (tf.reduce_sum(tf.batch_matmul(embed_v, tf.transpose(embed_v, perm=[0, 2, 1])), [1, 2]) \ # - tf.reduce_sum(embed_v ** 2, [1, 2]) ) #/ 2 #+ tf.reduce_sum(embed_v ** 2, [1, 2]) # print vv_x.get_shape() all_x = b + w_x + vv_x # print all_x.get_shape() # this can only be used in tensorflow 0.12, due to tf.trace() # m = tf.batch_matmul(embed, tf.transpose(embed, perm=[0, 2, 1])) # wTx = ( tf.reduce_sum(m, [1, 2]) - tf.trace(m) ) / 2 clip_all_x = tf.clip_by_value(all_x, -35., 35.) p = 1.0 / (1.0 + tf.exp(-clip_all_x)) clip_p = tf.clip_by_value(p, 10e-8, 1.0 - 10e-8) # cost: logloss cost = -tf.reduce_sum(y * tf.log(clip_p) + (1.0-y) * tf.log(1.0-clip_p)) \ + self.args.regular * (tf.nn.l2_loss(embed_w) \ + tf.nn.l2_loss(embed_v) + tf.nn.l2_loss(w1_mm) ) opt = tf.train.AdagradOptimizer(self.args.lr).minimize(cost) return {'x': x, 'y': y, 'p': clip_p, 'cost': cost, 'opt': opt}
def build_model(self): image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image]) question = tf.placeholder(tf.int32, [self.batch_size, self.max_words_q]) answer = tf.placeholder(tf.int32, [self.batch_size, self.max_words_q]) question_length = tf.placeholder(tf.int32, [self.batch_size]) answer_length = tf.placeholder(tf.int32, [self.batch_size]) label = tf.placeholder(tf.float32, [self.batch_size,2]) state_que = tf.zeros([self.batch_size, self.input_embedding_size]) #zhe state_ans = tf.zeros([self.batch_size, self.input_embedding_size]) #zhe loss = 0.0 q_mask = tf.cast(tf.sign(question), tf.float32) # 500 * 26 ques = tf.nn.embedding_lookup(self.embed_ques_W, question) ques_drop = tf.nn.dropout(ques, 1-self.drop_out_rate) # 500 * 26 * 300 ques_drop_ = tf.reshape(ques_drop, [-1, self.input_embedding_size]) ques_after_emb_linear = tf.nn.xw_plus_b(ques_drop_, self.att_Q_W, self.att_Q_b) ques_after_emb = tf.tanh(ques_after_emb_linear) ques_after_emb = tf.reshape(ques_after_emb, [self.batch_size, self.max_words_q, self.dim_hidden]) # 500 * 26 * 1024 image_att = tf.nn.dropout(image, 1-self.drop_out_rate) image_att_linear = tf.nn.xw_plus_b(image_att, self.att_image_W, self.att_image_b) image_att_emb = tf.tanh(image_att_linear) image_emb_ = tf.reshape(image_att_emb, (self.batch_size, self.dim_hidden, 1)) q_img = tf.batch_matmul(ques_after_emb, image_emb_) q_img_ = tf.reshape(q_img, (self.batch_size, self.max_words_q)) q_img_softmax = tf.nn.log_softmax(q_img_) q_img_softmax = tf.mul(tf.exp(q_img_softmax), q_mask) q_img_softmax_sum = tf.reduce_sum(q_img_softmax, 1) q_img_softmax_sum_ = tf.expand_dims(q_img_softmax_sum, 1) q_img_softmax_ = q_img_softmax/q_img_softmax_sum_ q_img_softmax_ = tf.reshape(q_img_softmax_, (self.batch_size, 1, self.max_words_q)) q_final = tf.batch_matmul(q_img_softmax_, ques_drop) state_que = tf.reshape(q_final, (self.batch_size, self.input_embedding_size)) # pdb.set_trace() ans = tf.nn.embedding_lookup(self.embed_ques_W, answer) # inputs = tf.div(tf.reduce_sum(inputs, 1), q_a_length) state_ans = tf.reduce_sum(ans, 1) loss = 0.0 # state_que = inputs[0:500,:] # multimodal (fusing question & image) Q_drop = tf.nn.dropout(state_que, 1-self.drop_out_rate) Q_linear = tf.nn.xw_plus_b(Q_drop, self.embed_Q_W, self.embed_Q_b) Q_emb = tf.tanh(Q_linear) image_drop = tf.nn.dropout(image, 1-self.drop_out_rate) image_linear = tf.nn.xw_plus_b(image_drop, self.embed_image_W, self.embed_image_b) image_emb = tf.tanh(image_linear) A_drop = tf.nn.dropout(state_ans, 1-self.drop_out_rate) A_linear = tf.nn.xw_plus_b(A_drop, self.embed_A_W, self.embed_A_b) A_emb = tf.tanh(A_linear) QI = tf.mul(Q_emb, image_emb) QI_drop = tf.nn.dropout(QI, 1-self.drop_out_rate) QI_linear = tf.nn.xw_plus_b(QI_drop, self.embed_QI_W, self.embed_QI_b) QI_emb = tf.tanh(QI_linear) QIA = tf.mul(QI_emb, A_emb) scores_emb = tf.nn.xw_plus_b(QIA, self.embed_scor_W, self.embed_scor_b) #zhe cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=scores_emb, labels=label) #zhe # Calculate loss loss = tf.reduce_mean(cross_entropy) return loss, image, question, answer, question_length, answer_length, label
def encode(self, inputs, masks, encoder_state_input): """ In a generalized encode function, you pass in your inputs, masks, and an initial hidden state input into this function. :param inputs: Symbolic representations of your input :param masks: this is to make sure tf.nn.dynamic_rnn doesn't iterate through masked steps :param encoder_state_input: (Optional) pass this as initial hidden state to tf.nn.dynamic_rnn to build conditional representations :return: an encoded representation of your input. It can be context-level representation, word-level representation, or both. """ #read inputs question, paragraph = inputs q_mask, p_mask = masks #run biLSTM over question with tf.variable_scope('enc_q') as scope: encode_q_f_cell = tf.nn.rnn_cell.BasicLSTMCell(self.size) encode_q_b_cell = tf.nn.rnn_cell.BasicLSTMCell(self.size) q_outputs, q_end_state = tf.nn.bidirectional_dynamic_rnn( encode_q_f_cell, encode_q_b_cell, question, sequence_length=q_mask, dtype=tf.float32) #LSTM returns a pair of hidden states (c, h) scope.reuse_variables() #concat end states to get question representation q_fwd_state, q_bkwd_state = q_end_state self.q_rep = tf.concat( 1, (q_fwd_state[0], q_bkwd_state[0])) #q rep is Batch by 2*H_size #run biLSTM over paragraph with tf.variable_scope('enc_p') as scope: encode_p_f_cell = tf.nn.rnn_cell.BasicLSTMCell(self.size) encode_p_b_cell = tf.nn.rnn_cell.BasicLSTMCell(self.size) p_outputs, p_end_state = tf.nn.bidirectional_dynamic_rnn( encode_p_f_cell, encode_p_b_cell, paragraph, sequence_length=p_mask, dtype=tf.float32) #condition on q rep? scope.reuse_variables() self.p_rep = tf.concat(2, p_outputs) #concat fwd and bkwd outputs #calc scores between paragraph hidden states and q-rep self.attention_weights = tf.get_variable( "attent_weights", shape=[2 * self.size, 2 * self.size], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer()) q_attention = tf.matmul(self.q_rep, self.attention_weights) unnorm_attention = tf.batch_matmul( self.p_rep, tf.expand_dims(q_attention, axis=-1)) #dims are batch by seq by 1 self.attention = unnorm_attention / tf.sqrt( tf.reduce_sum(tf.square(unnorm_attention), axis=1, keep_dims=True)) self.knowledge_rep = tf.multiply(self.p_rep, self.attention) return self.knowledge_rep, self.attention
batch_size = 2
memory_size = 3
n_memory_slots = 4
embedding_dim = 5
hidden_size = 6
depth = 2

shapes = {
    'gru_state': (batch_size, embedding_dim),
    'h': (batch_size, hidden_size),
    'M': (batch_size, memory_size, n_memory_slots),
    'w': (batch_size, n_memory_slots, 1),
    'input': (batch_size, hidden_size)
}


def ones_variable(name):
    shape = shapes[name]
    return tf.Variable(np.ones(shape), dtype=tf.float32, name=name)


with tf.Session() as sess:
    M = ones_variable('M')
    w = ones_variable('w')
    tf.initialize_all_variables().run()
    print(sess.run(tf.batch_matmul(M, w)))
    # x = tf.zeros([2, 2])
    # m = tf.zeros([2, 2])
    # g, _ = tf.nn.rnn_cell.BasicRNNCell(2)(x, x)
def extend_vector(input, r, batch_size):
    """ [a,b,c] --> [[a,a,a],[b,b,b],[c,c,c]] if D=3 """
    return tf.batch_matmul(tf.ones([batch_size, r, 1]),
                           tf.expand_dims(input, 1))
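# Minimal NumPy sketch of the expansion as written above (illustrative only).
# Assuming `input` has shape (batch_size, D), multiplying a (batch, r, 1) block
# of ones by the (batch, 1, D) expanded input stacks r copies of each batch row.
import numpy as np

batch_size, r, D = 2, 4, 3
x = np.arange(batch_size * D, dtype=np.float32).reshape(batch_size, D)
extended = np.ones((batch_size, r, 1), np.float32) @ x[:, None, :]
print(extended.shape)                                             # (2, 4, 3)
print(np.allclose(extended, np.tile(x[:, None, :], (1, r, 1))))   # True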
def key_addressing_and_value_reading(self, q_b, k_b, v_b): # Debugging logits_list = [None] * FLAGS.hops probs_list = [None] * FLAGS.hops o_list = [None] * FLAGS.hops q_list = [None] * FLAGS.hops self.debug_dict['logits'] = logits_list self.debug_dict['probs'] = probs_list self.debug_dict['o_list'] = o_list self.debug_dict['q_list'] = q_list for h in range(FLAGS.hops): # # Key Addressing # # [batch_sz, embedding_sz, 1] q_temp = tf.expand_dims(q_b, -1) # [batch_sz, mem_sz, 1] logits = tf.batch_matmul(k_b, q_temp) # [batch_sz, mem_sz] logits = tf.squeeze(logits) probs = tf.nn.softmax(logits) # Ignore memory padding probs = probs * self.mem_wts_b # [batch_sz, 1] z = tf.expand_dims(tf.reduce_sum(probs, 1), -1) # [batch_sz, mem_sz] probs = probs / z # # Value Reading # # [batch_sz, mem_sz, 1] probs = tf.expand_dims(probs, -1) # [batch_sz, embedding_sz] o = tf.reduce_sum(probs * v_b, 1) R = self.R_list[h] R_b = self.Rb_list[h] if FLAGS.value_reading == 'o': q_b = o elif FLAGS.value_reading == 'o.R': q_b = tf.matmul(o, R) elif FLAGS.value_reading == 'q + o': q_b = q_b + o elif FLAGS.value_reading == 'q + o.R': q_b = q_b + tf.matmul(o, R) elif FLAGS.value_reading == 'q.R + o': q_b = tf.matmul(q_b, R) + o elif FLAGS.value_reading == '(q + o).R': if h < FLAGS.hops - 1: q_b = tf.matmul(q_b + o, R) + R_b else: q_b = q_b + o # q_b = tf.matmul(q_b + o, R) + R_b elif FLAGS.value_reading is None or \ FLAGS.value_reading == '(q + o).R & o.R': if h < FLAGS.hops - 1: q_b = tf.matmul(q_b + o, R) else: q_b = tf.matmul(o, R) else: assert (False) # Debugging logits_list[h] = logits probs_list[h] = probs o_list[h] = o q_list[h] = q_b # q_b = tf.tanh(q_b) return q_b
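# Minimal NumPy sketch of one hop of key addressing + value reading as coded
# above (illustrative; the memory-padding mask and renormalization are omitted
# for brevity): a softmax over key/query dot products, then a probability-
# weighted sum of the value memories.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

batch_sz, mem_sz, emb_sz = 2, 5, 4
q_b = np.random.rand(batch_sz, emb_sz)
k_b = np.random.rand(batch_sz, mem_sz, emb_sz)
v_b = np.random.rand(batch_sz, mem_sz, emb_sz)

logits = np.einsum('bme,be->bm', k_b, q_b)        # key addressing
probs = softmax(logits)
o = np.einsum('bm,bme->be', probs, v_b)           # value reading
print(o.shape)                                    # (2, 4)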
# h1 = tf.nn.elu(tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner = "sum") + b1) # h1 = tf.nn.relu6(tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner = "sum") + b1) l1 = tf.nn.embedding_lookup_sparse(W1, sp_ids, None, combiner="sum") + b1 Ze = tf.nn.embedding_lookup(Z, z_idx) if non_linear_z: h1 = tf.tanh(l1 + Ze) else: h1 = tf.tanh(l1) + Ze ## batch normalization doesn't work that well in comparison to Torch # h1 = batch_norm_wrapper(l1, tr_ind) h1e = tf.nn.embedding_lookup(h1, y_idx_comp) W2e = tf.nn.embedding_lookup(W2, y_idx_prot) b2e = tf.nn.embedding_lookup(b2, tf.squeeze(y_idx_prot, [1])) l2 = tf.squeeze(tf.batch_matmul(h1e, W2e, adj_y=True), [1, 2]) + b2e y_pred = l2 + b2g ## batch normalization doesn't work that well in comparison to Torch # scale2e = tf.nn.embedding_lookup(scale2, tf.squeeze(y_idx_prot, [1])) # beta2e = tf.nn.embedding_lookup(beta2, tf.squeeze(y_idx_prot, [1])) # batch_mean2, batch_var2 = tf.nn.moments(l2,[0]) # z2 = (l2 - batch_mean2) / tf.sqrt(batch_var2 + epsilon) # y_pred = scale2e * l2 + b2g b_ratio = np.float32(Ncmpd) / np.float32(batch_size) y_loss = tf.reduce_sum(tf.square(y_val - y_pred)) #l2_reg = lambda_reg * tf.global_norm((W1, W2))**2 + lambda_zreg * b_ratio * tf.nn.l2_loss(Ze) l2_reg = lambda_reg * tf.global_norm( (W1, W2))**2 + lambda_zreg * tf.nn.l2_loss(Z)
def build_generator(self): video = tf.placeholder( tf.float32, [self.batch_size, self.n_lstm_steps, self.dim_image]) video_mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps]) video_flat = tf.reshape(video, [-1, self.dim_image]) image_emb = tf.nn.xw_plus_b(video_flat, self.encode_image_W, self.encode_image_b) image_emb = tf.reshape( image_emb, [self.batch_size, self.n_lstm_steps, self.dim_hidden]) image_emb = tf.transpose(image_emb, [1, 0, 2]) state1 = tf.zeros([self.batch_size, self.lstm3.state_size]) h_prev = tf.zeros([self.batch_size, self.dim_hidden]) generated_words = [] current_embed = tf.zeros([self.batch_size, self.dim_hidden]) brcst_w = tf.tile(tf.expand_dims(self.embed_att_w, 0), [self.n_lstm_steps, 1, 1]) # n x h x 1 image_part = tf.batch_matmul( image_emb, tf.tile( tf.expand_dims(self.embed_att_Ua, 0), [self.n_lstm_steps, 1, 1])) + self.embed_att_ba # n x b x h for i in range(n_caption_step): e = tf.tanh(tf.matmul(h_prev, self.embed_att_Wa) + image_part) # n x b x h e = tf.batch_matmul(e, brcst_w) e = tf.reduce_sum(e, 2) # n x b e_hat_exp = tf.mul(tf.transpose(video_mask), tf.exp(e)) # n x b denomin = tf.reduce_sum(e_hat_exp, 0) # b denomin = denomin + tf.to_float(tf.equal(denomin, 0)) alphas = tf.tile(tf.expand_dims(tf.div(e_hat_exp, denomin), 2), [1, 1, self.dim_hidden]) # n x b x h attention_list = tf.mul(alphas, image_emb) # n x b x h atten = tf.reduce_sum(attention_list, 0) # b x h if i > 0: tf.get_variable_scope().reuse_variables() with tf.variable_scope("LSTM3") as vs: output1, state1 = self.lstm3( tf.concat(1, [atten, current_embed]), state1) # b x h lstm3_variables = [ v for v in tf.all_variables() if v.name.startswith(vs.name) ] output2 = tf.tanh( tf.nn.xw_plus_b(tf.concat(1, [output1, atten, current_embed]), self.embed_nn_Wp, self.embed_nn_bp)) # b x h h_prev = output1 logit_words = tf.nn.xw_plus_b(output2, self.embed_word_W, self.embed_word_b) # b x w max_prob_index = tf.argmax(logit_words, 1) # b generated_words.append(max_prob_index) # b with tf.device("/cpu:0"): current_embed = tf.nn.embedding_lookup(self.Wemb, max_prob_index) generated_words = tf.transpose(tf.pack(generated_words)) return video, video_mask, generated_words, lstm3_variables
def _transfer(self, transfer_matrix, embeddings):
    return tf.batch_matmul(transfer_matrix, embeddings)
def cumsum_weights(input, W, r=D):
    masked = mask(input, W, r)
    triangle = ones_triangular(NUM_NOTES)
    size = batch_size
    return tf.batch_matmul(masked, np.array([triangle] * size))
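# Why multiplying by a triangular matrix of ones gives running sums
# (illustrative NumPy check; it assumes ones_triangular returns an
# upper-triangular matrix of ones -- a lower-triangular version would
# accumulate in the opposite direction).
import numpy as np

n = 5
x = np.random.rand(2, n)                          # a couple of rows to accumulate
U = np.triu(np.ones((n, n)))                      # upper-triangular ones
print(np.allclose(x @ U, np.cumsum(x, axis=1)))   # True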
def build_network(self): with tf.variable_scope('encoder'): z_mean_w = tf.Variable( self.initializer([self._enc_cell.state_size, self.n_latent])) z_mean_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32)) z_logvar_w = tf.Variable( self.initializer([self._enc_cell.state_size, self.n_latent])) z_logvar_b = tf.Variable( tf.zeros([self.n_latent], dtype=tf.float32)) _, enc_state = rnn.rnn(self._enc_cell, self.inputs, dtype=tf.float32) self.z_mean = tf.add(tf.matmul(enc_state, z_mean_w), z_mean_b) self.z_log_var = tf.add(tf.matmul(enc_state, z_logvar_w), z_logvar_b) eps = tf.random_normal((self.batch_size, self.n_latent), 0, 1, dtype=tf.float32) self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_var)), eps)) with tf.variable_scope('decoder') as scope: dec_in_w = tf.Variable( self.initializer([self.n_latent, self._dec_cell.state_size], dtype=tf.float32)) dec_in_b = tf.Variable( tf.zeros([self._dec_cell.state_size], dtype=tf.float32)) dec_out_w = tf.Variable( self.initializer([self.n_hidden, self.elem_num], dtype=tf.float32)) dec_out_b = tf.Variable(tf.zeros([self.elem_num], dtype=tf.float32)) initial_dec_state = self.transfer_func( tf.add(tf.matmul(self.z, dec_in_w), dec_in_b)) dec_out, _ = seq2seq.rnn_decoder(self.inputs, initial_dec_state, self._dec_cell) if self.reverse: dec_out = dec_out[::-1] dec_output = tf.transpose(tf.pack(dec_out), [1, 0, 2]) batch_dec_out_w = tf.tile(tf.expand_dims(dec_out_w, 0), [self.batch_size, 1, 1]) self.output = tf.nn.sigmoid( tf.batch_matmul(dec_output, batch_dec_out_w) + dec_out_b) scope.reuse_variables() dec_gen_input = [ 0.5 * tf.ones([self.batch_size, self.elem_num], dtype=tf.float32) for _ in range(self.step_num) ] self.z_gen = tf.placeholder(tf.float32, [self.batch_size, self.n_latent]) dec_gen_state = self.transfer_func( tf.add(tf.matmul(self.z_gen, dec_in_w), dec_in_b)) dec_gen_out, _ = seq2seq.rnn_decoder(dec_gen_input, dec_gen_state, self._dec_cell) if self.reverse: dec_gen_out = dec_gen_out[::-1] dec_gen_output = tf.transpose(tf.pack(dec_gen_out), [1, 0, 2]) self.gen_output = tf.nn.sigmoid( tf.batch_matmul(dec_gen_output, batch_dec_out_w) + dec_out_b) self.inp = tf.transpose(tf.pack(self.inputs), [1, 0, 2]) self.train_loss = self.get_loss() self.train = tf.train.AdamOptimizer(self.learning_rate).minimize( self.train_loss)