def __init__(self, learning_rate, input_shape, BS):
    # input_shape example: [BS, 1, 28, 28]
    self.lr = learning_rate
    self.conv2d_1 = ly.conv2d(input_shape, [5, 5, 1, 32], [1, 1])
    self.relu_1 = ly.relu()
    self.max_pool_1 = ly.max_pooling(self.conv2d_1.output_shape,
                                     filter_shape=[2, 2], strides=[2, 2])
    self.conv2d_2 = ly.conv2d(self.max_pool_1.output_shape, [5, 5, 32, 64], [1, 1])
    self.relu_2 = ly.relu()
    self.max_pool_2 = ly.max_pooling(self.conv2d_2.output_shape,
                                     filter_shape=[2, 2], strides=[2, 2])
    self.flatter = ly.flatter()
    self.full_connect_1 = ly.full_connect(input_len=7 * 7 * 64, output_len=1024)
    self.relu_3 = ly.relu()
    self.dropout_1 = ly.dropout(1024)
    self.full_connect_2 = ly.full_connect(input_len=1024, output_len=10)
    self.loss_func = ly.softmax_cross_entropy_error()
def __init__(self, learning_rate, input_shape):
    # input_shape example: [BS, 1, 28, 28]
    self.lr = learning_rate
    # conv1: (BS,1,28,28) -> (BS,6,28,28) -> (BS,6,14,14)
    self.conv2d_1 = ly.conv2d(input_shape, [5, 5, 1, 6], [1, 1], 'SAME')
    self.relu_1 = ly.relu()
    self.pool_1 = ly.max_pooling(self.conv2d_1.output_shape, [2, 2], [2, 2], 'SAME')
    # conv2: (BS,6,14,14) -> (BS,10,14,14) -> (BS,10,7,7)
    self.conv2d_2 = ly.conv2d(self.pool_1.output_shape, [5, 5, 6, 10], [1, 1], 'SAME')
    self.relu_2 = ly.relu()
    self.pool_2 = ly.max_pooling(self.conv2d_2.output_shape, [2, 2], [2, 2], 'SAME')
    # flat: (BS,10,7,7) -> (BS,490)
    self.flatter = ly.flatter()
    # fc1: (BS,490) -> (BS,84)
    self.full_connect_1 = ly.full_connect(490, 84)
    self.relu_3 = ly.relu()
    self.dropout = ly.dropout(lenth=84)
    # fc2: (BS,84) -> (BS,10)
    self.full_connect_2 = ly.full_connect(84, 10)
    self.loss_func = ly.softmax_cross_entropy_error()
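# A minimal sketch (not part of the original class) of how a forward pass
# would chain the layers defined above. It assumes each `ly` layer exposes a
# `forward` method; the method name and the layer API are assumptions.
def forward_sketch(self, x):
    x = self.relu_1.forward(self.conv2d_1.forward(x))
    x = self.pool_1.forward(x)
    x = self.relu_2.forward(self.conv2d_2.forward(x))
    x = self.pool_2.forward(x)
    x = self.flatter.forward(x)             # (BS,10,7,7) -> (BS,490)
    x = self.relu_3.forward(self.full_connect_1.forward(x))
    x = self.dropout.forward(x)
    return self.full_connect_2.forward(x)   # (BS,10) class scores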
def forward(self, p, p_pos, p_ner, p_mask, q, q_pos, q_ner, q_mask,
            c, c_pos, c_ner, c_mask,
            p_f_tensor, q_f_tensor, c_f_tensor,
            p_q_relation, p_c_relation, q_p_relation,
            q_c_relation, c_p_relation, c_q_relation, is_paint=0):
    p_rnn_input, q_rnn_input, c_rnn_input = self.add_embeddings(
        p, p_pos, p_ner, q, q_pos, q_ner, c, c_pos, c_ner,
        p_f_tensor, q_f_tensor, c_f_tensor,
        p_q_relation, p_c_relation, q_p_relation,
        q_c_relation, c_p_relation, c_q_relation)
    p_hiddens = self.context_rnn(p_rnn_input, p_mask)
    q_hiddens = self.context_rnn(q_rnn_input, q_mask)
    c_hiddens = self.context_rnn(c_rnn_input, c_mask)
    if self.args.dropout_rnn_output > 0:
        p_hiddens = nn.functional.dropout(p_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        q_hiddens = nn.functional.dropout(q_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        c_hiddens = nn.functional.dropout(c_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)

    # matching (a direct dispatch on self.args.tri_input to the NA / CA /
    # NA_CA TriMatching variants is disabled in favor of self.mfunction)
    self.mfunction(p_hiddens, q_hiddens, c_hiddens, p_mask, q_mask, c_mask)

    # ------ output layer ------
    _, matched_q_self = self.q_self_attn(self.matched_q, q_mask)
    _, matched_p_self = self.q_self_attn(self.matched_p, p_mask)
    _, matched_c_self = self.q_self_attn(self.matched_c, c_mask)
    p_infer_hidden_ave = layers.ave_pooling(self.p_infer_emb, p_mask)
    p_infer_hidden_max = layers.max_pooling(self.p_infer_emb)
    q_infer_hidden_ave = layers.ave_pooling(self.q_infer_emb, q_mask)
    q_infer_hidden_max = layers.max_pooling(self.q_infer_emb)
    c_infer_hidden_ave = layers.ave_pooling(self.c_infer_emb, c_mask)
    c_infer_hidden_max = layers.max_pooling(self.c_infer_emb)
    infer_linear = self.c_infer_linear(torch.cat(
        [p_infer_hidden_ave, p_infer_hidden_max,
         q_infer_hidden_ave, q_infer_hidden_max,
         c_infer_hidden_ave, c_infer_hidden_max,
         matched_q_self, matched_p_self, matched_c_self], -1))
    logits = self.logits_linear(infer_linear)
    proba = torch.sigmoid(logits.squeeze(1))
    return proba
def vgg16(inputs, num_classes, keep_prob, is_training):
    """VGG-16 network."""
    x = tf.nn.lrn(inputs, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
                  name='inputs')
    # first conv block
    conv1_1 = conv2d(x, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv1_1')
    conv1_2 = conv2d(conv1_1, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv1_2')
    pool1 = max_pooling(conv1_2, ksize=[2, 2], strides=[2, 2], padding='SAME',
                        name='pool1')
    # second conv block
    conv2_1 = conv2d(pool1, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv2_1')
    conv2_2 = conv2d(conv2_1, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv2_2')
    pool2 = max_pooling(conv2_2, ksize=[2, 2], strides=[2, 2], padding='SAME',
                        name='pool2')
    # third conv block
    conv3_1 = conv2d(pool2, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv3_1')
    conv3_2 = conv2d(conv3_1, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv3_2')
    conv3_3 = conv2d(conv3_2, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv3_3')
    pool3 = max_pooling(conv3_3, ksize=[2, 2], strides=[2, 2], padding='SAME',
                        name='pool3')
    # fourth conv block
    conv4_1 = conv2d(pool3, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv4_1')
    conv4_2 = conv2d(conv4_1, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv4_2')
    conv4_3 = conv2d(conv4_2, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv4_3')
    pool4 = max_pooling(conv4_3, ksize=[2, 2], strides=[2, 2], padding='SAME',
                        name='pool4')
    # fifth conv block
    conv5_1 = conv2d(pool4, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv5_1')
    conv5_2 = conv2d(conv5_1, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv5_2')
    conv5_3 = conv2d(conv5_2, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1],
                     padding='SAME', is_training=is_training, name='conv5_3')
    pool5 = max_pooling(conv5_3, ksize=[2, 2], strides=[2, 2], padding='SAME',
                        name='pool5')
    # fully connected block: flatten the previous layer's output into a vector
    flatten_shape = (pool5.get_shape()[1].value * pool5.get_shape()[2].value *
                     pool5.get_shape()[3].value)
    fc1 = tf.reshape(pool5, shape=[-1, flatten_shape])
    fc1 = fc(fc1, shape=[flatten_shape, 4096], name='fc1')
    fc1 = dropout(fc1, keep_prob=keep_prob, name='dropout1')
    fc2 = fc(fc1, shape=[4096, 4096], name='fc2')
    fc2 = dropout(fc2, keep_prob=keep_prob, name='dropout2')
    # no dropout after the classifier layer
    fc3 = fc(fc2, shape=[4096, num_classes], name='fc3')
    # output: softmax probabilities (despite the variable name, not raw logits)
    logits = tf.nn.softmax(fc3, name="softmax")
    return logits
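# Illustrative wiring for vgg16, assuming TF1-style placeholders; the helper
# functions (conv2d, fc, max_pooling, dropout) are defined elsewhere in this
# repo, and the 224x224x3 input size is the standard VGG assumption (pool5
# then flattens to 7*7*512).
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')
is_training_ph = tf.placeholder(tf.bool, name='is_training')
probs = vgg16(images, num_classes=1000, keep_prob=keep_prob_ph,
              is_training=is_training_ph)  # softmax probabilities, [None, 1000]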
def graph_forward():
    model = graph.Graph()
    model.add(layers.conv(layers.xaxier_initilizer, layers.zero_initilizer,
                          1, 1, 32, 4, 3, 3))
    # model.add(layers.Relu())
    model.add(layers.conv(layers.xaxier_initilizer, layers.zero_initilizer,
                          1, 2, 16, 32, 3, 3))
    # model.add(layers.Relu())
    model.add(layers.max_pooling(layers.xaxier_initilizer, layers.zero_initilizer,
                                 0, 2, 2, 2))
    model.add(layers.flatten())
    model.add(layers.FullConn(layers.xaxier_initilizer, layers.zero_initilizer,
                              (1024, 10)))
    # model.add(layers.Relu())
    crit = layers.softmax_with_loss()
    y = np.array([1, 2, 3])

    def foo(x):
        logits = model.forward(x)
        prob = crit.forward(logits)
        dy, loss = crit.backward(logits, y)
        dx = model.backward(x, dy)
        return loss, dx

    return foo
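# Hypothetical driver for graph_forward. Assuming the conv signature reads as
# (w_init, b_init, pad, stride, out_ch, in_ch, kh, kw), a (3, 4, 32, 32) input
# flattens to 16 * 8 * 8 = 1024, matching the FullConn above; both the
# signature reading and the input shape are assumptions.
step = graph_forward()
x = np.random.randn(3, 4, 32, 32)  # batch of 3 to match y = [1, 2, 3]
loss, dx = step(x)
assert dx.shape == x.shape  # backward must return one gradient per input element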
def __init__(self, input_shape, learning_rate=0.001):
    BS = input_shape[0]
    self.lr = learning_rate
    # conv1: 1*28*28 -> 6*28*28 -> 6*13*13
    # ('valid' 3x3 pooling with stride 2: floor((28 - 3) / 2) + 1 = 13)
    self.conv1 = ly.conv2d(input_shape, [5, 5, 1, 6], [1, 1], 'same')
    self.conv1_relu = ly.relu()
    self.pool1 = ly.max_pooling(self.conv1.out_shape, [3, 3], [2, 2], 'valid')
    # conv2: 6*13*13 -> 10*13*13 -> 10*6*6
    self.conv2 = ly.conv2d(self.pool1.out_shape, [3, 3, 6, 10], [1, 1], 'same')
    self.conv2_relu = ly.relu()
    self.pool2 = ly.max_pooling(self.conv2.out_shape, [3, 3], [2, 2], 'valid')
    # flatten: 10*6*6 = 360
    self.conv_fc = ly.conv_fc()
    self.fc1 = ly.full_connect(360, 84)
    self.fc1_relu = ly.relu()
    self.fc2 = ly.full_connect(84, 10)
    self.loss = ly.softmax_cross_with_entropy()
def pool_x_forward():
    N, c, h, w = 5, 10, 5, 5
    pool = layers.max_pooling(layers.xaxier_initilizer, layers.zero_initilizer,
                              0, 2, 2, 2)
    dm = layers.dummy()

    def f(x):
        y = pool.forward(x)
        yout = dm.forward(y)
        dy = dm.backward(y, 1)
        dx = pool.backward(x, dy)
        return yout, dx

    return f
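# Smoke test for pool_x_forward: 2x2 max pooling with stride 2 should route
# gradients back only to the argmax positions. The NCHW numpy input below
# matches the N, c, h, w constants above; the layout itself is an assumption.
f = pool_x_forward()
x = np.random.randn(5, 10, 5, 5)
yout, dx = f(x)
assert dx.shape == x.shape  # upstream gradient routed back to the input shape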
def forward(self, p, p_pos, p_ner, p_mask, q, q_pos, q_ner, q_mask,
            c, c_pos, c_ner, c_mask,
            p_f_tensor, q_f_tensor, c_f_tensor,
            p_q_relation, p_c_relation, q_p_relation,
            q_c_relation, c_p_relation, c_q_relation, is_paint=0):
    # word / POS / NER / relation embeddings for passage (p), question (q),
    # and candidate answer (c)
    p_emb, q_emb, c_emb = self.embedding(p), self.embedding(q), self.embedding(c)
    p_pos_emb, q_pos_emb, c_pos_emb = (self.pos_embedding(p_pos),
                                       self.pos_embedding(q_pos),
                                       self.pos_embedding(c_pos))
    p_ner_emb, q_ner_emb, c_ner_emb = (self.ner_embedding(p_ner),
                                       self.ner_embedding(q_ner),
                                       self.ner_embedding(c_ner))
    p_q_rel_emb, p_c_rel_emb = (self.rel_embedding(p_q_relation),
                                self.rel_embedding(p_c_relation))
    q_p_rel_emb, q_c_rel_emb = (self.rel_embedding(q_p_relation),
                                self.rel_embedding(q_c_relation))
    c_p_rel_emb, c_q_rel_emb = (self.rel_embedding(c_p_relation),
                                self.rel_embedding(c_q_relation))

    # Dropout on embeddings
    if self.args.dropout_emb > 0:
        drop = lambda t: nn.functional.dropout(t, p=self.args.dropout_emb,
                                               training=self.training)
        p_emb, q_emb, c_emb = drop(p_emb), drop(q_emb), drop(c_emb)
        p_pos_emb, q_pos_emb, c_pos_emb = drop(p_pos_emb), drop(q_pos_emb), drop(c_pos_emb)
        p_ner_emb, q_ner_emb, c_ner_emb = drop(p_ner_emb), drop(q_ner_emb), drop(c_ner_emb)

    p_rnn_input = torch.cat([p_emb, p_pos_emb, p_ner_emb, p_f_tensor,
                             p_q_rel_emb, p_c_rel_emb], 2)
    q_rnn_input = torch.cat([q_emb, q_pos_emb, q_ner_emb, q_f_tensor,
                             q_p_rel_emb, q_c_rel_emb], 2)
    c_rnn_input = torch.cat([c_emb, c_pos_emb, c_ner_emb, c_f_tensor,
                             c_p_rel_emb, c_q_rel_emb], 2)
    p_hiddens = self.context_rnn(p_rnn_input, p_mask)
    q_hiddens = self.context_rnn(q_rnn_input, q_mask)
    c_hiddens = self.context_rnn(c_rnn_input, c_mask)

    # ------ q matched against p ------
    _, q_p_weighted_hiddens = self.hidden_match(q_hiddens, p_hiddens, p_mask)
    q_p_cat = torch.cat([q_hiddens, q_p_weighted_hiddens], 2)
    q_p_cat_weight, q_p_cat_weighted_hiddens = self.hidden_match(
        q_p_cat, q_p_cat, q_mask)
    if self.args.dropout_att_score > 0:
        q_p_cat_weight = nn.functional.dropout(
            q_p_cat_weight, p=self.args.dropout_att_score, training=self.training)
    matched_q = q_p_cat_weight.bmm(q_hiddens)

    # ------ p matched against c and q ------
    _, p_c_weighted_hiddens = self.hidden_match(p_hiddens, c_hiddens, c_mask)
    _, p_q_weighted_hiddens = self.hidden_match(p_hiddens, q_hiddens, q_mask)
    p_cq_cat = torch.cat([p_hiddens, p_c_weighted_hiddens, p_q_weighted_hiddens], 2)
    p_cq_cat_weight, p_cq_cat_weighted_hiddens = self.hidden_match(
        p_cq_cat, p_cq_cat, p_mask)
    if self.args.dropout_att_score > 0:
        p_cq_cat_weight = nn.functional.dropout(
            p_cq_cat_weight, p=self.args.dropout_att_score, training=self.training)
    matched_p = p_cq_cat_weight.bmm(p_hiddens)

    # ------ c matched against p and q ------
    _, c_p_weighted_hiddens = self.hidden_match(c_hiddens, p_hiddens, p_mask)
    _, c_q_weighted_hiddens = self.hidden_match(c_hiddens, q_hiddens, q_mask)
    concat_feature = torch.cat([c_hiddens, c_q_weighted_hiddens,
                                c_p_weighted_hiddens], 2)
    sub_feature = (c_hiddens - c_q_weighted_hiddens) * (c_hiddens - c_p_weighted_hiddens)
    mul_feature = c_hiddens * c_q_weighted_hiddens * c_p_weighted_hiddens
    c_mfeature = {"c": concat_feature, "s": sub_feature, "m": mul_feature}

    dim = c_hiddens.size()
    init_mem = torch.zeros(dim[0], dim[1], dim[2]).float().cuda()  # zero memory
    c_infer_emb, self.mem_list, self.mem_gate_list = self.mtinfer(
        c_mfeature, c_mask, init_mem=init_mem, x_order=self.args.matching_order)

    _, matched_q_self = self.q_self_attn(matched_q, q_mask)
    _, matched_p_self = self.q_self_attn(matched_p, p_mask)
    c_infer_hidden_ave = layers.ave_pooling(c_infer_emb, c_mask)
    c_infer_hidden_max = layers.max_pooling(c_infer_emb)
    infer_linear = self.c_infer_linear(torch.cat(
        [c_infer_hidden_ave, c_infer_hidden_max,
         matched_q_self, matched_p_self], -1))
    logits = self.logits_linear(infer_linear)
    proba = torch.sigmoid(logits.squeeze(1))
    return proba
def alexnet(inputs, num_classes, keep_prob):
    """Create the AlexNet model."""
    x = tf.reshape(inputs, shape=[-1, 28, 28, 1])
    # first conv layer, downsampling layer, and normalization layer
    conv1 = conv2d(x, shape=(11, 11, 1, 96), padding='SAME', name='conv1')
    pool1 = max_pooling(conv1, ksize=(2, 2), stride=(2, 2), padding='SAME',
                        name='pool1')
    norm1 = norm(pool1, radius=4, name='norm1')
    # second conv layer
    conv2 = conv2d(norm1, shape=(5, 5, 96, 256), padding='SAME', name='conv2')
    pool2 = max_pooling(conv2, ksize=(2, 2), stride=(2, 2), padding='SAME',
                        name='pool2')
    norm2 = norm(pool2, radius=4, name='norm2')
    # third conv layer (no pooling)
    conv3 = conv2d(norm2, shape=(3, 3, 256, 384), padding='SAME', name='conv3')
    norm3 = norm(conv3, radius=4, name='norm3')
    # fourth conv layer
    conv4 = conv2d(norm3, shape=(3, 3, 384, 384), padding='SAME', name='conv4')
    # fifth conv layer
    conv5 = conv2d(conv4, shape=(3, 3, 384, 256), padding='SAME', name='conv5')
    pool5 = max_pooling(conv5, ksize=(2, 2), stride=(2, 2), padding='SAME',
                        name='pool5')
    norm5 = norm(pool5, radius=4, name='norm5')
    # fully connected layers: three stride-2 pools take 28 -> 14 -> 7 -> 4
    fc1 = tf.reshape(norm5, shape=(-1, 4 * 4 * 256))
    fc1 = fc(fc1, shape=(4 * 4 * 256, 4096), name='fc1')
    fc1 = dropout(fc1, keep_prob=keep_prob, name='dropout1')
    fc2 = fc(fc1, shape=(4096, 4096), name='fc2')
    fc2 = dropout(fc2, keep_prob=keep_prob, name='dropout2')
    # output logits
    with tf.variable_scope('classifier') as scope:
        weights = tf.get_variable('weights', shape=[4096, num_classes],
                                  initializer=tf.initializers.he_normal())
        biases = tf.get_variable('biases', shape=[num_classes],
                                 initializer=tf.initializers.random_normal())
        logits = tf.add(tf.matmul(fc2, weights), biases,
                        name=scope.name + '_logits')
    return logits
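# Sketch of attaching a training objective to the alexnet logits, assuming a
# flat 784-dim image placeholder and one-hot labels; the loss and optimizer
# choices here are illustrative, not taken from the original code.
images = tf.placeholder(tf.float32, [None, 784], name='images')
labels = tf.placeholder(tf.float32, [None, 10], name='labels')
logits = alexnet(images, num_classes=10, keep_prob=0.5)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)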
def forward(self, p, p_pos, p_ner, p_mask, q, q_pos, q_ner, q_mask,
            c, c_pos, c_ner, c_mask,
            p_f_tensor, q_f_tensor, c_f_tensor,
            p_q_relation, p_c_relation, q_p_relation,
            q_c_relation, c_p_relation, c_q_relation, is_paint=0):
    p_rnn_input, q_rnn_input, c_rnn_input = self.add_embeddings(
        p, p_pos, p_ner, q, q_pos, q_ner, c, c_pos, c_ner,
        p_f_tensor, q_f_tensor, c_f_tensor,
        p_q_relation, p_c_relation, q_p_relation,
        q_c_relation, c_p_relation, c_q_relation)
    p_hiddens = self.context_rnn(p_rnn_input, p_mask)
    q_hiddens = self.context_rnn(q_rnn_input, q_mask)
    c_hiddens = self.context_rnn(c_rnn_input, c_mask)
    if self.args.dropout_rnn_output > 0:
        p_hiddens = nn.functional.dropout(p_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        q_hiddens = nn.functional.dropout(q_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        c_hiddens = nn.functional.dropout(c_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)

    # matching: only the passage and choice channels are used in this variant
    self.mfunction(p_hiddens, c_hiddens, p_mask, c_mask)

    # ------ output layer ------
    _, matched_p_self = self.q_self_attn(self.matched_p, p_mask)
    _, matched_c_self = self.q_self_attn(self.matched_c, c_mask)
    outputs = [matched_p_self, matched_c_self]
    if self.args.p_channel:
        p_infer_hidden_ave = layers.ave_pooling(self.p_infer_emb, p_mask)
        p_infer_hidden_max = layers.max_pooling(self.p_infer_emb)
        outputs.append(p_infer_hidden_ave)
        outputs.append(p_infer_hidden_max)
    # the q channel (self.args.q_channel) is disabled in this variant
    if self.args.c_channel:
        c_infer_hidden_ave = layers.ave_pooling(self.c_infer_emb, c_mask)
        c_infer_hidden_max = layers.max_pooling(self.c_infer_emb)
        outputs.append(c_infer_hidden_ave)
        outputs.append(c_infer_hidden_max)
    infer_linear = self.c_infer_linear(torch.cat(outputs, -1))
    logits = self.logits_linear(infer_linear)
    # e.g. infer_linear: [44, 250], logits: [44, 1], proba: [44]
    proba = torch.sigmoid(logits.squeeze(1))
    return proba
def forward(self, p, p_pos, p_ner, p_mask, q, q_pos, q_ner, q_mask,
            c, c_pos, c_ner, c_mask,
            p_f_tensor, q_f_tensor, c_f_tensor,
            p_q_relation, p_c_relation, q_p_relation,
            q_c_relation, c_p_relation, c_q_relation, is_paint=0):
    self.p = p
    self.q = q
    self.c = c
    # word / POS / NER / relation embeddings for passage (p), question (q),
    # and candidate answer (c)
    p_emb, q_emb, c_emb = self.embedding(p), self.embedding(q), self.embedding(c)
    p_pos_emb, q_pos_emb, c_pos_emb = (self.pos_embedding(p_pos),
                                       self.pos_embedding(q_pos),
                                       self.pos_embedding(c_pos))
    p_ner_emb, q_ner_emb, c_ner_emb = (self.ner_embedding(p_ner),
                                       self.ner_embedding(q_ner),
                                       self.ner_embedding(c_ner))
    p_q_rel_emb, p_c_rel_emb = (self.rel_embedding(p_q_relation),
                                self.rel_embedding(p_c_relation))
    q_p_rel_emb, q_c_rel_emb = (self.rel_embedding(q_p_relation),
                                self.rel_embedding(q_c_relation))
    c_p_rel_emb, c_q_rel_emb = (self.rel_embedding(c_p_relation),
                                self.rel_embedding(c_q_relation))

    # Dropout on embeddings
    if self.args.dropout_emb > 0:
        drop = lambda t: nn.functional.dropout(t, p=self.args.dropout_emb,
                                               training=self.training)
        p_emb, q_emb, c_emb = drop(p_emb), drop(q_emb), drop(c_emb)
        p_pos_emb, q_pos_emb, c_pos_emb = drop(p_pos_emb), drop(q_pos_emb), drop(c_pos_emb)
        p_ner_emb, q_ner_emb, c_ner_emb = drop(p_ner_emb), drop(q_ner_emb), drop(c_ner_emb)

    p_rnn_input = torch.cat([p_emb, p_pos_emb, p_ner_emb, p_f_tensor,
                             p_q_rel_emb, p_c_rel_emb], 2)
    q_rnn_input = torch.cat([q_emb, q_pos_emb, q_ner_emb, q_f_tensor,
                             q_p_rel_emb, q_c_rel_emb], 2)
    c_rnn_input = torch.cat([c_emb, c_pos_emb, c_ner_emb, c_f_tensor,
                             c_p_rel_emb, c_q_rel_emb], 2)
    p_hiddens = self.context_rnn(p_rnn_input, p_mask)
    q_hiddens = self.context_rnn(q_rnn_input, q_mask)
    c_hiddens = self.context_rnn(c_rnn_input, c_mask)

    _, c_q_weighted_hiddens = self.hidden_match(c_hiddens, q_hiddens, q_mask)

    # ------ q matched against p ------
    _, q_p_weighted_hiddens = self.hidden_match(q_hiddens, p_hiddens, p_mask)
    q_p_cat = torch.cat([q_hiddens, q_p_weighted_hiddens], 2)
    q_p_cat_weight, q_p_cat_weighted_hiddens = self.hidden_match(
        q_p_cat, q_p_cat, q_mask)
    matched_q = q_p_cat_weight.bmm(q_hiddens)

    # ------ p matched against c and q ------
    _, p_c_weighted_hiddens = self.hidden_match(p_hiddens, c_hiddens, c_mask)
    _, p_q_weighted_hiddens = self.hidden_match(p_hiddens, q_hiddens, q_mask)
    # element-wise "difference of differences" feature (the plain concat
    # variant is disabled)
    p_cq_cat = (p_hiddens - p_c_weighted_hiddens) * (p_hiddens - p_q_weighted_hiddens)
    p_cq_cat_weight, p_cq_cat_weighted_hiddens = self.hidden_match(
        p_cq_cat, p_cq_cat, p_mask)
    matched_p = p_cq_cat_weight.bmm(p_hiddens)
    self.matched_p_self_weight, matched_p_self = self.q_self_attn(matched_p, p_mask)
    self.c_weighted_matched_p_weight, c_weighted_matched_p = self.hidden_match(
        c_hiddens, matched_p, p_mask)

    # ------ c matched against q ------
    concat_feature = torch.cat([c_hiddens, c_q_weighted_hiddens], 2)
    sub_feature = c_hiddens - c_q_weighted_hiddens
    mul_feature = self.args.beta * c_hiddens * c_q_weighted_hiddens
    c_mfeature = {"c": concat_feature, "s": sub_feature, "m": mul_feature}

    # initial memory: c-weighted matched p (empirically the best choice; zero
    # memory and an expanded matched_p_self were also tried)
    init_mem = c_weighted_matched_p
    if self.args.dropout_init_mem_emb > 0:
        init_mem = nn.functional.dropout(init_mem,
                                         p=self.args.dropout_init_mem_emb,
                                         training=self.training)
    c_infer_emb, self.mem_list, self.mem_gate_list = self.mtinfer(
        c_mfeature, c_mask, init_mem=init_mem,
        x_order=self.args.matching_order)
    self.c_infer_emb = c_infer_emb

    self.matched_q_self_weight, matched_q_self = self.q_self_attn(matched_q, q_mask)
    c_infer_hidden_ave = layers.ave_pooling(c_infer_emb, c_mask)
    c_infer_hidden_max = layers.max_pooling(c_infer_emb)
    infer_linear = self.c_infer_linear(torch.cat(
        [c_infer_hidden_ave, c_infer_hidden_max,
         matched_p_self, matched_q_self], -1))
    logits = self.logits_linear(infer_linear)
    proba = torch.sigmoid(logits.squeeze(1))
    if is_paint == 1:
        self.paint_data()
    return proba
def forward_propagate(x, y, _weights, debug=True):
    activation_caches = {}
    m = x.shape[0]
    # conv1 -> relu -> 2x2 max pool (conv_fast takes pad=2, stride=1, matching
    # the naive reference checks below)
    activation_caches["conv1"] = conv_fast(x, _weights["W1"], _weights["B1"], 2, 1)
    activation_caches["A1"] = relu(activation_caches["conv1"])
    activation_caches["pool1"] = max_pooling(activation_caches["A1"], 2, 2)

    # sanity check that the vectorized convolution matches the naive versions
    if debug:
        kconv, kcache = karpathy_conv_forward_naive(
            x, _weights["W1"], _weights["B1"], {'stride': 1, 'pad': 2})
        assert np.mean(np.isclose(activation_caches["conv1"], kconv)) == 1.0
        conv1_verify = conv_forward_naive(x, _weights["W1"], _weights["B1"], 2, 1)
        assert np.mean(np.isclose(activation_caches["conv1"], conv1_verify)) == 1.0
        kpool1, kcache1 = karpathy_max_pool_forward_naive(
            activation_caches["A1"],
            {'pool_height': 2, 'pool_width': 2, 'stride': 2})
        assert np.mean(np.isclose(activation_caches["pool1"], kpool1)) == 1.0

    # conv2 -> relu -> 2x2 max pool, then flatten to (m, C*H*W)
    activation_caches["conv2"] = conv_fast(activation_caches["pool1"],
                                           _weights["W2"], _weights["B2"], 2, 1)
    activation_caches["A2"] = relu(activation_caches["conv2"])
    activation_caches["pool2"] = max_pooling(activation_caches["A2"], 2, 2)
    activation_caches["Ar2"] = activation_caches["pool2"].reshape(
        (m, activation_caches["pool2"].shape[1] *
         activation_caches["pool2"].shape[2] *
         activation_caches["pool2"].shape[3]))
    if debug:
        conv2_verify = conv_forward_naive(activation_caches["pool1"],
                                          _weights["W2"], _weights["B2"], 2, 1)
        assert np.mean(np.isclose(activation_caches["conv2"], conv2_verify)) == 1.0

    # two fully connected layers with a softmax output
    activation_caches["Z3"] = fully_connected(activation_caches["Ar2"],
                                              _weights["W3"], _weights["B3"])
    activation_caches["A3"] = relu(activation_caches["Z3"])
    activation_caches["Z4"] = fully_connected(activation_caches["A3"],
                                              _weights["W4"], _weights["B4"])
    activation_caches["A4"] = softmax(activation_caches["Z4"])
    cost = np.mean(softmax_cost(y, activation_caches["A4"], m))
    return activation_caches, cost
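# Hypothetical driver for forward_propagate. The weight layouts follow the
# Karpathy reference checks above (x: NCHW, W: (F, C, HH, WW)); the filter
# counts, fc widths, (in, out) fc weight orientation, and one-hot label format
# are illustrative assumptions. With pad=2 and 5x5 kernels the spatial size is
# preserved, so two 2x2 pools take 28 -> 14 -> 7.
rng = np.random.default_rng(0)
w = {
    "W1": 0.01 * rng.standard_normal((8, 1, 5, 5)),     "B1": np.zeros(8),
    "W2": 0.01 * rng.standard_normal((16, 8, 5, 5)),    "B2": np.zeros(16),
    "W3": 0.01 * rng.standard_normal((16 * 7 * 7, 64)), "B3": np.zeros(64),
    "W4": 0.01 * rng.standard_normal((64, 10)),         "B4": np.zeros(10),
}
x = rng.standard_normal((4, 1, 28, 28))
y = np.eye(10)[rng.integers(0, 10, size=4)]  # one-hot labels (assumed format)
caches, cost = forward_propagate(x, y, w, debug=True)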
def forward(self, p, p_pos, p_ner, p_mask, q, q_pos, q_ner, q_mask,
            c, c_pos, c_ner, c_mask,
            p_f_tensor, q_f_tensor, c_f_tensor,
            p_q_relation, p_c_relation, q_p_relation,
            q_c_relation, c_p_relation, c_q_relation, is_paint=0):
    p_rnn_input, q_rnn_input, c_rnn_input = self.add_embeddings(
        p, p_pos, p_ner, q, q_pos, q_ner, c, c_pos, c_ner,
        p_f_tensor, q_f_tensor, c_f_tensor,
        p_q_relation, p_c_relation, q_p_relation,
        q_c_relation, c_p_relation, c_q_relation)
    p_hiddens = self.context_rnn(p_rnn_input, p_mask)
    q_hiddens = self.context_rnn(q_rnn_input, q_mask)
    c_hiddens = self.context_rnn(c_rnn_input, c_mask)
    if self.args.dropout_rnn_output > 0:
        p_hiddens = nn.functional.dropout(p_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        q_hiddens = nn.functional.dropout(q_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)
        c_hiddens = nn.functional.dropout(c_hiddens, p=self.args.dropout_rnn_output,
                                          training=self.training)

    # ------ naive (pairwise) attention ------
    _, p_q_weighted_hiddens = self.hidden_match(p_hiddens, q_hiddens, q_mask)
    _, p_c_weighted_hiddens = self.hidden_match(p_hiddens, c_hiddens, c_mask)
    _, q_p_weighted_hiddens = self.hidden_match(q_hiddens, p_hiddens, p_mask)
    _, q_c_weighted_hiddens = self.hidden_match(q_hiddens, c_hiddens, c_mask)
    _, c_p_weighted_hiddens = self.hidden_match(c_hiddens, p_hiddens, p_mask)
    _, c_q_weighted_hiddens = self.hidden_match(c_hiddens, q_hiddens, q_mask)

    # ------ compound attention (computed but unused below) ------
    c_q_p_weighted_hiddens = self.hidden_match(c_hiddens, q_p_weighted_hiddens, q_mask)
    q_c_p_weighted_hiddens = self.hidden_match(q_hiddens, c_p_weighted_hiddens, c_mask)
    p_c_q_weighted_hiddens = self.hidden_match(p_hiddens, c_q_weighted_hiddens, c_mask)
    c_p_q_weighted_hiddens = self.hidden_match(c_hiddens, p_q_weighted_hiddens, p_mask)
    p_q_c_weighted_hiddens = self.hidden_match(p_hiddens, q_c_weighted_hiddens, q_mask)
    q_p_c_weighted_hiddens = self.hidden_match(q_hiddens, p_c_weighted_hiddens, p_mask)

    # ------ tri-matching per channel (p, q, c) ------
    p_infer_emb, p_mems, p_mem_gates = self.tri_matching(
        x=p_hiddens, x_y=p_q_weighted_hiddens, x_z=p_c_weighted_hiddens,
        agg_function=self.mtinfer, x_mask=p_mask)
    q_infer_emb, q_mems, q_mem_gates = self.tri_matching(
        x=q_hiddens, x_y=q_p_weighted_hiddens, x_z=q_c_weighted_hiddens,
        agg_function=self.mtinfer, x_mask=q_mask)
    c_infer_emb, c_mems, c_mem_gates = self.tri_matching(
        x=c_hiddens, x_y=c_p_weighted_hiddens, x_z=c_q_weighted_hiddens,
        agg_function=self.mtinfer, x_mask=c_mask)

    # ------ matched self-representations ------
    matched_p = self.matched_self(x=p_hiddens, x_y=p_q_weighted_hiddens,
                                  x_z=p_c_weighted_hiddens, x_mask=p_mask)
    matched_q = self.matched_self(x=q_hiddens, x_y=q_p_weighted_hiddens,
                                  x_z=q_c_weighted_hiddens, x_mask=q_mask)
    matched_c = self.matched_self(x=c_hiddens, x_y=c_p_weighted_hiddens,
                                  x_z=c_q_weighted_hiddens, x_mask=c_mask)

    # ------ output layer ------
    _, matched_q_self = self.q_self_attn(matched_q, q_mask)
    _, matched_p_self = self.q_self_attn(matched_p, p_mask)
    _, matched_c_self = self.q_self_attn(matched_c, c_mask)
    p_infer_hidden_ave = layers.ave_pooling(p_infer_emb, p_mask)
    p_infer_hidden_max = layers.max_pooling(p_infer_emb)
    q_infer_hidden_ave = layers.ave_pooling(q_infer_emb, q_mask)
    q_infer_hidden_max = layers.max_pooling(q_infer_emb)
    c_infer_hidden_ave = layers.ave_pooling(c_infer_emb, c_mask)
    c_infer_hidden_max = layers.max_pooling(c_infer_emb)
    infer_linear = self.c_infer_linear(torch.cat(
        [p_infer_hidden_ave, p_infer_hidden_max,
         q_infer_hidden_ave, q_infer_hidden_max,
         c_infer_hidden_ave, c_infer_hidden_max,
         matched_q_self, matched_p_self, matched_c_self], -1))
    logits = self.logits_linear(infer_linear)
    proba = torch.sigmoid(logits.squeeze(1))
    return proba