def build_typing_part(self):
    # Place Holders
    self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    self.mention_representation = tf.placeholder(
        tf.float32, [None, self.emb_dim], name="mention_repr")
    self.context = [
        tf.placeholder(tf.float32, [None, self.emb_dim], name="context" + str(i))
        for i in range(self.context_length * 2 + 1)
    ]
    self.target = tf.placeholder(tf.float32, [None, self.target_dim], name="target")

    ### dropout and splitting context into left and right
    self.mention_representation_dropout = tf.nn.dropout(
        self.mention_representation, self.keep_prob)
    self.left_context = self.context[:self.context_length]
    self.right_context = self.context[self.context_length + 1:]

    # Averaging Encoder
    if self.encoder == "averaging":
        self.left_context_representation = tf.add_n(self.left_context)
        self.right_context_representation = tf.add_n(self.right_context)
        self.context_representation = tf.concat(1, [
            self.left_context_representation,
            self.right_context_representation
        ])

    # LSTM Encoder
    if self.encoder == "lstm":
        self.left_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        self.right_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        with tf.variable_scope("rnn_left") as scope:
            self.left_rnn, _ = tf.nn.rnn(self.left_lstm,
                                         self.left_context,
                                         dtype=tf.float32)
        with tf.variable_scope("rnn_right") as scope:
            self.right_rnn, _ = tf.nn.rnn(self.right_lstm,
                                          list(reversed(self.right_context)),
                                          dtype=tf.float32)
        self.context_representation = tf.concat(
            1, [self.left_rnn[-1], self.right_rnn[-1]])

    # Attentive Encoder
    if self.encoder == "attentive":
        self.left_lstm_F = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        self.right_lstm_F = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        self.left_lstm_B = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        self.right_lstm_B = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True)
        with tf.variable_scope("rnn_left") as scope:
            self.left_birnn, _, _ = tf.nn.bidirectional_rnn(
                self.left_lstm_F, self.left_lstm_B,
                self.left_context, dtype=tf.float32)
        with tf.variable_scope("rnn_right") as scope:
            self.right_birnn, _, _ = tf.nn.bidirectional_rnn(
                self.right_lstm_F, self.right_lstm_B,
                list(reversed(self.right_context)), dtype=tf.float32)
        self.context_representation, self.attentions = attentive_sum(
            self.left_birnn + self.right_birnn,
            input_dim=self.lstm_dim * 2,
            hidden_dim=self.att_dim)

    # Logistic Regression
    if self.feature:
        self.features = tf.placeholder(tf.int32, [None, self.feature_input_dim])
        self.feature_embeddings = weight_variable(
            'feat_embds', (self.feature_size, self.feature_dim), True)
        self.feature_representation = tf.nn.dropout(
            tf.reduce_sum(
                tf.nn.embedding_lookup(self.feature_embeddings, self.features), 1),
            self.keep_prob)
        self.representation = tf.concat(1, [
            self.mention_representation_dropout,
            self.context_representation,
            self.feature_representation
        ])
    else:
        self.representation = tf.concat(1, [
            self.mention_representation_dropout,
            self.context_representation
        ])

    if self.hier:
        _d = "Wiki" if self.type == "figer" else "OntoNotes"
        S = create_prior("./resource/" + _d + "/label2id_" + self.type + ".txt")
        assert (S.shape == (self.target_dim, self.target_dim))
        self.S = tf.constant(S, dtype=tf.float32)
        self.V = weight_variable('hier_V', (self.target_dim, self.rep_dim))
        self.W = tf.transpose(tf.matmul(self.S, self.V))
        self.logit = tf.matmul(self.representation, self.W)
    else:
        self.W = weight_variable('hier_W', (self.rep_dim, self.target_dim))
        self.logit = tf.matmul(self.representation, self.W)

    self.distribution = tf.nn.sigmoid(self.logit)
    self.type_loss = tf.nn.sigmoid_cross_entropy_with_logits(self.logit, self.target)
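# attentive_sum() is called by the attentive encoder above but is defined elsewhere in the
# repo. Below is a minimal sketch of what it is assumed to compute: score each context
# position with a small two-layer network, softmax the scores, and return the attention-
# weighted sum of the inputs plus the attention weights. It is written against the same
# TF 0.x API used above; the variable names and the exact use of the repo's
# weight_variable() helper are assumptions, not the original implementation.
def attentive_sum(inputs, input_dim, hidden_dim):
    # inputs: list of [batch, input_dim] tensors, one per context position
    with tf.variable_scope("attention"):
        W_e = weight_variable('att_W_e', (input_dim, hidden_dim))   # assumed helper signature
        W_a = weight_variable('att_W_a', (hidden_dim, 1))
        # unnormalized score per position: tanh(h W_e) W_a
        scores = [tf.matmul(tf.tanh(tf.matmul(h, W_e)), W_a) for h in inputs]
        attentions = tf.nn.softmax(tf.concat(1, scores))             # [batch, T]
        stacked = tf.transpose(tf.pack(inputs), [1, 0, 2])           # [batch, T, input_dim]
        weighted = stacked * tf.expand_dims(attentions, 2)
        return tf.reduce_sum(weighted, 1), attentions                # [batch, input_dim]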
def __init__(self, type="figer", encoder="averaging", hier=False, feature=False): # Argument Checking assert (encoder in ["averaging", "lstm", "attentive"]) assert (type in ["figer", "gillick"]) self.type = type self.encoder = encoder self.hier = hier self.feature = feature # Hyperparameters self.context_length = 10 self.emb_dim = 300 self.target_dim = 113 if type == "figer" else 89 self.feature_size = 600000 if type == "figer" else 100000 self.learning_rate = 0.001 self.lstm_dim = 100 self.att_dim = 100 # dim of attention module self.feature_dim = 50 # dim of feature representation self.feature_input_dim = 70 if encoder == "averaging": self.rep_dim = self.emb_dim * 3 else: self.rep_dim = self.lstm_dim * 2 + self.emb_dim if feature: self.rep_dim += self.feature_dim # Place Holders self.keep_prob = tf.placeholder(tf.float32) self.mention_representation = tf.placeholder(tf.float32, [None, self.emb_dim]) self.context = [ tf.placeholder(tf.float32, [None, self.emb_dim]) for _ in range(self.context_length * 2 + 1) ] self.target = tf.placeholder(tf.float32, [None, self.target_dim]) ### dropout and splitting context into left and right self.mention_representation_dropout = tf.nn.dropout( self.mention_representation, self.keep_prob) self.left_context = self.context[:self.context_length] self.right_context = self.context[self.context_length + 1:] # Averaging Encoder if encoder == "averaging": self.left_context_representation = tf.add_n(self.left_context) self.right_context_representation = tf.add_n(self.right_context) self.context_representation = tf.concat(1, [ self.left_context_representation, self.right_context_representation ]) # LSTM Encoder if encoder == "lstm": self.left_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) self.right_lstm = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) with tf.variable_scope("rnn_left") as scope: self.left_rnn, _ = tf.nn.rnn(self.left_lstm, self.left_context, dtype=tf.float32) with tf.variable_scope("rnn_right") as scope: self.right_rnn, _ = tf.nn.rnn(self.right_lstm, list(reversed( self.right_context)), dtype=tf.float32) self.context_representation = tf.concat( 1, [self.left_rnn[-1], self.right_rnn[-1]]) # Attentive Encoder if encoder == "attentive": self.left_lstm_F = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) self.right_lstm_F = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) self.left_lstm_B = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) self.right_lstm_B = tf.nn.rnn_cell.LSTMCell(self.lstm_dim, state_is_tuple=True) with tf.variable_scope("rnn_left") as scope: self.left_birnn, _, _ = tf.nn.bidirectional_rnn( self.left_lstm_F, self.left_lstm_B, self.left_context, dtype=tf.float32) with tf.variable_scope("rnn_right") as scope: self.right_birnn, _, _ = tf.nn.bidirectional_rnn( self.right_lstm_F, self.right_lstm_B, list(reversed(self.right_context)), dtype=tf.float32) self.context_representation, self.attentions = attentive_sum( self.left_birnn + self.right_birnn, input_dim=self.lstm_dim * 2, hidden_dim=self.att_dim) # Logistic Regression if feature: self.features = tf.placeholder(tf.int32, [None, self.feature_input_dim]) self.feature_embeddings = weight_variable( (self.feature_size, self.feature_dim)) self.feature_representation = tf.nn.dropout( tf.reduce_sum( tf.nn.embedding_lookup(self.feature_embeddings, self.features), 1), self.keep_prob) self.representation = tf.concat(1, [ self.mention_representation_dropout, self.context_representation, self.feature_representation ]) else: 
self.representation = tf.concat(1, [ self.mention_representation_dropout, self.context_representation ]) if hier: _d = "Wiki" if type == "figer" else "OntoNotes" S = create_prior("./resource/" + _d + "/label2id_" + type + ".txt") assert (S.shape == (self.target_dim, self.target_dim)) self.S = tf.constant(S, dtype=tf.float32) self.V = weight_variable((self.target_dim, self.rep_dim)) self.W = tf.transpose(tf.matmul(self.S, self.V)) self.logit = tf.matmul(self.representation, self.W) else: self.W = weight_variable((self.rep_dim, self.target_dim)) self.logit = tf.matmul(self.representation, self.W) self.distribution = tf.nn.sigmoid(self.logit) # Loss Function self.loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(self.logit, self.target)) # Optimizer self.optim = tf.train.AdamOptimizer(self.learning_rate).minimize( self.loss) # Session self.init = tf.initialize_all_variables() self.session = tf.Session() self.session.run(self.init)
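# A minimal usage sketch for the class above (not part of the original file): build the
# graph with the averaging encoder and run one training step. The batch variables
# (mention_batch, context_batch, target_batch) are hypothetical numpy arrays; context_batch
# is assumed to hold one [batch, emb_dim] slice per context placeholder, in the same order
# as model.context.
model = Model(type="figer", encoder="averaging", hier=False, feature=False)
feed = {model.mention_representation: mention_batch,   # [batch, 300]
        model.target: target_batch,                    # [batch, 113]
        model.keep_prob: 0.5}
for i, placeholder in enumerate(model.context):
    feed[placeholder] = context_batch[i]               # [batch, 300] per context position
_, loss_value = model.session.run([model.optim, model.loss], feed_dict=feed)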
out_context = tf.concat(1, [context_left, context_right])

### entity
out_target = tf.nn.dropout(x_target, keep_prob_target)

## output
concat = tf.concat(1, [out_context, out_target, out_femb])
#F_DIM = 0 # No Gillick
if args.encoder in ["attention", "lstm"]:
    W = tf.Variable(
        tf.random_uniform(
            [LABEL_SIZE, INPUT_SIZE + args.lstm_hidden_size * 2 + F_DIM],
            minval=-0.01, maxval=0.01))
else:
    W = tf.Variable(
        tf.random_uniform([LABEL_SIZE, INPUT_SIZE * 3 + F_DIM],
                          minval=-0.01, maxval=0.01))
S = tf.constant(create_prior("../../resource/label2id_" + DATA + ".txt"),
                dtype=tf.float32)
V = tf.matmul(S, W)
pre_activation = tf.matmul(concat, V, transpose_b=True)  # prior
#pre_activation = tf.matmul(concat, W, transpose_b=True) # no prior
output = tf.nn.sigmoid(pre_activation)

## loss,optimizer,init
loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(pre_activation, y))
optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)
init = tf.initialize_all_variables()

## batcher
print "loading dataset..."
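# create_prior() is used throughout but defined elsewhere in the repo. A sketch of the
# assumed behavior: read "label<TAB>id" lines and return a square matrix S in which
# S[i][j] = 1 iff label j is label i itself or one of its ancestors in the type path
# (e.g. /person/artist has /person as ancestor), so that V = S.W makes each label's weights
# the sum of its own and its ancestors' rows of W. The file format and the helper's exact
# logic are assumptions, not the original code.
import numpy as np

def create_prior(label2id_path):
    label2id = {}
    with open(label2id_path) as f:
        for line in f:
            label, idx = line.strip().split("\t")
            label2id[label] = int(idx)
    num = len(label2id)
    S = np.zeros((num, num), dtype=np.float32)
    for label, i in label2id.items():
        S[i][i] = 1.0
        prefix = label
        while "/" in prefix[1:]:
            prefix = prefix[:prefix.rindex("/")]        # walk up the type path
            if prefix in label2id:
                S[i][label2id[prefix]] = 1.0
    return S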
                                               args.context_length)
out_context, attention_context = modules.importance(
    rnn_out_context_left + rnn_out_context_right,
    args.lstm_hidden_size * 2,
    args.attention_hidden_size,
    args.context_length * 2)

### entity
out_target = tf.nn.dropout(x_target, keep_prob_target)

## output
concat = tf.concat(1, [out_context, out_target])
W = tf.Variable(
    tf.random_uniform([LABEL_SIZE, INPUT_SIZE + args.lstm_hidden_size * 2],
                      minval=-0.01, maxval=0.01))
S = tf.constant(create_prior("../../resource/label2id_figer.txt"),
                dtype=tf.float32)
V = tf.matmul(S, W)
pre_activation = tf.matmul(concat, V, transpose_b=True)  # prior
#pre_activation = tf.matmul(concat, W, transpose_b=True) # no prior
output = tf.nn.sigmoid(pre_activation)  # no_prior

## loss,optimizer,init
loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(pre_activation, y))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
init = tf.initialize_all_variables()

## batcher
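# Small illustration (not from the repo) of what the prior multiplication V = S.W above
# does: with a toy hierarchy /person -> /person/artist, the row of V for /person/artist
# is the sum of its own row of W and its parent's row, so the child classifier shares
# parameters with its parent. All values below are made up for the example.
import numpy as np
W_toy = np.array([[1.0, 0.0],     # /person
                  [0.0, 2.0],     # /person/artist
                  [3.0, 0.0]])    # /organization
S_toy = np.array([[1, 0, 0],
                  [1, 1, 0],      # /person/artist inherits /person
                  [0, 0, 1]], dtype=np.float32)
V_toy = S_toy.dot(W_toy)
# V_toy[1] == [1.0, 2.0]: the /person/artist weights are W[/person] + W[/person/artist]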
def __init__(self,type = "figer", encoder = "averaging", hier = False, feature = False, gaussian = False, margin = 1, negtive_size = 1, gaussian_dim = 20, regularize = False, minval = 1.0, maxval = 5.0, bag_strategy = "one", bags = False): super(Model, self).__init__() # Argument Checking assert(encoder in ["averaging", "lstm", "attentive","type_att"]) assert(type in ["figer", "gillick"]) self.type = type self.encoder = encoder self.hier = hier self.feature = feature # Hyperparameters self.context_length = 10 self.emb_dim = 300 self.target_dim = 113 if type == "figer" else 89 self.feature_size = 600000 if type == "figer" else 100000 self.learning_rate = 0.001 self.lstm_dim = 100 self.att_dim = 100 # dim of attention module self.feature_dim = 50 # dim of feature representation self.feature_input_dim = 70 self.bags = bags self.bag_strategy = bag_strategy self.lstm_layers = 1 if encoder == "averaging": self.rep_dim = self.emb_dim * 3 else: self.rep_dim = self.lstm_dim * 2 + self.emb_dim if feature: self.rep_dim += self.feature_dim if self.encoder == "lstm": self.left_lstm = nn.LSTM(self.emb_dim,self.lstm_dim,self.lstm_layers) self.right_lstm = nn.LSTM(self.emb_dim,self.lstm_dim,self.lstm_layers) elif self.encoder == "attentive" or self.encoder == "type_att": self.left_lstm = nn.LSTM(self.emb_dim,self.lstm_dim,self.lstm_layers,bidirectional=True) self.right_lstm = nn.LSTM(self.emb_dim,self.lstm_dim,self.lstm_layers,bidirectional=True) # self.W_e = Variable(torch.randn(2*self.lstm_dim, self.att_dim),requires_grad=True) # self.W_a = Variable(torch.randn(self.att_dim,1),requires_grad=True) self.W_e = Parameter(torch.randn(2*self.lstm_dim, self.att_dim).uniform_(-0.01,0.01)) self.W_a = Parameter(torch.randn(self.att_dim,1).uniform_(-0.01,0.01)) self.W_e_type = Parameter(torch.randn(self.target_dim,2*self.lstm_dim, self.att_dim).uniform_(-0.01,0.01)) self.W_a_type = Parameter(torch.randn(self.target_dim,self.att_dim,1).uniform_(-0.01,0.01)) if self.feature: self.feature_embeddings = Embedding(self.feature_size, self.feature_dim) if hier: _d = "Wiki" if type == "figer" else "OntoNotes" S = create_prior("./resource/"+_d+"/label2id_"+type+".txt") assert(S.shape == (self.target_dim, self.target_dim)) self.S = Variable(torch.Tensor(S)) self.V = Parameter(torch.randn(self.target_dim,self.rep_dim).uniform_(-0.01,0.01)) else: self.W = Parameter(torch.randn(self.rep_dim,self.target_dim).uniform_(-0.01,0.01)) self.softmax = nn.Softmax() self.bce = nn.BCEWithLogitsLoss() if bags==True: self.A = Parameter(torch.randn(self.rep_dim).uniform_(-0.01,0.01)) self.r = Parameter(torch.randn(self.target_dim,self.rep_dim).uniform_(-0.01,0.01)) self.coarse_set = {}