def __init__(self, wrd_vocab, pos_vocab, non_vocab, config):
    super(CMN, self).__init__()
    self.config = config
    self.dropout = self.config['dropout']
    self.hdim = self.config['n_hidden']
    self.wdim = self.config['word_dim']
    self.feat_box = self.config['feat_box']
    self.use_outer = self.config['use_outer']
    self.fusion = self.config['fusion']
    self.debug = self.config['debug']
    self.evaluate = False

    # word embeddings
    self.Wwrd = nn.Embedding(len(wrd_vocab), self.wdim)
    self.w2i = wrd_vocab

    # shared nonlinearities
    self.SMAX = nn.Softmax()
    self.LSMAX = nn.LogSoftmax()
    self.SIGM = nn.Sigmoid()
    self.RELU = nn.ReLU()
    self.TANH = nn.Tanh()
    self.DROP = nn.Dropout(self.dropout)

    # per-module attention scorers over the concatenated bi-LSTM states
    self.WscrSUB = nn.Linear(self.hdim * 4, 1)
    self.WscrOBJ = nn.Linear(self.hdim * 4, 1)
    self.WscrREL = nn.Linear(self.hdim * 4, 1)

    # two stacked bidirectional LSTMs
    self.rnn0 = nn.LSTM(input_size=self.wdim, hidden_size=self.hdim,
                        num_layers=1, bidirectional=True, dropout=self.dropout)
    self.rnn1 = nn.LSTM(input_size=self.hdim * 2, hidden_size=self.hdim,
                        num_layers=1, bidirectional=True, dropout=self.dropout,
                        bias=False)

    # initial hidden/cell states: 2 directions x batch of 1 x hdim
    self.h00 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.c00 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.h01 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.c01 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    init_forget(self.rnn0)
    init_forget(self.rnn1)

    # localization module
    self.Wbox = nn.Linear(self.feat_box, self.wdim)
    if self.fusion == 'concat':
        self.Wout0 = nn.Linear(2 * self.wdim, 1)
    else:
        self.Wout0 = nn.Linear(self.wdim, 1)

    # relationship module: scores a box pair from its two 5-d spatial
    # feature vectors (the full-visual / outer-product feature variants
    # appear in GroundNET's __init__ below)
    self.Wrbox = nn.Linear(5 * 2, self.wdim)
    self.Wrel1 = nn.Linear(self.wdim, 1)
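# The constructor above relies on two helpers that are not shown in this
# section: `makevar` (wraps indices / numpy arrays as autograd Variables)
# and `init_forget` (biases the LSTM forget gates toward remembering).
# A minimal sketch of their assumed behavior, written against the
# PyTorch 0.3-era Variable API used throughout -- an illustration, not
# the repo's actual code:
import numpy as np
import torch
from torch.autograd import Variable

def makevar(x, numpy_var=False):
    # Wrap an index, an index list, or a numpy array as a Variable.
    if numpy_var:
        return Variable(torch.from_numpy(x).float())
    if isinstance(x, (list, tuple)):
        return Variable(torch.LongTensor([list(x)]))  # 1 x T index matrix
    return Variable(torch.LongTensor([int(x)]))

def init_forget(rnn):
    # Set the forget-gate slice of each packed LSTM bias to 1; in the
    # packed (i|f|g|o) layout that is the second quarter of the vector.
    for name, param in rnn.named_parameters():
        if 'bias' in name:
            n = param.size(0)
            param.data[n // 4:n // 2].fill_(1.0)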
def ENCODE(self, words, orig_tree=None):
    word_rep = self.Wwrd(makevar(words)).squeeze(0)
    word_seq = word_rep.view(word_rep.size(0), 1, word_rep.size(1))
    output0, (ht0, ct0) = self.rnn0(word_seq, (self.h00, self.c00))
    output1, (ht1, ct1) = self.rnn1(output0, (self.h01, self.c01))
    outputs = torch.cat([output0.view(output0.size(0), -1),
                         output1.view(output1.size(0), -1)], 1)
    # this variant projects the final time step with per-module linear
    # layers instead of attending over all steps
    last = outputs[-1].view(1, outputs[-1].size(0))
    qsub = self.WprojSUB(last)
    qobj = self.WprojOBJ(last)
    qrel = self.WprojREL(last)
    return qsub, qobj, qrel
def ENCODE(self, words, orig_tree=None):
    word_rep = self.Wwrd(makevar(words)).squeeze(0)
    word_seq = word_rep.view(word_rep.size(0), 1, word_rep.size(1))
    output0, (ht0, ct0) = self.rnn0(word_seq, (self.h00, self.c00))
    output1, (ht1, ct1) = self.rnn1(output0, (self.h01, self.c01))
    outputs = torch.cat([output0.view(output0.size(0), -1),
                         output1.view(output1.size(0), -1)], 1)

    # subject query: attention-weighted sum of the word embeddings
    scores = self.WscrSUB(self.DROP(outputs)).t()
    att_sub = self.SMAX(scores).t()
    weighted = att_sub.expand_as(word_rep) * word_rep
    qsub = torch.sum(weighted, 0)

    # object query
    scores = self.WscrOBJ(self.DROP(outputs)).t()
    att_obj = self.SMAX(scores).t()
    weighted = att_obj.expand_as(word_rep) * word_rep
    qobj = torch.sum(weighted, 0)

    # relationship query
    scores = self.WscrREL(self.DROP(outputs)).t()
    att_rel = self.SMAX(scores).t()
    weighted = att_rel.expand_as(word_rep) * word_rep
    qrel = torch.sum(weighted, 0)

    if self.debug:
        print "ENC>att_sub:", printVec(att_sub.t())
        print "ENC>att_obj:", printVec(att_obj.t())
        print "ENC>att_rel:", printVec(att_rel.t())
    return qsub, qobj, qrel
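# Hypothetical usage of the attention-based ENCODE above: the expression
# is passed as a list of word indices, and three query vectors come back,
# one per module (`model` and the example phrase are assumptions):
words = [w2i.get(w, 0) for w in "man on the left".split()]
qsub, qobj, qrel = model.ENCODE(words)
# qsub / qobj / qrel: wdim-dimensional queries for the subject, object,
# and relationship modules respectively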
def ENCODE(self, words):
    word_rep = torch.cat(
        [self.Wwrd(makevar(self.w2i.get(word, 0))) for word in words], 0)
    averaged = torch.mean(word_rep, 0)
    # hidden layers use dropout + ReLU; the final layer is applied once,
    # with tanh, after the loop
    for i, ff in enumerate(self.Wff):
        if i == len(self.Wff) - 1:
            break
        averaged = self.RELU(self.DROP(ff(averaged)))
    return self.TANH(self.Wff[-1](averaged))
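# `self.Wff` is not created by any __init__ shown in this section; this
# averaging encoder presumably owns a stack of linear layers built along
# these lines (hypothetical construction, inside the owning __init__):
#     self.Wff = nn.ModuleList([nn.Linear(self.wdim, self.wdim)
#                               for _ in range(self.config['n_layer'])])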
def expr_for_txt(self, tree, decorate=False):
    assert not tree.isleaf()
    if len(tree.children) == 1:
        # preterminal: a POS tag over a single word
        assert tree.children[0].isleaf()
        pos_emb = self.Wpos(makevar(self.p2i.get(tree.label, 0)))
        wrd_emb = self.Wwrd(makevar(self.w2i.get(tree.children[0].label, 0)))
        return torch.cat((pos_emb, wrd_emb), 1)
    # binary interior node: compose the two child representations
    assert len(tree.children) == 2, tree.children[0]
    e_l = self.expr_for_txt(tree.children[0], decorate)
    e_r = self.expr_for_txt(tree.children[1], decorate)
    non = self.Wnon(makevar(self.n2i.get(tree.label, 0)))
    chd = self.Wcompose(self.DROP(torch.cat((e_l, e_r), 1)))
    exp = self.TANH(non + chd)
    if decorate:
        tree._expr = exp
    return exp
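# The tree composition above and the training loops below assume a small
# parse-tree interface: `isleaf()`, `children`, `label`, and `leaves()`.
# A hypothetical minimal node satisfying it:
class Tree(object):
    def __init__(self, label, children=None):
        self.label = label
        self.children = children or []

    def isleaf(self):
        return len(self.children) == 0

    def leaves(self):
        # left-to-right leaf nodes, i.e. the words of the expression
        if self.isleaf():
            return [self]
        return [leaf for child in self.children for leaf in child.leaves()]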
def ENCODE(self, words, orig_tree=None):
    word_rep = self.Wwrd(makevar(words)).squeeze(0)
    word_seq = word_rep.view(word_rep.size(0), 1, word_rep.size(1))
    # fresh zero states on every call instead of shared module-level states
    h00 = c00 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    h01 = c01 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    output0, (ht0, ct0) = self.rnn0(word_seq, (h00, c00))
    output1, (ht1, ct1) = self.rnn1(output0, (h01, c01))
    outputs = torch.cat([output0.view(output0.size(0), -1),
                         output1.view(output1.size(0), -1)], 1)
    scores = self.WscrSUB(self.DROP(outputs)).t()
    att_sub = self.SMAX(scores).t()
    weighted = att_sub.expand_as(word_rep) * word_rep
    qsub = torch.sum(weighted, 0)
    if self.debug:
        print "ENC>att_sub:", printVec(att_sub.t())
    return qsub
def CONTEXT(self, words, orig_tree=None):
    word_rep = self.WDROP(self.Wwrd(makevar(words)).squeeze(0))
    word_seq = word_rep.view(word_rep.size(0), 1, word_rep.size(1))
    if self.encoder == "lstm":
        output0, (ht0, ct0) = self.rnn0_ctx(word_seq, (self.h00_ctx, self.c00_ctx))
        output1, (ht1, ct1) = self.rnn1_ctx(output0, (self.h01_ctx, self.c01_ctx))
    else:
        raise NotImplementedError()
    # sentence context: final time-step outputs of both bi-LSTM layers
    context = torch.cat([output0[-1, :], output1[-1, :]], 1)
    return context
def ENCODE(self, words, ctx, orig_tree=None):
    word_rep = self.WDROP(self.Wwrd(makevar(words)).squeeze(0))
    # append the phrase-external context vector to every word embedding
    word_ctx = torch.cat([word_rep, ctx.repeat(word_rep.size(0), 1)], 1)
    word_seq = word_ctx.view(word_ctx.size(0), 1, word_ctx.size(1))
    if self.encoder == "lstm":
        output0, (ht0, ct0) = self.rnn0(word_seq, (self.h00, self.c00))
        output1, (ht1, ct1) = self.rnn1(output0, (self.h01, self.c01))
    else:
        raise NotImplementedError()
    outputs = torch.cat([output0.view(output0.size(0), -1),
                         output1.view(output1.size(0), -1)], 1)
    scores = self.Wscr(self.DROP(outputs)).t()
    attention = self.SMAX(scores).t()
    weighted = attention.expand_as(word_rep) * word_rep
    waverage = torch.sum(weighted, 0)
    context = torch.cat([output0[-1, :], output1[-1, :]], 1)
    return waverage, attention, context
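# A sketch of how CONTEXT and the context-conditioned ENCODE above are
# assumed to compose: encode the full expression once for context, then
# encode each phrase conditioned on it (all names here are assumptions):
sent = [w2i.get(w, 0) for w in "woman next to the dog".split()]
ctx = model.CONTEXT(sent)                       # 1 x (4 * hdim) context
phrase = [w2i.get(w, 0) for w in "woman".split()]
wavg, att, phr_ctx = model.ENCODE(phrase, ctx)
# wavg: attention-weighted phrase embedding; att: per-word weights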
def ENCODE(self, words, orig_tree=None):
    word_rep = self.WDROP(self.Wwrd(makevar(words)).squeeze(0))
    word_seq = word_rep.view(word_rep.size(0), 1, word_rep.size(1))
    if self.encoder == "lstm":
        output0, (ht0, ct0) = self.rnn0(word_seq, (self.h00, self.c00))
        output1, (ht1, ct1) = self.rnn1(output0, (self.h01, self.c01))
    else:  # gru: no cell state
        output0, ht0 = self.rnn0(word_seq, self.h00)
        output1, ht1 = self.rnn1(output0, self.h01)
    outputs = torch.cat([output0.view(output0.size(0), -1),
                         output1.view(output1.size(0), -1)], 1)
    scores = self.Wscr(self.DROP(outputs)).t()
    attention = self.SMAX(scores).t()
    weighted = attention.expand_as(word_rep) * word_rep
    waverage = torch.sum(weighted, 0)
    return waverage, attention
else:
    raise NotImplementedError()

if args.debug_mode:
    raise NotImplementedError()
else:
    if config['model'] in set(["groundnet", "groundnetflexall",
                               "groundnetflexrel", "treernn"]):
        # tree-structured models consume the parse tree directly
        prediction = net(tree, box_rep, tree)
    else:
        # sequence models consume the leaf word indices
        prediction = net([w2i.get(n.label, 0) for n in tree.leaves()],
                         box_rep, tree)
_, pred = torch.max(prediction.data, 1)
correct += (1.0 if pred[0][0] in set(Ytrn[ii]) else 0.0)

if config['loss'] == 'nll':
    gold = makevar(Ytrn[ii][0])
elif config['loss'] == 'smargin':
    # multi-label target: 1.0 at every annotated gold box index
    gold = np.zeros((1, box_rep.size(0)))
    np.put(gold, Ytrn[ii], 1.0)
    gold = makevar(gold, numpy_var=True).view(1, box_rep.size(0))
elif config['loss'] in ('lamm', 'mbr'):
    # these losses also consume the precomputed IoU scores of this instance
    prediction = (prediction, Xtrn_iou[ii])
    gold = Ytrn[ii]
else:
    raise NotImplementedError()
loss = criterion(prediction, gold)
if math.isnan(float(loss.data[0])):
    box_rep = box_feats
elif config['box_usage'] == 2:
    box_rep = spat_feats
else:
    raise NotImplementedError()

if args.debug_mode:
    raise NotImplementedError()
else:
    prediction = net([w2i.get(n.label, 0) for n in tree.leaves()],
                     box_rep, tree)
_, pred = torch.max(prediction.data, 1)
correct += (1.0 if pred[0][0] in set(gold_instance) else 0.0)

if config['loss'] == 'nll':
    gold = makevar(gold_instance[0])
elif config['loss'] == 'smargin':
    gold = np.zeros((1, box_rep.size(0)))
    np.put(gold, gold_instance, 1.0)
    gold = makevar(gold, numpy_var=True).view(1, box_rep.size(0))
else:
    raise NotImplementedError()
loss = criterion(prediction, gold)
closs += float(loss.data[0])
cinst += 1

optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm(net.parameters(), config['clip'])
optimizer.step()
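# Worked example of the 'smargin' target built in the branches above:
# with 5 candidate boxes and gold indices [1, 3] (made-up numbers),
# the multi-label row comes out as:
import numpy as np
gold = np.zeros((1, 5))       # one column per candidate box
np.put(gold, [1, 3], 1.0)     # mark every annotated gold box
# gold -> [[0., 1., 0., 1., 0.]]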
def __init__(self, wrd_vocab, pos_vocab, non_vocab, config):
    super(GroundNET, self).__init__()
    self.config = config
    self.dropout = self.config['dropout']
    self.hdim = self.config['n_hidden']
    self.layer = self.config['n_layer']
    self.wdim = self.config['word_dim']
    self.feat_box = self.config['feat_box']
    self.use_outer = self.config['use_outer']
    self.fusion = self.config['fusion']
    self.debug = self.config['debug']
    self.encoder = self.config['encoder']
    self.only_spatial = self.config['only_spatial']

    self.Wwrd = nn.Embedding(len(wrd_vocab), self.wdim)
    self.w2i = wrd_vocab
    self.Wbox = nn.Linear(self.feat_box, self.wdim)

    # pairwise box features: 5-d spatial or full visual features per box,
    # plus 36 extra dims when use_outer is set (sized as a (5+1)x(5+1)
    # outer product)
    if self.use_outer:
        if self.only_spatial:
            self.Wrbox = nn.Linear(5 * 2 + ((5 + 1) ** 2), self.wdim)
        else:
            self.Wrbox = nn.Linear(self.feat_box * 2 + ((5 + 1) ** 2), self.wdim)
    else:
        if self.only_spatial:
            self.Wrbox = nn.Linear(5 * 2, self.wdim)
        else:
            self.Wrbox = nn.Linear(self.feat_box * 2, self.wdim)

    self.SMAX = nn.Softmax()
    self.LSMAX = nn.LogSoftmax()
    self.SIGM = nn.Sigmoid()
    self.RELU = nn.ReLU()
    self.TANH = nn.Tanh()
    self.DROP = nn.Dropout(self.dropout)
    self.WDROP = WordDropout(self.dropout)

    # output dimensions of the fusion layers depend on the fusion mode
    if self.fusion == 'concat':
        out0_dim = self.wdim * 2
    else:
        out0_dim = self.wdim
    if self.layer == 1 and self.fusion == 'concat':
        out1_dim = self.wdim * 2
    else:
        out1_dim = self.wdim
    self.Wrel0 = nn.Linear(out0_dim, self.wdim)
    self.Wrel1 = nn.Linear(out1_dim, 1)
    self.Wout0 = nn.Linear(out0_dim, self.wdim)
    self.Wout1 = nn.Linear(out1_dim, 1)
    self.Wscr = nn.Linear(self.hdim * 4, 1)

    if self.encoder == "lstm":
        self.rnn0 = nn.LSTM(input_size=self.wdim, hidden_size=self.hdim,
                            num_layers=1, bidirectional=True,
                            dropout=self.dropout)
        self.rnn1 = nn.LSTM(input_size=self.hdim * 2, hidden_size=self.hdim,
                            num_layers=1, bidirectional=True,
                            dropout=self.dropout, bias=False)
        init_forget(self.rnn0)
        init_forget(self.rnn1)
    elif self.encoder == "gru":
        self.rnn0 = nn.GRU(input_size=self.wdim, hidden_size=self.hdim,
                           num_layers=1, bidirectional=True,
                           dropout=self.dropout)
        self.rnn1 = nn.GRU(input_size=self.hdim * 2, hidden_size=self.hdim,
                           num_layers=1, bidirectional=True,
                           dropout=self.dropout)
    else:
        raise NotImplementedError()

    self.h00 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.c00 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.h01 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
    self.c01 = makevar(np.zeros((2, 1, self.hdim)), numpy_var=True)
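# `WordDropout` is referenced above but not defined in this section.
# A plausible sketch, assuming it zeroes entire word vectors at rate p
# during training (an assumption, not necessarily the repo's version):
import torch.nn as nn
from torch.autograd import Variable

class WordDropout(nn.Module):
    def __init__(self, p):
        super(WordDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if not self.training or self.p == 0:
            return x
        # Bernoulli row mask, broadcast across the embedding dimension
        mask = Variable(x.data.new(x.size(0), 1).bernoulli_(1 - self.p))
        return x * mask.expand_as(x)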
elif config['box_usage'] == 1:
    box_rep = box_feats
elif config['box_usage'] == 2:
    box_rep = spat_feats
else:
    raise NotImplementedError()

if False:  # args.debug_mode
    raise NotImplementedError()
else:
    prediction = net([w2i.get(n.label, 0) for n in tree.leaves()], box_rep)
pred = int(np.round(prediction.data[0][0]))
correct += (1.0 if pred == Ytrn[ii] else 0.0)
gold = makevar(np.array([Ytrn[ii]]), numpy_var=True)
loss = criterion(prediction, gold)
closs += float(loss.data[0])
cinst += 1

optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm(net.parameters(), config['clip'])
optimizer.step()

if args.verbose:
    pbar.set_description("trn_loss {:5.3f} trn_acc {:5.3f}".format(
        closs / cinst, correct / cinst))
if time.time() - start_time > args.timeout:
    timeout = True
    break