class Model(object):
    """Training / evaluation wrapper around a DGCNN answer-span extractor.

    Holds two networks: ``self.model`` (the one being optimised) and
    ``self.best_model`` (a snapshot of the weights with the lowest average
    validation loss seen so far, used for testing and answer extraction).
    """

    def __init__(self):
        # The original setup targets the second GPU ('cuda:1'); fall back to
        # the first GPU on single-GPU hosts instead of crashing, and to the
        # CPU when CUDA is unavailable.
        if torch.cuda.is_available():
            gpu_index = 1 if torch.cuda.device_count() > 1 else 0
            self.device = torch.device('cuda:{}'.format(gpu_index))
        else:
            self.device = torch.device('cpu')

        self.model = DGCNN(256, char_file=config.char_embedding_path,
                           word_file=config.word_embedding_path).to(self.device)
        self.epoches = 150
        self.lr = 1e-4
        self.print_step = 15
        # Only parameters that require gradients are handed to the optimiser.
        self.optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=self.lr)
        # Placeholder with the same architecture so load_model() can be
        # called before any training has happened.
        self.best_model = DGCNN(256, char_file=config.char_embedding_path,
                                word_file=config.word_embedding_path).to(self.device)
        self._val_loss = 1e12

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _to_device(self, batch):
        """Move every tensor of one data-loader batch to ``self.device``."""
        return [tensor.to(self.device) for tensor in batch]

    def _batch_loss(self, As, Ae, As_, Ae_, e_mask):
        """Return the focal loss averaged over unmasked evidence positions.

        Returns ``(loss, mask)`` where ``mask`` is ``e_mask == 1``.
        """
        As_loss = focal_loss(As, As_, self.device)
        Ae_loss = focal_loss(Ae, Ae_, self.device)
        mask = e_mask == 1
        loss = (As_loss.masked_select(mask).sum()
                + Ae_loss.masked_select(mask).sum()) / e_mask.sum()
        return loss, mask

    @staticmethod
    def _masked_labels(mask, As, Ae, As_, Ae_, threshold):
        """Flatten gold labels and predictions to int numpy arrays.

        Only positions selected by ``mask`` are kept; predictions are
        binarised with ``> threshold``. Returns ``(As, Ae, As_, Ae_)``.
        """
        As_, Ae_, As, Ae = [
            t.masked_select(mask).detach().cpu().numpy()
            for t in (As_, Ae_, As, Ae)
        ]
        As_ = np.where(As_ > threshold, 1, 0)
        Ae_ = np.where(Ae_ > threshold, 1, 0)
        return As.astype(int), Ae.astype(int), As_, Ae_

    @staticmethod
    def _collect_spans(text, starts, ends, maxlen=10, threshold=0.1):
        """Enumerate candidate answer spans inside one evidence text.

        ``starts`` / ``ends`` are 1-D numpy arrays of per-character start and
        end scores; they are truncated to ``len(text)``. A span ``text[i:j+1]``
        is a candidate when ``starts[i] > threshold``, ``ends[j] > threshold``
        and ``i <= j < i + maxlen``. Returns ``{span_text: best_score}`` where
        the score is ``starts[i] * ends[j]``.
        """
        starts = np.asarray(starts)[:len(text)]
        ends = np.asarray(ends)[:len(text)]
        start_idx = np.where(starts > threshold)[0]
        end_idx = np.where(ends > threshold)[0]
        spans = {}
        for i in start_idx:
            in_window = (end_idx >= i) & (end_idx < i + maxlen)
            for j in end_idx[in_window]:
                key = text[i:j + 1]
                score = float(starts[i] * ends[j])
                spans[key] = max(spans.get(key, 0), score)
        return spans

    @staticmethod
    def _rank_answers(scores_by_answer):
        """Merge per-evidence scores and sort answers, best first.

        ``scores_by_answer`` maps an answer string to the list of scores it
        received across evidences. Each answer's merged score is
        ``sum(v**2) / (sum(v) + 1)``. Returns a list of ``[answer, score]``
        in descending score order.
        """
        ranked = [
            [answer, float((np.array(v) ** 2).sum() / (sum(v) + 1))]
            for answer, v in scores_by_answer.items()
        ]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def train(self, train_data, dev_data, threshold=0.1):
        """Train ``self.model`` for ``self.epoches`` epochs.

        After every epoch the model is validated on ``dev_data`` (which may
        update ``self.best_model``). ``threshold`` is accepted for interface
        compatibility and is not used during training.
        """
        for epoch in range(self.epoches):
            self.model.train()
            for step, item in enumerate(train_data):
                self.optimizer.zero_grad()
                Qc, Qw, q_mask, Ec, Ew, e_mask, As, Ae = self._to_device(item)
                As_, Ae_ = self.model([Qc, Qw, q_mask, Ec, Ew, e_mask])
                loss, _ = self._batch_loss(As, Ae, As_, Ae_, e_mask)
                loss.backward()
                self.optimizer.step()
                if (step + 1) % self.print_step == 0 or step == len(train_data) - 1:
                    logger.info("In Training : Epoch : {} \t Step / All Step : {} / {} \t Loss of every char : {}"
                                .format(epoch + 1, step + 1, len(train_data), loss.item() * 100))
            self.model.eval()
            with torch.no_grad():
                self.validate(dev_data)

    def test(self, test_data, threshold=0.1):
        """Evaluate ``self.best_model`` on ``test_data`` and log the metrics.

        Start- and end-position predictions are binarised with ``threshold``,
        accumulated over all batches and scored with
        ``binary_confusion_matrix_evaluate``.
        """
        self.best_model.eval()
        self.best_model.to(self.device)
        sl, el, sl_, el_ = [], [], [], []
        with torch.no_grad():
            for item in test_data:
                Qc, Qw, q_mask, Ec, Ew, e_mask, As, Ae = self._to_device(item)
                mask = e_mask == 1
                # Predict with the best snapshot (the one put in eval mode
                # above), not with the model currently being trained.
                As_, Ae_ = self.best_model([Qc, Qw, q_mask, Ec, Ew, e_mask])
                As, Ae, As_, Ae_ = self._masked_labels(mask, As, Ae, As_, Ae_, threshold)
                sl.append(As)
                el.append(Ae)
                sl_.append(As_)
                el_.append(Ae_)

        if not sl:
            # np.concatenate raises on an empty list; nothing to evaluate.
            logger.info('In Test DataSet: no batches to evaluate')
            return

        a = binary_confusion_matrix_evaluate(np.concatenate(sl), np.concatenate(sl_))
        b = binary_confusion_matrix_evaluate(np.concatenate(el), np.concatenate(el_))
        logger.info('In Test DataSet: START EVALUATION:\t Acc : {}\t Prec : {}\t Recall : {}\t F1-score : {}'
                    .format(a[0], a[1], a[2], a[3]))
        logger.info('In Test DataSet: END EVALUATION:\t Acc : {}\t Prec : {}\t Recall : {}\t F1-score : {}'
                    .format(b[0], b[1], b[2], b[3]))

    def validate(self, dev_data, threshold=0.1):
        """Compute the validation loss/metrics of ``self.model`` on ``dev_data``.

        Metrics are accumulated over the whole dev set (as ``test`` does) and
        logged once. When the average loss improves on the best seen so far,
        ``self.best_model`` is replaced by a deep copy of ``self.model``.
        The caller is expected to have put the model in eval mode.
        """
        val_loss = []
        sl, el, sl_, el_ = [], [], [], []
        with torch.no_grad():
            for step, item in enumerate(dev_data):
                Qc, Qw, q_mask, Ec, Ew, e_mask, As, Ae = self._to_device(item)
                As_, Ae_ = self.model([Qc, Qw, q_mask, Ec, Ew, e_mask])
                loss, mask = self._batch_loss(As, Ae, As_, Ae_, e_mask)
                if (step + 1) % self.print_step == 0 or step == len(dev_data) - 1:
                    logger.info("In Validation: Step / All Step : {} / {} \t Loss of every char : {}"
                                .format(step + 1, len(dev_data), loss.item() * 100))
                val_loss.append(loss.item())
                As, Ae, As_, Ae_ = self._masked_labels(mask, As, Ae, As_, Ae_, threshold)
                sl.append(As)
                el.append(Ae)
                sl_.append(As_)
                el_.append(Ae_)

        if not val_loss:
            # Avoid ZeroDivisionError on an empty dev set.
            logger.info('In Validation: no batches to evaluate')
            return

        acc, prec, recall, f1 = binary_confusion_matrix_evaluate(
            np.concatenate(sl), np.concatenate(sl_))
        logger.info('START EVALUATION :\t Acc : {}\t Prec : {}\t Recall : {}\t F1-score : {}'
                    .format(acc, prec, recall, f1))
        acc, prec, recall, f1 = binary_confusion_matrix_evaluate(
            np.concatenate(el), np.concatenate(el_))
        logger.info('END EVALUATION :\t Acc : {}\t Prec : {}\t Recall : {}\t F1-score : {}'
                    .format(acc, prec, recall, f1))

        avg_loss = sum(val_loss) / len(val_loss)
        logger.info('In Validation, Average Loss : {}'.format(avg_loss * 100))
        if avg_loss < self._val_loss:
            logger.info('Update best Model in Valiation Dataset')
            self._val_loss = avg_loss
            self.best_model = deepcopy(self.model)

    def load_model(self, PATH):
        """Load a state dict from ``PATH`` into ``self.best_model``."""
        # map_location lets a checkpoint saved on another device be loaded.
        self.best_model.load_state_dict(torch.load(PATH, map_location=self.device))
        self.best_model.eval()

    def save_model(self, PATH):
        """Save the state dict of ``self.best_model`` to ``PATH``."""
        torch.save(self.best_model.state_dict(), PATH)
        logger.info('save best model successfully')

    def get_test_answer(self, test_data, word2id, char2id):
        """Log top-1 / top-3 / top-5 answer accuracy on raw-text test data.

        ``test_data`` here is a ``List[dict]`` holding the original text;
        each dict has the keys ``'question'``, ``'evidences'`` and
        ``'answer'``. A sample counts as correct at rank k when any gold
        answer is among the k best extracted answers, or when the gold
        answer is ``'no_answer'`` and nothing was extracted.
        """
        all_item = len(test_data)
        if all_item == 0:
            # Avoid ZeroDivisionError when there is nothing to score.
            logger.info('In Test Raw File: no items to evaluate')
            return

        t1 = 0.
        t3 = 0.
        t5 = 0.
        self.best_model.eval()
        with torch.no_grad():
            for item in test_data:
                q_text = item['question']
                e_texts = item['evidences']
                a = item['answer']
                # a_ is a list of [answer, possibility], best first.
                a_ = self.extract_answer(q_text, e_texts, word2id, char2id)
                n = len(a_)
                a_1 = {pair[0] for pair in a_[:1]}
                a_3 = {pair[0] for pair in a_[:3]}
                a_5 = {pair[0] for pair in a_[:5]}

                if a and a[0] == 'no_answer' and n == 0:
                    t1 += 1
                    t3 += 1
                    t5 += 1
                if any(ans in a_1 for ans in a):
                    t1 += 1
                if any(ans in a_3 for ans in a):
                    t3 += 1
                if any(ans in a_5 for ans in a):
                    t5 += 1

        logger.info('In Test Raw File')
        logger.info('Top One Answer : Acc : {}'.format(t1 / all_item))
        logger.info('Top Three Answer : Acc : {}'.format(t3 / all_item))
        logger.info('Top Five Answer : Acc : {}'.format(t5 / all_item))

    def extract_answer(self, q_text, e_texts, word2id, char2id, maxlen=10, threshold=0.1):
        """Extract ranked answer candidates for one question.

        ``q_text`` is the question string and ``e_texts`` the list of
        evidence strings. Candidate spans of at most ``maxlen`` characters
        whose start and end scores both exceed ``threshold`` are collected
        per evidence, merged across evidences and returned as a list of
        ``[answer, possibility]`` sorted in descending order. Returns an
        empty list when there are no evidences.
        """
        if not e_texts:
            return []

        qc = list(q_text)
        Qc, q_mask = sent2id([qc], char2id)
        qw = alignWord2Char(tokenize(q_text))
        Qw, q_mask_ = sent2id([qw], word2id)
        # Char- and word-level encodings must cover the same positions.
        assert np.array_equal(np.asarray(q_mask), np.asarray(q_mask_)), \
            'question char/word masks differ'

        ec = [list(e) for e in e_texts]
        ew = [alignWord2Char(tokenize(e)) for e in e_texts]
        Ec, e_mask = sent2id(ec, char2id)
        Ew, e_mask_ = sent2id(ew, word2id)
        assert np.array_equal(np.asarray(e_mask), np.asarray(e_mask_)), \
            'evidence char/word masks differ'

        def totensor(x):
            # Inputs must live on the same device as the model.
            return torch.from_numpy(np.array(x)).long().to(self.device)

        inputs = [totensor(x) for x in (Qc, Qw, q_mask, Ec, Ew, e_mask)]
        with torch.no_grad():
            As_, Ae_ = self.best_model(inputs)
        # Convert once to numpy on the CPU; all span logic below is numpy.
        As_ = As_.detach().cpu().numpy()
        Ae_ = Ae_.detach().cpu().numpy()

        R = {}
        for as_, ae_, e in zip(As_, Ae_, e_texts):
            spans = self._collect_spans(e, as_, ae_, maxlen=maxlen, threshold=threshold)
            for key, score in spans.items():
                R.setdefault(key, []).append(score)

        # R is sorted in descending order: (answer, possibility).
        return self._rank_answers(R)
model = DGCNN(args.emb_dims, args.k, output_channels=40) elif args.model.lower() == 'pointnet': model = PointNetCls(k=40, feature_transform=args.feature_transform) elif args.model.lower() == 'pointnet2': model = PointNet2ClsSsg(num_classes=40) elif args.model.lower() == 'pointconv': model = PointConvDensityClsSsg(num_classes=40) else: print('Model not recognized') exit(-1) # load model weight state_dict = torch.load(BEST_WEIGHTS[args.model], map_location='cpu') print('Loading weight {}'.format(BEST_WEIGHTS[args.model])) try: model.load_state_dict(state_dict) except RuntimeError: # eliminate 'module.' in keys state_dict = {k[7:]: v for k, v in state_dict.items()} model.load_state_dict(state_dict) # distributed mode on multiple GPUs! # much faster than nn.DataParallel model = DistributedDataParallel(model.cuda(), device_ids=[args.local_rank]) # setup attack settings if args.adv_func == 'logits': adv_func = LogitsAdvLoss(kappa=args.kappa) else: adv_func = CrossEntropyAdvLoss() dist_func = FarChamferDist(num_add=args.num_add,
class DGCNN_exit1(nn.Module):
    """Early-exit classification head attached after the first DGCNN conv.

    A DGCNN backbone is built from ``args``, initialised from the checkpoint
    at ``./pretrained/model.1024.t7`` and frozen (``requires_grad = False``).
    Only the ``exit1_*`` layers defined here remain trainable.

    Args:
        args: configuration object passed to ``DGCNN``; ``forward`` also
            reads ``args.channel_noise``.
        output_channels: number of output classes (default 40).
    """

    def __init__(self, args, output_channels=40):
        super(DGCNN_exit1, self).__init__()
        self.args = args
        self.DGCNN = DGCNN(args)

        # map_location='cpu' lets a checkpoint saved on a GPU be loaded on a
        # host without one; load_state_dict copies into the existing params.
        checkpoint = torch.load('./pretrained/model.1024.t7', map_location='cpu')
        # Checkpoints saved from a (Distributed)DataParallel wrapper prefix
        # every key with 'module.'; strip it only where it is present.
        prefix = 'module.'
        new_state_dict = OrderedDict(
            (name[len(prefix):] if name.startswith(prefix) else name, tensor)
            for name, tensor in checkpoint.items()
        )
        self.DGCNN.load_state_dict(new_state_dict)

        # Number of neighbours used by get_graph_feature.
        self.k = 20

        # Freeze the backbone.
        for para in self.DGCNN.parameters():
            para.requires_grad = False

        self.exit1_conv = nn.Sequential(
            nn.Conv1d(64, 256, kernel_size=1, bias=False),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(negative_slope=0.2),
        )
        # Input is 512 = 256 (max-pooled) + 256 (avg-pooled) features.
        self.exit1_fc2 = nn.Sequential(
            nn.Linear(512, 1536),
            nn.BatchNorm1d(1536),
            nn.LeakyReLU(negative_slope=0.2),
        )
        self.exit1_predict = nn.Sequential(
            nn.Linear(1536, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Dropout(0.5),
            nn.Linear(128, output_channels),
            nn.BatchNorm1d(output_channels),
            nn.LeakyReLU(negative_slope=0.2),
        )

    def forward(self, x, noise_factor=0.1):
        """Run the first backbone conv, the exit head and the noisy channel.

        Args:
            x: input batch; its first dimension is the batch size.
            noise_factor: unused; kept for interface compatibility. The
                channel noise level is taken from ``self.args.channel_noise``.

        Returns:
            The output of ``exit1_predict`` for the batch.
        """
        batch_size = x.size(0)
        # [batch_size, dim=3 * 2, point_num, k]
        x = get_graph_feature(x, k=self.k)
        x = self.DGCNN.conv1(x)
        # Max over the neighbour axis: [batch_size, dim = 64, point_num]
        x = x.max(dim=-1, keepdim=False)[0]

        # exit 1 (no concatenation with other backbone stages is needed)
        x = self.exit1_conv(x)
        pooled_max = F.adaptive_max_pool1d(x, 1).view(batch_size, -1)  # (batch_size, dimension)
        pooled_avg = F.adaptive_avg_pool1d(x, 1).view(batch_size, -1)  # (batch_size, dimension)
        x = torch.cat((pooled_max, pooled_avg), 1)
        x = self.exit1_fc2(x)

        # AWGN channel model applied to the intermediate features.
        x = awgn_channel(x, self.args.channel_noise)
        x = self.exit1_predict(x)
        return x