def main(**kwargs):
    """Score every checkpoint file found under ``opt.test_model_path``.

    For each regular file in that directory:

    1. The model class name and the data type are derived from the
       underscore-separated file name (first and second-to-last fields).
    2. The model is built, moved to the GPU, loaded from the checkpoint and
       run over the data returned by ``load_data`` in batches of
       ``opt.batch_size``.
    3. The top-5 predictions are scored with ``get_score`` and one
       ``[file name, score]`` row is appended to ``opt.ps_outfile``.
    4. Optionally the raw scores (``opt.save_test_result``) and the
       top-``opt.visible_top_num`` results (``opt.save_top_result``) are
       saved next to ``opt.result_path``.
    5. The checkpoint is moved to the sibling directory ``<dir>dealed`` and
       its name is appended to ``finish.getprobability.log``.

    A failure while handling one checkpoint is reported and the loop moves
    on to the next file.

    Args:
        **kwargs: Option overrides forwarded to ``opt.parse``.
    """
    import traceback

    print('***************A')
    opt.parse(kwargs)
    files = glob.glob(opt.test_model_path + '/*')
    total = len(files)
    # These are captured once; later ``opt.parse`` calls do not change them.
    outfile = opt.ps_outfile
    label_path = opt.labels_path
    test_data_path = opt.test_data_path
    # NOTE(review): this mapping is rebound by ``load_data`` inside the loop
    # before it is read; the load is kept so a bad path still fails early.
    index2qid = np.load(test_data_path)['index2qid'].item()
    with open(label_path) as f:
        labels_info = json.load(f)
    qid2label = labels_info['d']

    # Width of the score matrix. Hard-coded in the original; presumably the
    # number of labels -- TODO confirm against ``opt.num_classes``.
    num_outputs = 25556

    for now, file in enumerate(files, 1):
        try:
            print('now: ' + str(now) + '/' + str(total))
            print(file)
            if not os.path.isfile(file):
                print('is path ')
                continue

            filepath, file = os.path.split(file)
            cuts = file.split('_')
            modelkind = cuts[0]
            if modelkind == 'CNNText' and cuts[1] == 'inception':
                modelkind += '_inception'
            print(modelkind)
            modeltype = cuts[-2]
            opt.model_path = os.path.join(opt.test_model_path, file)
            opt.type_ = modeltype
            print('************A1************')
            print(opt.test_data_path)

            model = getattr(models, modelkind)(opt).cuda().eval()
            print('************A11************')
            if opt.model_path is not None:
                model.load(opt.model_path)
            print('************A2************')
            # Re-apply the overrides after the model has been built/loaded,
            # then restore the per-file data type.
            opt.parse(kwargs)
            print('************A3************')
            opt.type_ = modeltype
            opt.result_path = os.path.join(opt.test_result_path, file)
            if not os.path.exists(opt.test_result_path):
                print('no dir: ' + opt.test_result_path)
                os.makedirs(opt.test_result_path)
            model = model.eval()
            print('checkW')

            test_data_title, test_data_content, index2qid, _ = load_data(
                type_=opt.type_)
            num_samples = len(test_data_title)
            result = np.zeros((num_samples, num_outputs))

            # Walk the data in consecutive slices. Stepping by batch start
            # covers the final batch both when it is partial and when the
            # sample count is an exact multiple of the batch size (the
            # latter case used to be left as all-zero rows).
            batch_size = opt.batch_size
            for start in tqdm.tqdm(range(0, num_samples, batch_size)):
                end = min(start + batch_size, num_samples)
                title = np.array(test_data_title[start:end])
                content = np.array(test_data_content[start:end])
                result[start:end, :] = dotest(model, title, content)

            r = t.from_numpy(result).float()
            if opt.save_test_result:
                t.save(r, opt.result_path + '.pth')

            true_labels = [qid2label[index2qid[ii]] for ii in range(len(r))]

            if opt.save_top_result:
                print('doing top_result')
                top_result_raw = r.topk(opt.visible_top_num, 1)
                predict_label_and_marked_label_list = [
                    [_1, _2] for _1, _2 in zip(top_result_raw[1], true_labels)
                ]
                t.save(top_result_raw,
                       opt.result_path + '.topPrId' + str(opt.visible_top_num))
                _, _, _, _, precision_classes_num = get_score_topk(
                    predict_label_and_marked_label_list,
                    opt.visible_top_num, opt.num_classes)
                t.save(
                    t.Tensor(precision_classes_num),
                    opt.result_path + '.top' + str(opt.visible_top_num) + '.Pre')
                print('saved top_result')

            top5 = r.topk(5, 1)[1]
            predict_label_and_marked_label_list = [
                [_1, _2] for _1, _2 in zip(top5, true_labels)
            ]
            score, _, _, ss = get_score(predict_label_and_marked_label_list)
            print(score)
            print(ss)

            # newline='' is what the csv module expects for files it writes.
            with open(outfile, 'a', encoding='utf-8', newline='') as f:
                csv.writer(f).writerow([file, str(score)])

            # Processed checkpoints go to "<dir>dealed", a sibling of the
            # source directory (plain string concatenation, not a subdir).
            done_dir = filepath + 'dealed'
            if not os.path.exists(done_dir):
                print('no dir: ' + done_dir)
                os.makedirs(done_dir)
            print('move from -- to --')
            print(os.path.join(filepath, file))
            print(done_dir)
            shutil.move(os.path.join(filepath, file), done_dir)

            with open('finish.getprobability.log', 'a', encoding='utf-8') as f:
                f.write(file + '\n')
        except Exception as e:
            # Best-effort batch job: report the failure (with traceback so it
            # can be located) and continue with the next checkpoint.
            print(file)
            print(e)
            traceback.print_exc()
def main(**kwargs):
    """Training entry point.

    Builds the model named by ``opt.model`` (optionally loading
    ``opt.model_path``), then trains it for ``opt.max_epoch`` epochs over a
    ``ZhihuData`` dataset.

    * Every ``opt.plot_every`` iterations the top-5 predictions of the
      current batch are scored with ``get_score`` and the running score and
      loss are printed.
    * Every ``opt.decay_every`` iterations ``val(model, dataset)`` is run.
      A new best score saves the model and remembers the path; a score
      below the best reloads the best checkpoint, decays both learning
      rates and rebuilds the optimizer. The meters are reset afterwards.

    Args:
        **kwargs: Option overrides forwarded to ``opt.parse``.
    """
    print(opt.test_data_path)
    print('***********W1**********')
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()
    print('***********W2**********')
    print(opt.test_data_path)

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)
    print('***********W3**********')
    opt.parse(kwargs, print_=True)
    print(opt.test_data_path)

    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    dataset = ZhihuData(opt.train_data_path,
                        opt.labels_path,
                        type_=opt.type_,
                        augument=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, lr2, opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0
    # Path of the best checkpoint so far; None until one has been saved.
    best_path = None

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # Training step: update the parameters.
            title, content, label = Variable(title.cuda()), Variable(
                content.cuda()), Variable(label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            # ``loss.data[0]`` fails on 0-dim tensors in newer PyTorch
            # releases; prefer ``.item()`` when it exists and fall back to
            # the old indexing form otherwise.
            loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
            loss_meter.add(loss_value)
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # Dropping a file at ``opt.debug_file`` opens a debugger.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                true_target = label.data.float().cpu().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                # Pair each sample's predicted indices with the indices of
                # its top-5 label entries whose value is positive.
                predict_label_and_marked_label_list = [
                    (predict[jj],
                     true_index[jj][true_label[jj] > 0].tolist())
                    for jj in range(label.size(0))
                ]
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                print('prec:%s,recall:%s,score:%s,a:%s' %
                      (prec_, recall_, score_, _ss))
                print('scores', score_meter.value()[0])
                print('loss', loss_meter.value()[0])
                # NOTE(review): the result is unused; the call is kept
                # because removing it might shift the random-number stream
                # for later draws -- confirm before deleting.
                k = t.randperm(label.size(0))[0]

            if ii % opt.decay_every == opt.decay_every - 1:
                # Compute the validation score and adjust the learning rate
                # accordingly.
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                checkpoint_name = str(scores) + '&' + str(epoch) + '&' + str(ii)
                if scores > best_score:
                    print('saving NewBest')
                    best_score = scores
                    best_path = model.save(name=checkpoint_name, new=True)
                else:
                    # Saving a non-best checkpoint is best-effort.
                    try:
                        print('saving')
                        model.save(name=checkpoint_name, new=True)
                    except Exception as e:
                        print(e)

                if scores < best_score:
                    # Roll back to the best checkpoint (if one exists yet)
                    # and continue with smaller learning rates.
                    if best_path is not None:
                        model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
                    optimizer = model.get_optimizer(lr, lr2, 0)

                loss_meter.reset()
                score_meter.reset()
def main(**kwargs):
    """Dump the raw prediction scores of every checkpoint to disk.

    For each regular file under ``opt.test_model_path`` the model class and
    data type are derived from the underscore-separated file name (first and
    second-to-last fields). The model is built, loaded and run over the data
    returned by ``load_data`` in batches of ``opt.batch_size``, and the score
    matrix is saved as a float tensor to
    ``<opt.test_result_path>/<file name>.pth``.

    A processed checkpoint is moved to the sibling directory ``<dir>dealed``
    and its name appended to ``finish.getprobability.log``. If anything
    fails, the error is reported and the file name is appended to
    ``error.getprobability.log``; the loop then continues.

    Args:
        **kwargs: Option overrides forwarded to ``opt.parse``.
    """
    import traceback

    opt.parse(kwargs)
    files = glob.glob(opt.test_model_path + '/*')
    total = len(files)

    # Width of the score matrix. Hard-coded in the original; presumably the
    # number of labels -- TODO confirm.
    num_outputs = 25556

    for now, file in enumerate(files, 1):
        print('now: ' + str(now) + '/' + str(total))
        print(file)
        if not os.path.isfile(file):
            print('is path ')
            continue

        filepath, file = os.path.split(file)
        try:
            # Name parsing lives inside the try so that a file with an
            # unexpected name is logged as an error instead of aborting the
            # whole run.
            cuts = file.split('_')
            modelkind = cuts[0]
            if modelkind == 'CNNText' and cuts[1] == 'inception':
                modelkind += '_inception'
            modeltype = cuts[-2]
            opt.model_path = os.path.join(opt.test_model_path, file)
            opt.type_ = modeltype

            model = getattr(models, modelkind)(opt).cuda().eval()
            if opt.model_path is not None:
                model.load(opt.model_path)
            # Re-apply the overrides after the model has been built/loaded,
            # then restore the per-file data type.
            opt.parse(kwargs)
            opt.type_ = modeltype
            opt.result_path = os.path.join(opt.test_result_path, file) + '.pth'
            if not os.path.exists(opt.test_result_path):
                print('no dir: ' + opt.test_result_path)
                os.makedirs(opt.test_result_path)
            model = model.eval()

            test_data_title, test_data_content, _, _ = load_data(
                type_=opt.type_)
            num_samples = len(test_data_title)
            result = np.zeros((num_samples, num_outputs))

            # Walk the data in consecutive slices. Stepping by batch start
            # covers the final batch both when it is partial and when the
            # sample count is an exact multiple of the batch size (the
            # latter case used to be left as all-zero rows).
            batch_size = opt.batch_size
            for start in tqdm.tqdm(range(0, num_samples, batch_size)):
                end = min(start + batch_size, num_samples)
                title = np.array(test_data_title[start:end])
                content = np.array(test_data_content[start:end])
                result[start:end, :] = dotest(model, title, content)

            print('save')
            print(opt.result_path)
            t.save(t.from_numpy(result).float(), opt.result_path)

            # Processed checkpoints go to "<dir>dealed", a sibling of the
            # source directory (plain string concatenation, not a subdir).
            done_dir = filepath + 'dealed'
            if not os.path.exists(done_dir):
                print('no file')
                os.makedirs(done_dir)
            print('move from -- to --')
            print(os.path.join(filepath, file))
            print(done_dir)
            shutil.move(os.path.join(filepath, file), done_dir)

            with open('finish.getprobability.log', 'a', encoding='utf-8') as f:
                f.write(file + '\n')
        except Exception as e:
            # Best-effort batch job: surface what went wrong, record the
            # file name in the error log, and continue with the next file.
            print(file)
            print(e)
            traceback.print_exc()
            with open('error.getprobability.log', 'a', encoding='utf-8') as f:
                f.write(file + '\n')