async def default(self, message, args):
    group = get_group(args, message.author.id)
    ics = await get_group_ics(message, group)
    if not ics:
        return
    # This is only to test (on weekends bruh)
    # now = datetime.datetime.now().replace(tzinfo=pytz.UTC)
    # events = ics.timeline.start_after(now)
    events = list(ics.timeline.today())
    now = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
    exists = any(events)
    embed = discord.Embed(
        title="It seems you are free today!" if not exists else f"Today's {group} lessons are",
        colour=BOT_COLOR,
        timestamp=now)
    if exists:
        embed.set_thumbnail(url=ICON)
    embed.set_footer(text="Momento", icon_url=ICON)
    for event in events:
        start = datetime.datetime.fromisoformat(str(event.begin))
        end = datetime.datetime.fromisoformat(str(event.end))
        lesson_link = BASE_LESSON + get_group_id(event)
        desc = ""
        delim = "__"
        if now >= start:
            if now < end:
                fmt = format_time(get_time_diff(now - start))
                desc += f":teacher: [Started since]({lesson_link}) {fmt}\n"
            else:  # the lesson is over
                delim = "~~"
        else:
            fmt = format_time(get_time_diff(start - now))
            desc += f"⏲ [Starts]({lesson_link}) in {fmt}\n"
        desc += f"From **{start:%H:%M}** to **{end:%H:%M}** "
        desc += f"with **{get_teacher(event)}**\n"
        embed.add_field(name=f"{delim}{str(event.name)}{delim}",
                        value=desc, inline=False)
    msg = await message.channel.send(embed=embed)
    await msg.add_reaction(emoji='❌')

def predict(self, model, test_dataset, ids, coef=None):
    model.load_state_dict(torch.load(self.config.model_save_path))
    model.eval()
    start_time = time.time()
    if coef is None:
        coef = [1.0, 1.0, 1.0]
    torch_coef = torch.tensor(coef, device=self.config.device).view(-1, 3)
    predicts_all = []
    for inputs, _ in tqdm(
            DataLoader(dataset=TensorDataset(test_dataset.dataset,
                                             test_dataset.labels),
                       batch_size=self.config.batch_size,
                       shuffle=False)):
        outputs = model(inputs)
        outputs = F.softmax(outputs, dim=1)
        outputs = outputs * torch_coef
        predicts = list(torch.max(outputs.data, dim=1)[1].cpu().numpy() - 1)
        predicts_all = predicts_all + predicts
    time_dif = get_time_diff(start_time)
    print("Time usage:", time_dif)
    result_pd = pd.DataFrame({'id': ids, 'y': predicts_all})
    result_pd.to_csv('predict_ans.csv', index=False)
    print("finish !")

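# Note: across these snippets get_time_diff appears with three different
# signatures (elapsed-since-a-float, two timestamps, and a timedelta), so each
# project defines its own helper. For the single-argument form used by the
# PyTorch snippets here, a minimal sketch of a plausible implementation — an
# assumption, not code taken from any of these projects:
import time
from datetime import timedelta


def get_time_diff(start_time):
    """Wall-clock time elapsed since start_time (a time.time() float),
    rounded to whole seconds so it prints cleanly."""
    end_time = time.time()
    return timedelta(seconds=int(round(end_time - start_time)))
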
def train_model(self, data_loader, model):
    # Train the model
    start_time = time.time()  # record the starting time
    optimizer = optim.Adam(model.parameters(),
                           lr=self.config.learn_rate,
                           betas=(0.9, 0.999))
    model.train()
    criterion = nn.CrossEntropyLoss()
    total_batch = 0
    for data_batch in data_loader:
        total_batch += 1
        sentences1 = data_batch[0]
        sentences2 = data_batch[1]
        labels = data_batch[2]
        if torch.cuda.is_available():  # move tensors to the configured device
            sentences1 = data_batch[0].to(self.config.device)
            sentences2 = data_batch[1].to(self.config.device)
            labels = data_batch[2].to(self.config.device)
        optimizer.zero_grad()
        outputs = model(sentences1, sentences2)
        # compute the loss
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if total_batch % 100 == 0:
            true_label = labels.data.cpu().numpy()
            predict = torch.max(outputs, dim=1)[1].cpu().numpy()  # get predictions
            train_acc = metrics.accuracy_score(true_label, predict)
            time_diff = get_time_diff(start_time)
            msg = 'Iter:{0:>6} Train loss: {1:>5.3} Train acc:{2:>6.2%} Time:{3}'
            print(msg.format(total_batch, loss.item(), train_acc, time_diff))

def print_end_message(start_time):
    """Print a message marking the end of the run, along with the total time
    elapsed since the beginning."""
    end_time = utils.get_time()
    time_lapsed = utils.get_time_diff(start_time, end_time)
    print(f"Bachelor thesis code ended at {end_time}."
          f" Total time lapsed: {time_lapsed}")

def on_epoch_end(self, epoch, logs={}):
    predict = self.model.predict_classes(self.x_val, batch_size=self.batch_size)
    # Average precision
    weighted_prec = precision_score(self.y_val, predict, average='weighted') * 100
    self.avg_precision_weighted.append(weighted_prec)
    micro_prec = precision_score(self.y_val, predict, average='micro') * 100
    self.avg_precision_micro.append(micro_prec)
    macro_prec = precision_score(self.y_val, predict, average='macro') * 100
    self.avg_precision_macro.append(macro_prec)
    # Average recall
    weighted_recall = recall_score(self.y_val, predict, average='weighted') * 100
    self.avg_recall_weighted.append(weighted_recall)
    micro_recall = recall_score(self.y_val, predict, average='micro') * 100
    self.avg_recall_micro.append(micro_recall)
    macro_recall = recall_score(self.y_val, predict, average='macro') * 100
    self.avg_recall_macro.append(macro_recall)
    # Average F1
    weighted_fscore = f1_score(self.y_val, predict, average='weighted') * 100
    self.avg_f1score_weighted.append(weighted_fscore)
    micro_fscore = f1_score(self.y_val, predict, average='micro') * 100
    self.avg_f1score_micro.append(micro_fscore)
    macro_fscore = f1_score(self.y_val, predict, average='macro') * 100
    self.avg_f1score_macro.append(macro_fscore)
    # Per-class metrics
    positive_prec = precision_score(self.y_val, predict, average='binary') * 100
    neg_prec = precision_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_precision.append(positive_prec)
    self.neg_precision.append(neg_prec)
    pos_recall = recall_score(self.y_val, predict, average='binary') * 100
    neg_recall = recall_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_recall.append(pos_recall)
    self.neg_recall.append(neg_recall)
    pos_fscore = f1_score(self.y_val, predict, average='binary') * 100
    neg_fscore = f1_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_f1_score.append(pos_fscore)
    self.neg_f1_score.append(neg_fscore)
    self.loss.append(logs.get('loss'))
    acc = accuracy_score(self.y_val, predict) * 100
    self.accuracy.append(acc)
    done = time.time()
    elapsed_formatted = get_time_diff(self.start_time, done)
    elapsed_time = int((done - self.start_time) / 60)  # in minutes
    print(elapsed_formatted)
    print(elapsed_time)
    self.elapsed_time.append(elapsed_time)
    self.elapsed_time_formatted.append(elapsed_formatted)
    result_csv = open(self.out_csv, "a+")
    csv_writer = csv.writer(result_csv, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_ALL)
    csv_writer.writerow([epoch, acc, logs.get('loss'), weighted_prec,
                         weighted_recall, weighted_fscore, positive_prec,
                         neg_prec, pos_recall, neg_recall, pos_fscore,
                         neg_fscore, elapsed_time])
    result_csv.close()
    return

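# The callback above, like print_end_message earlier and
# _load_data_into_dataframe further down, calls a two-argument
# get_time_diff(start, end) and logs the result as a string. A hedged sketch
# of that variant, assuming both arguments are time.time() floats:
def get_time_diff(start, end):
    """Format the span between two time.time() floats as H:MM:SS."""
    seconds = int(round(end - start))
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return "{:d}:{:02d}:{:02d}".format(hours, minutes, secs)
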
async def next(self, message, args):
    group = get_group(args, message.author.id)
    ics = await get_group_ics(message, group)
    if not ics:
        return
    now = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC)
    # Should be using this but the function is overloaded Kek
    # event = next(ics.timeline.start_after(now))
    event = ics.timeline.start_after(now).__next__()
    start = datetime.datetime.fromisoformat(str(event.begin))
    end = datetime.datetime.fromisoformat(str(event.end))
    lesson_link = BASE_LESSON + get_group_id(event)
    desc = ""
    if now >= start:
        if now < end:
            fmt = format_time(get_time_diff(now - start))
            desc += f":teacher: [Started since]({lesson_link}) {fmt} "
    else:
        fmt = format_time(get_time_diff(start - now))
        desc += f"⏲ [Starts]({lesson_link}) in {fmt} "
    fmt = "%A %d %B"
    desc += f"on **{start.strftime(fmt)}**\n"
    desc += f"From **{start:%H:%M}** to **{end:%H:%M}** "
    desc += f"with **{get_teacher(event)}**\n"
    embed = discord.Embed(title=f"Next lesson for {group}",
                          colour=BOT_COLOR,
                          timestamp=now)
    embed.add_field(name=f"__{str(event.name)}__", value=desc, inline=False)
    try:
        url = PROFESSORS[get_teacher(event).lower()]
    except KeyError:  # no picture for this teacher, fall back to the bot icon
        url = ICON
    embed.set_thumbnail(url=url)
    embed.set_footer(text="Momento", icon_url=ICON)
    await message.channel.send(embed=embed)

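# In the two Discord commands above, get_time_diff receives a timedelta
# (now - start or start - now) and format_time turns the result into display
# text. Hypothetical sketches of the pair under that assumption; neither
# implementation is taken from the bot's source:
def get_time_diff(delta):
    """Split a timedelta into an (hours, minutes) pair."""
    total_minutes = int(delta.total_seconds() // 60)
    return divmod(total_minutes, 60)


def format_time(diff):
    """Render an (hours, minutes) pair as a short human-readable string."""
    hours, minutes = diff
    return f"{hours}h{minutes:02d}" if hours else f"{minutes} min"
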
def evaluate_model(self, model, dev_loader, fold_num):
    start_time = time.time()
    dev_acc, dev_loss, dev_report, dev_confusion, f1_score = self.evaluate(
        model, dev_loader, fold_num, flag=True)
    print('Metric-based optimization not used')
    msg = "Test Loss:{0:>5.2}, Test Acc:{1:>6.2%}"
    print(msg.format(dev_loss, dev_acc))
    print("Precision, Recall and F1-Score...")
    print(dev_report)
    print("Confusion Matrix...")
    print(dev_confusion)
    time_diff = get_time_diff(start_time)
    print("Time usage:", time_diff)

def on_epoch_end(self, epoch, logs={}):
    predict = self.model.predict_classes(self.x_val, batch_size=self.batch_size)
    # Average precision
    weighted_prec = precision_score(self.y_val, predict, average='weighted') * 100
    self.avg_precision_weighted.append(weighted_prec)
    micro_prec = precision_score(self.y_val, predict, average='micro') * 100
    self.avg_precision_micro.append(micro_prec)
    macro_prec = precision_score(self.y_val, predict, average='macro') * 100
    self.avg_precision_macro.append(macro_prec)
    # Average recall
    weighted_recall = recall_score(self.y_val, predict, average='weighted') * 100
    self.avg_recall_weighted.append(weighted_recall)
    micro_recall = recall_score(self.y_val, predict, average='micro') * 100
    self.avg_recall_micro.append(micro_recall)
    macro_recall = recall_score(self.y_val, predict, average='macro') * 100
    self.avg_recall_macro.append(macro_recall)
    # Average F1
    weighted_fscore = f1_score(self.y_val, predict, average='weighted') * 100
    self.avg_f1score_weighted.append(weighted_fscore)
    micro_fscore = f1_score(self.y_val, predict, average='micro') * 100
    self.avg_f1score_micro.append(micro_fscore)
    macro_fscore = f1_score(self.y_val, predict, average='macro') * 100
    self.avg_f1score_macro.append(macro_fscore)
    # Per-class metrics
    positive_prec = precision_score(self.y_val, predict, average='binary') * 100
    neg_prec = precision_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_precision.append(positive_prec)
    self.neg_precision.append(neg_prec)
    pos_recall = recall_score(self.y_val, predict, average='binary') * 100
    neg_recall = recall_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_recall.append(pos_recall)
    self.neg_recall.append(neg_recall)
    pos_fscore = f1_score(self.y_val, predict, average='binary') * 100
    neg_fscore = f1_score(self.y_val, predict, average='binary', pos_label=0) * 100
    self.pos_f1_score.append(pos_fscore)
    self.neg_f1_score.append(neg_fscore)
    self.loss.append(logs.get('loss'))
    acc = accuracy_score(self.y_val, predict) * 100
    self.accuracy.append(acc)
    done = time.time()
    elapsed_formatted = get_time_diff(self.start_time, done)
    elapsed_time = done - self.start_time
    self.elapsed_time.append(elapsed_time)
    self.elapsed_time_formatted.append(elapsed_formatted)
    return

def process(self, vals):
    self.event_count = self.event_count + 1
    event = vals[8]
    curr_time = vals[1]
    if event in ["PRACTICE_SEARCH_TASK_COMMENCED", "SEARCH_TASK_COMMENCED"]:
        self.session_start_time = curr_time
    if self.session_start_time is not None and event in [
            "PRACTICE_SEARCH_TASK_COMPLETED",
            "SESSION_COMPLETED",
            "EXPERIMENT_TIMEOUT",
            "SNIPPET_POSTTASK_SURVEY_STARTED",
            "SEARCH_TASK_COMPLETED",
    ]:
        self.session_time = get_time_diff(self.session_start_time, curr_time)
        self.session_start_time = None
    if event in ["DOC_CLICKED"]:
        self.doc_clicked_time = curr_time
    if event in ["DOC_MARKED_VIEWED", "DOC_MARKED_RELEVANT"]:
        self.doc_lag_time += get_time_diff(self.doc_clicked_time, curr_time)

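# The log-processing classes in this file (process, end_query_session,
# update_times) pass timestamp strings read from log lines, and update_times
# compares the result against floats ("< 10.0"), so this variant must return
# seconds as a float. A sketch under those assumptions; the timestamp format
# differs between callers (time-only here, '{date} {time}' elsewhere) and is
# guessed:
from datetime import datetime


def get_time_diff(start_str, end_str):
    """Seconds between two 'YYYY-MM-DD HH:MM:SS' log timestamps, as a float."""
    fmt = "%Y-%m-%d %H:%M:%S"
    start = datetime.strptime(start_str, fmt)
    end = datetime.strptime(end_str, fmt)
    return (end - start).total_seconds()
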
def test(config, model, test_iter):
    model.load_state_dict(torch.load(config.save_path))
    model.eval()
    start_time = time.time()
    test_acc, test_loss, test_report, test_confusion = evaluate(
        config, model, test_iter, test=True)
    msg = "Test Loss: {0:>5.2}, Test Acc: {1:>6.2%}"
    print(msg.format(test_loss, test_acc))
    print("Precision, Recall and F1-Score...")
    print(test_report)
    print("Confusion Matrix...")
    print(test_confusion)
    time_dif = get_time_diff(start_time)
    print("Time usage:", time_dif)

def _load_data_into_dataframe(filename, is_train):
    """Load the training or prediction data from disk into a DataFrame."""
    data_path = os.path.join(conf.DATA_DIR, filename) if is_train else conf.PRED_DATA_DIR
    logger.info('Loading dataset: %s' % data_path)
    start_time = time()
    if not is_train:
        disk_smart_df = _get_pred_data(data_path)
    # the store format for pre-processed train data is h5
    else:
        disk_smart_df = pd.read_hdf(
            data_path,
            columns=SELECTED_CONT_COLS + SELECTED_INDEX_COLS +
            SELECTED_CATE_COLS + SELECTED_LABEL_COLS,
        )
    logger.info('Columns used: %s' % disk_smart_df.columns)
    end_time = time()
    logger.info('Finished loading dataset, total time: %s' % get_time_diff(start_time, end_time))
    return disk_smart_df

def end_query_session(self, end_time):
    # DMAX added in this condition to take the first event only.
    # Subsequent events add overhead to the time - that isn't strictly part of the session.
    if self.session_end_time is None:
        # Added by David on December 13, 2016
        # Last events (e.g. EXPERIMENT_TIMEOUT) should not be considered as the final session event.
        # Some people in the experiment walk away (or something) for several minutes, meaning that times are way out.
        # In this case, we roll back to the last interaction event - where the event is not EXPERIMENT_TIMEOUT or SESSION_COMPLETED.
        end_time = self.last_interaction_time
        self.session_end_time = end_time
        # print "END EVENT: {0}".format(self.last_interaction_event)
        # print "END TIME: {0}".format(self.last_interaction_time)
        self.session_time = get_time_diff(self.session_start_time, end_time)
        # print "session time", self.session_time
        self.update_times(end_time)
        #if self.last_event == 'VIEW_SEARCH_RESULTS_PAGE':
        #    self.snippet_time = self.snippet_time + get_time_diff(self.view_serp_time, end_time)

        # Adding some code to work out probabilities for clicking!
        relevant_count = 0
        for i in range(0, self.hover_depth):
            if self.hover_depth > len(self.query_response.results):
                continue
            if self.qrel_handler.get_value(self.topic, self.query_response.results[i].docid) > 0:
                relevant_count = relevant_count + 1
        for i in range(0, self.hover_depth):
            docid_at_rank = self.query_response.results[i].docid
            if is_relevant(self.qrel_handler, self.topic, docid_at_rank) == 0:
                self.hover_trec_nonrel_count = self.hover_trec_nonrel_count + 1
            else:
                self.hover_trec_rel_count = self.hover_trec_rel_count + 1

def predict_k_fold(self, model, test_loader, ids, fold_index, coef=None):
    model.load_state_dict(
        torch.load(self.config.model_save_path + "-fold" + str(fold_index)))
    model.eval()
    start_time = time.time()
    if coef is None:
        coef = [1.0, 1.0, 1.0]
    torch_coef = torch.tensor(coef, device=self.config.device).view(-1, 3)
    # placeholder row; dropped after all batch predictions are concatenated
    predicts_all = np.random.randn(1, 3)
    with torch.no_grad():
        for data_batch in tqdm(test_loader):
            input_ids = data_batch[0].clone().detach().to(self.config.device)
            attention_masks = data_batch[1].clone().detach().to(self.config.device)
            token_type_ids = data_batch[2].clone().detach().to(self.config.device)
            model_inputs = (input_ids, attention_masks, token_type_ids)
            outputs = model(model_inputs)
            outputs = F.softmax(outputs, dim=1)
            outputs = outputs * torch_coef
            predicts = outputs.cpu().numpy()
            predicts_all = np.concatenate((predicts_all, predicts), axis=0)
    time_dif = get_time_diff(start_time)
    print("Time usage:", time_dif)
    predicts_all = np.delete(predicts_all, 0, axis=0)  # drop the placeholder row
    result_pd = pd.DataFrame(
        {
            '-1': predicts_all.T[0],
            '0': predicts_all.T[1],
            '1': predicts_all.T[2],
        },
        index=ids)
    save_name = self.config.predict_save_path + "-_fold" + str(fold_index) + ".csv"
    result_pd.to_csv(save_name)

def test(config, model, test_iter):
    """
    Model testing.
    :param config:
    :param model:
    :param test_iter:
    :return:
    """
    model.load_state_dict(torch.load(config.save_path))
    model.eval()
    start_time = time.time()
    test_acc, test_loss, test_report, test_confusion = evaluate(
        config, model, test_iter, test=True)
    msg = 'Test Loss:{0:>5.2}, Test Acc:{1:>6.2%}'
    print(msg.format(test_loss, test_acc))
    print("Precision, Recall and F1-Score")
    print(test_report)
    print("Confusion Matrix")
    print(test_confusion)
    time_diff = utils.get_time_diff(start_time)
    print("Time usage:", time_diff)

def get_time_diff(self):
    return utils.get_time_diff(self)

def train(config, model, train_iter, dev_iter, test_iter=None):
    start_time = time.time()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)
    total_batch = 0
    dev_best_loss = float("inf")
    last_improve = 0
    flag = False
    writer = SummaryWriter(
        log_dir=os.path.join(config.log_path, time.strftime('%H_%M_%S')))
    for epoch in range(config.num_epoches):
        print("Epoch [{}/{}]".format(epoch + 1, config.num_epoches))
        # scheduler.step()
        for i, (trains, labels) in enumerate(train_iter):
            outputs = model(trains)
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()
            if total_batch % 100 == 0:
                true = labels.data.cpu()
                predic = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predic)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = "*"
                    last_improve = total_batch
                else:
                    improve = ""
                time_dif = get_time_diff(start_time)
                msg = ("Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, "
                       "Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Time: {5} {6}")
                print(msg.format(total_batch, loss.item(), train_acc,
                                 dev_loss, dev_acc, time_dif, improve))
                writer.add_scalar("loss/train", loss.item(), total_batch)
                writer.add_scalar("loss/dev", dev_loss, total_batch)
                writer.add_scalar("acc/train", train_acc, total_batch)
                writer.add_scalar("acc/dev", dev_acc, total_batch)
                model.train()
            total_batch += 1
            if total_batch - last_improve > config.require_improvement:
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break
    writer.close()
    if test_iter:
        test(config, model, test_iter)
    else:
        test(config, model, dev_iter)

    default='SkyerERNIEDPCNN',
    help='choose a model: SkyerBert, SkyerBertCNN, SkyerBertRNN, SkyerBertRCNN, '
         'SkyerBertDPCNN, SkyerERNIE, SkyerERNIEDPCNN')
args = parser.parse_args()

if __name__ == '__main__':
    dataset = 'THUCNews'  # dataset directory
    model_name = args.model
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(4)
    torch.backends.cudnn.deterministic = True  # ensure reproducible results across runs
    start_time = time.time()
    print('Loading dataset')
    train_data, dev_data, test_data = utils.build_dataset(config)
    train_iter = utils.build_iterator(train_data, config)
    dev_iter = utils.build_iterator(dev_data, config)
    test_iter = utils.build_iterator(test_data, config)
    time_diff = utils.get_time_diff(start_time)
    print("Data preparation time before training:", time_diff)
    # model training, evaluation and testing
    model = x.Model(config).to(config.device)
    train.train(config, model, train_iter, dev_iter, test_iter)
    # train.test(config, model, test_iter)

def train(config, model, train_iter, dev_iter, test_iter):
    """
    Model training routine.
    :param config:
    :param model:
    :param train_iter:
    :param dev_iter:
    :param test_iter:
    :return:
    """
    start_time = time.time()
    # enable BatchNormalization and dropout (training mode)
    model.train()
    # collect all of the model's named parameters
    param_optimizer = list(model.named_parameters())
    # parameters that should not receive weight decay
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = BertAdam(params=optimizer_grouped_parameters,
                         lr=config.learning_rate,
                         warmup=0.05,
                         t_total=len(train_iter) * config.num_epochs)
    total_batch = 0  # number of batches processed so far
    dev_best_loss = float('inf')  # best loss seen on the validation set
    last_improve = 0  # batch count at the last validation-loss improvement
    flag = False  # whether training has stalled for too long and should stop
    model.train()
    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for i, (trains, labels) in enumerate(train_iter):
            outputs = model(trains)
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward(retain_graph=False)
            optimizer.step()
            if total_batch % 100 == 0:
                # every 100 batches, report metrics on the training and validation sets
                true = labels.data.cpu()
                predict = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predict)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''
                time_diff = utils.get_time_diff(start_time)
                msg = 'Iter:{0:>6}, Train Loss:{1:>5.2}, Train Acc:{2:>6.2%}, Val Loss:{3:>5.2}, Val Acc:{4:>6.2%}, Time:{5} {6}'
                print(msg.format(total_batch, loss.item(), train_acc,
                                 dev_loss, dev_acc, time_diff, improve))
                model.train()
            total_batch = total_batch + 1
            if total_batch - last_improve > config.require_improvement:
                # validation loss has not dropped for over `require_improvement` batches; end training
                print('No improvement on the validation set for a long time; stopping training automatically')
                flag = True
                break
        if flag:
            break
    test(config, model, test_iter)

                      if not label.startswith('.'))
config = x.Config(data_dir, class_list, vocab_path)
config.pad_size = pad_size
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True
start_time = time.time()
print("Loading data...")
vocab, train_data, dev_data, test_data = build_dataset(config, args.word)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
test_iter = build_iterator(test_data, config)
time_dif = get_time_diff(start_time)
print("Time usage:", time_dif)
config.num_vocab = len(vocab)
model = x.Model(config).to(config.device)
init_network(model)
print(model.parameters)
train(config, model, train_iter, dev_iter)
predictions = predict(config, model, test_iter)
if not os.path.exists('result'):
    os.mkdir('result')
with open(os.path.join('result',

def train(self, model, train_loader, dev_loader, fold_num, use_weight=False):
    start_time = time.time()
    t_total = len(train_loader) * self.config.num_epochs
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': self.config.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.0
    }]
    warmup_steps = int(t_total * 0.1)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=self.config.learn_rate,
                      eps=1e-8)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)
    total_batch = 0
    if use_weight:
        weight = torch.tensor([2, 1, 1.5],
                              dtype=torch.float).to(self.config.device)
        criterion = nn.CrossEntropyLoss(weight=weight)
    else:
        criterion = nn.CrossEntropyLoss()
    dev_per_batch = 500
    #dev_best_loss = float('inf')
    f1_score_best = 0
    last_improve = 0
    model.train()
    total_loss = 0
    if self.config.adv_type == 'fgm':
        fgm = FGM(model)
    for epoch in range(self.config.num_epochs):
        print('epoch [{}/{}]'.format(epoch + 1, self.config.num_epochs))
        for data_batch in train_loader:
            # convert the batch to tensors on the configured device
            input_ids = data_batch[0].clone().detach().to(self.config.device)
            attention_masks = data_batch[1].clone().detach().to(self.config.device)
            token_type_ids = data_batch[2].clone().detach().to(self.config.device)
            labels = data_batch[3].clone().detach().long().to(self.config.device)
            model_inputs = (input_ids, attention_masks, token_type_ids)
            total_batch += 1
            model.zero_grad()
            outputs = model(model_inputs)
            loss = criterion(outputs, labels.view(-1))
            loss.backward()
            # adversarial training (FGM)
            if self.config.adv_type == 'fgm':
                fgm.attack()
                adv_outputs = model(model_inputs)
                loss_adv = criterion(adv_outputs, labels.view(-1))
                loss_adv.backward()
                fgm.restore()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           self.config.max_grad_norm)
            optimizer.step()
            scheduler.step()  # update learning rate schedule
            if total_batch % dev_per_batch == 0:
                true_labels = labels.data.cpu()
                predicts = torch.max(outputs.data, dim=1)[1].cpu().numpy()
                train_acc = metrics.accuracy_score(true_labels, predicts)
                time_dif = get_time_diff(start_time)
                dev_acc, dev_loss, report, confusion, f1_score = self.evaluate(
                    model, dev_loader, fold_num)
                model.train()
                if f1_score_best < f1_score:
                    f1_score_best = f1_score
                    improve = '*'
                    torch.save(
                        model.state_dict(),
                        self.config.model_save_path + "-fold" + str(fold_num))
                    torch.save(model.state_dict(), 'saveModel/temp')
                else:
                    improve = ' '
                    if f1_score_best - 0.02 > f1_score:
                        improve = '-'
                        model.load_state_dict(torch.load('saveModel/temp'))
                    else:
                        torch.save(model.state_dict(), 'saveModel/temp')
                msg = 'Epoch:{0:>2} Iter: {1:>6}, Train Loss: {2:>5.2}, Train Acc: {3:>6.3%},' \
                      ' Dev Loss: {4:>5.2}, Dev Acc: {5:>6.3%}, f1_score: {6:>8.7}, Time: {7} {8}'
                print(msg.format(epoch + 1, total_batch, loss.item(),
                                 train_acc, dev_loss, dev_acc, f1_score,
                                 time_dif, improve))
    self.evaluate_model(model, dev_loader, fold_num)

def update_times(self, curr_time):
    #print curr_time, self.last_time, get_time_diff(self.last_time, curr_time)
    if self.curr_event == 'DELAY_RESULTS_PAGE':
        self.serp_lag = get_time_diff(self.session_start_time, curr_time)
        self.last_query_delay_time = curr_time

    if self.curr_event == 'QUERY_COMPLETE':  # Was VIEW_SEARCH_RESULTS_PAGE
        if self.last_event == 'DELAY_RESULTS_PAGE':
            self.imposed_query_delay = get_time_diff(self.last_query_delay_time, curr_time)
        #if self.last_event == 'QUERY_END':  # Was QUERY_ISSUED
        #    self.serp_lag = get_time_diff(self.session_start_time, curr_time)

    if self.system_query_delay == 0.0 and self.curr_event == 'QUERY_END' and self.last_event == 'QUERY_START':
        self.system_query_delay = self.system_query_delay + get_time_diff(self.last_time, curr_time)

    if self.curr_event == 'DOCUMENT_DELAY_VIEW':
        # Document delay occurred, so track the time this happened at.
        self.last_document_delay_time = curr_time
        self.view_serp_time = self.view_serp_time + get_time_diff(self.last_time, curr_time)

    if self.curr_event == 'DOC_MARKED_VIEWED':
        if self.last_document_delay_time:
            if get_time_diff(self.last_document_delay_time, curr_time) < 10.0:
                self.imposed_document_delay += get_time_diff(self.last_document_delay_time, curr_time)
            else:
                self.view_serp_time += get_time_diff(self.last_time, curr_time)
        else:
            self.view_serp_time += get_time_diff(self.last_time, curr_time)

    if self.curr_event in ['DOCUMENT_HOVER_OUT', 'DOCUMENT_HOVER_IN', 'QUERY_FOCUS', 'VIEW_SAVED_DOCS', 'VIEW_TASK']:
        self.view_serp_time = self.view_serp_time + get_time_diff(self.last_time, curr_time)
        # This could be more robust.
        # What if the searcher were to view the list of documents marked, or view the task, whilst viewing a document?
        # Maybe this functionality should be disabled while a document is being viewed.

    # Commented out by DMAX on June 8th 2016 - replaced with more robust document time measures (see below).
    #if self.last_event in ['DOC_MARKED_VIEWED','DOC_MARKED_RELEVANT','DOC_MARKED_NONRELEVANT']:
    #    self.document_time = self.document_time + get_time_diff(self.last_time, curr_time)

    # DMAX - Added new document time measures (June 8th 2016)
    # self.doc_click_time contains the document click time. Set to False otherwise.
    if not self.doc_click_time and self.curr_event == 'DOC_CLICKED':
        self.doc_click_time = curr_time

    # Added in VIEW_SAVED_DOCS to cater for the event where a searcher flips to the saved document screen instead.
    if self.doc_click_time and self.curr_event in ['QUERY_START', 'VIEW_SAVED_DOCS', 'PRACTICE_SEARCH_TASK_COMPLETED', 'SESSION_COMPLETED', 'EXPERIMENT_TIMEOUT', 'SNIPPET_POSTTASK_SURVEY_STARTED', 'SEARCH_TASK_COMPLETED']:
        self.document_time = self.document_time + get_time_diff(self.doc_click_time, curr_time)
        self.doc_click_time = False
    # DMAX - End new document time measures

    # DMAX - Adding in new SERP details
    if not self.last_serp_event and self.curr_event == 'VIEW_SEARCH_RESULTS_PAGE':
        self.last_serp_event = curr_time
    #elif self.last_serp_event and self.curr_event == 'QUERY_FOCUS':
    #    print 'QF', curr_time
    elif self.last_serp_event and self.curr_event not in ['DOCUMENT_HOVER_IN', 'DOCUMENT_HOVER_OUT']:
        self.new_total_serp = self.new_total_serp + get_time_diff(self.last_serp_event, curr_time)
        self.last_serp_event = None
    # DMAX - End new SERP details

    # DMAX - Updated SERP lag time
    if self.curr_event == 'QUERY_END' and self.last_event == 'QUERY_START':
        self.serp_lag = self.serp_lag + get_time_diff(self.last_time, curr_time)

def process(self, vals):
    self.event_count = self.event_count + 1
    #self.last_event_time
    # We want to measure query time from the last QUERY_FOCUS event.
    # We could do it from the first, but we decided this could be too unreliable...
    # So every time we see a new QUERY_FOCUS, we override what we have before and update the time accordingly.
    # Commented out this line so that this is overwritten
    #if self.last_query_focus_time is None:
    if ('QUERY_FOCUS' in vals):
        self.last_query_focus_time = '{date} {time}'.format(date=vals[0], time=vals[1])
    if self.last_query_focus_time is None:
        if ('VIEW_SEARCH_BOX' in vals):
            self.last_query_focus_time = '{date} {time}'.format(date=vals[0], time=vals[1])
    # End de-dentation

    if ('QUERY_ISSUED' in vals):
        # new query, create a query log entry
        if self.current_query:
            if self.last_query_focus_time:
                lqft = self.last_query_focus_time
            else:
                lqft = self.last_event_time  # We didn't see a FOCUS or VIEW_SEARCH_BOX, so fall back to the last event time.
            self.current_query.end_query_session(lqft)
        #print "QUERY ISSUED:", vals[8:]
        #print self.last_query_focus_time, ':::', vals[1], ':::', get_time_diff(self.last_query_focus_time, vals[1])
        if self.last_query_focus_time is None:
            self.last_query_focus_time = self.last_event_time
        self.current_query = QueryLogEntry(
            self.key, vals, self.qrel_handler, self.engine,
            get_time_diff(self.last_query_focus_time,
                          '{date} {time}'.format(date=vals[0], time=vals[1])))
        self.last_query_focus_time = None
        self.query_ended_previously = False
        self.queries.append(self.current_query)
    else:
        if self.current_query:
            # process result under this query object
            self.current_query.process(vals)

    # probably should put a condition on this (start task, doc viewed, view serp, etc.), not all/any
    self.last_event_time = '{date} {time}'.format(date=vals[0], time=vals[1])
    event = vals[8]
    if event in ['PRACTICE_SEARCH_TASK_COMPLETED', 'SESSION_COMPLETED', 'EXPERIMENT_TIMEOUT', 'SNIPPET_POSTTASK_SURVEY_STARTED', 'SEARCH_TASK_COMPLETED']:
        #print 'search task complete - event'
        if self.current_query and not self.query_ended_previously:
            #print "end of search session"
            self.current_query.end_query_session('{date} {time}'.format(date=vals[0], time=vals[1]))
            self.query_ended_previously = True
            # Code for removing documents that were previously marked, but are then reselected as non-relevant.
            all_docs_unmarked = []
            for query_object in self.queries:
                all_docs_unmarked = all_docs_unmarked + query_object.doc_unmarked_list
                query_object.doc_unmarked_list = []
            for query_object in self.queries:
                for docid in all_docs_unmarked:
                    if docid in query_object.doc_marked_list:
                        topic = self.key.split(' ')[4]
                        query_object.doc_marked_list.remove(docid)
                        query_object.doc_rel_count = query_object.doc_rel_count - 1
                        if is_relevant(self.qrel_handler, topic, docid) == 0:
                            query_object.doc_clicked_trec_nonrel_count = query_object.doc_clicked_trec_nonrel_count - 1
                        else:
                            query_object.doc_clicked_trec_rel_count = query_object.doc_clicked_trec_rel_count - 1