def evaluate(self, step):
    predictor = CopyRnnPredictor(model_info={'model': self.model, 'config': self.args},
                                 vocab_info=self.vocab2id,
                                 beam_size=self.args.beam_size,
                                 max_target_len=self.args.max_target_len,
                                 max_src_length=self.args.max_src_len)

    def pred_callback(stage):
        if stage == 'valid':
            src_filename = self.args.valid_filename
            dest_filename = self.dest_dir + self.get_basename(self.args.valid_filename)
        elif stage == 'test':
            src_filename = self.args.test_filename
            dest_filename = self.dest_dir + self.get_basename(self.args.test_filename)
        else:
            raise ValueError('stage name error, must be one of `valid` or `test`')
        dest_filename += '.batch_{}.pred.jsonl'.format(step)

        def predict_func():
            predictor.eval_predict(src_filename=src_filename,
                                   dest_filename=dest_filename,
                                   args=self.args,
                                   model=self.model,
                                   remove_existed=True)

        return predict_func

    valid_statistics = self.evaluate_stage(step, 'valid', pred_callback('valid'))
    test_statistics = self.evaluate_stage(step, 'test', pred_callback('test'))
    total_statistics = {**valid_statistics, **test_statistics}
    eval_filename = self.dest_dir + self.args.exp_name + '.batch_{}.eval.json'.format(step)
    write_json(eval_filename, total_statistics)
    # report validation macro F1 at the largest top-n cutoff
    return valid_statistics['valid_macro'][self.eval_topn[-1]]['f1']
def build_index(self, dest_filename):
    indexer = hnswlib.Index(space='cosine', dim=self.dim)
    paper_id_list = []
    vector_list = []
    idx2paper_id = {}
    vector_id_list = []
    start = time.time()
    for vec_idx, (paper_id, vector) in enumerate(self.read_paper_item()):
        vector_id_list.append(vec_idx)
        paper_id_list.append(paper_id)
        vector_list.append(vector)
        idx2paper_id[vec_idx] = paper_id
    duration = time.time() - start
    msg_tmpl = 'vector loading completed, time consumed {:.0f}min {:.2f}sec'
    print(msg_tmpl.format(duration // 60, duration % 60))
    num_elements = len(paper_id_list)
    indexer.init_index(max_elements=num_elements, ef_construction=200, M=100)
    # hnswlib only supports integer labels, so the mapping from label id
    # to paper id has to be saved alongside the index
    indexer.add_items(vector_list, vector_id_list)
    indexer.set_ef(500)
    indexer.save_index(dest_filename)
    write_json(dest_filename + '.map', idx2paper_id)
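# Usage sketch (not part of the original module): load the index written by
# build_index and retrieve the nearest papers for a query vector. The helper
# name `query_paper_index` and `read_json` (the counterpart of write_json)
# are assumptions for illustration only.
def query_paper_index(index_filename, dim, query_vector, topk=10):
    indexer = hnswlib.Index(space='cosine', dim=dim)
    indexer.load_index(index_filename)
    indexer.set_ef(500)
    idx2paper_id = read_json(index_filename + '.map')
    labels, distances = indexer.knn_query([query_vector], k=topk)
    # JSON keys are strings, so cast the integer labels before the lookup
    return [(idx2paper_id[str(label)], float(dist))
            for label, dist in zip(labels[0], distances[0])]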
def train_func(self):
    model = CopyRnnTF(self.args, self.vocab2id)
    dataloader = KeyphraseDataLoader(data_source=self.args.train_filename,
                                     vocab2id=self.vocab2id,
                                     mode='train',
                                     args=self.args)
    optimizer = tf.keras.optimizers.Adam(learning_rate=self.args.learning_rate)

    @tf.function
    def train_step(x, x_with_oov, x_len, target):
        batch_size = x.shape[0]
        dec_len = self.args.max_target_len
        with tf.GradientTape() as tape:
            loss = 0
            probs, enc_output, prev_h, prev_c = model(x, x_with_oov, x_len,
                                                      tf.constant(0),
                                                      target[:, :-1],
                                                      None, None,
                                                      tf.convert_to_tensor(batch_size),
                                                      dec_len)
            # negative log-likelihood per example, with padded decoder
            # positions masked out of the average
            for batch_idx in range(batch_size):
                dec_target = target[batch_idx, 1:]
                target_idx = tf.one_hot(dec_target, self.total_vocab_size)
                dec_step_loss = -tf.reduce_sum(probs[batch_idx, :] * target_idx, axis=1)
                mask = tf.cast(dec_target != self.pad_idx, dtype=tf.float32)
                dec_step_loss *= mask
                loss += tf.reduce_sum(dec_step_loss) / tf.reduce_sum(mask)
            loss /= batch_size
        grads = tape.gradient(loss, model.trainable_variables)
        grads = [tf.clip_by_value(grad, -0.1, 0.1) for grad in grads]
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss

    step_idx = 0
    for epoch in range(self.args.epochs):
        for batch in dataloader:
            loss = train_step(batch[TOKENS], batch[TOKENS_OOV],
                              batch[TOKENS_LENS], batch[TARGET])
            with self.writer.as_default():
                tf.summary.scalar('loss', loss, step=step_idx)
            step_idx += 1

            if not step_idx % self.args.save_model_step:
                model_basename = self.dest_base_dir + '/{}_step{}'.format(self.exp_name,
                                                                          step_idx)
                # alternative: export a SavedModel with a concrete beam search signature
                # beam_search_graph = model.beam_search.get_concrete_function(
                #     x=tf.TensorSpec(shape=[None, self.args.max_src_len], dtype=tf.int64),
                #     x_with_oov=tf.TensorSpec(shape=[None, self.args.max_src_len], dtype=tf.int64),
                #     x_len=tf.TensorSpec(shape=[None], dtype=tf.int64),
                #     batch_size=tf.TensorSpec(shape=[None], dtype=tf.int64)
                # )
                # tf.saved_model.save(model, model_basename, signatures=beam_search_graph)
                model.save_weights(model_basename + '.ckpt', save_format='tf')
                write_json(model_basename + '.json', vars(self.args))
                f1 = self.evaluate(model, step_idx)
                self.logger.info('step {}, f1 {}'.format(step_idx, f1))
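# Restore sketch (an assumption, not part of the training code): rebuild the
# model from the saved args and load the TF-format checkpoint written above.
# `read_json` is a hypothetical counterpart of write_json, and Namespace only
# turns the stored dict back into an attribute-style args object.
def load_checkpoint(model_basename, vocab2id):
    from argparse import Namespace
    args = Namespace(**read_json(model_basename + '.json'))
    model = CopyRnnTF(args, vocab2id)
    # weights are restored lazily and matched once the model is first called
    model.load_weights(model_basename + '.ckpt')
    return model, args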
def train_func(self):
    # loss_fct = MarginRankingLoss(margin=1, reduction='mean')
    loss_fct = NLLLoss(reduction='mean')
    optimizer = AdamW(self.model.parameters(), self.args.learning_rate)
    step = 0
    # cos = nn.CosineSimilarity(dim=1)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=self.args.scheduler_step,
                                          gamma=self.args.scheduler_gamma)
    accumulate_step = 0
    for epoch in range(1, self.args.epoch + 1):
        for batch in self.loader:
            probs = self.get_probs(batch)
            batch_size = probs.size(0)
            # NLLLoss target of all zeros: the candidate in column 0 is scored as the gold one
            true_idx = torch.zeros(batch_size, dtype=torch.long)
            if torch.cuda.is_available():
                true_idx = true_idx.cuda()
            loss = loss_fct(probs, true_idx)
            loss.backward()
            self.writer.add_scalar('loss', loss, step)
            accumulate_step += 1
            stop_scheduler_step = self.args.scheduler_step * 80
            if accumulate_step % self.args.gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                if self.args.scheduler_lr and step <= stop_scheduler_step:
                    scheduler.step()
                accumulate_step = 0
            step += 1
            if step % self.args.save_model_step == 0:
                model_basename = self.args.dest_base_dir + self.args.exp_name
                model_basename += '_epoch_{}_step_{}'.format(epoch, step)
                torch.save(self.model.state_dict(), model_basename + '.model')
                write_json(model_basename + '.json', vars(self.args))
                ret = self.evaluate(model_basename, step)
                self.writer.add_scalar('accuracy', ret, step)
                # self.writer.add_scalar('recall', ret['recall'], step)
                # self.writer.add_scalar('f1', ret['f1'], step)
                msg_tmpl = 'step {} completed, accuracy {:.4f}'
                self.logger.info(msg_tmpl.format(step, ret))
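# For reference, a minimal standalone illustration (values are made up) of the
# NLLLoss setup above: the inputs must be log-probabilities, and a target of 0
# means the candidate in column 0 is treated as the correct one.
def _nll_loss_demo():
    import torch
    from torch.nn import NLLLoss
    log_probs = torch.log_softmax(torch.tensor([[2.0, 0.1, -1.0]]), dim=1)
    target = torch.zeros(1, dtype=torch.long)
    return NLLLoss(reduction='mean')(log_probs, target)  # == -log p(candidate 0)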
def evaluate_and_save_model(self, step, epoch):
    valid_f1 = self.evaluate(step)
    if self.best_f1 is None:
        self.best_f1 = valid_f1
        self.best_step = step
    elif valid_f1 >= self.best_f1:
        self.best_f1 = valid_f1
        self.not_update_count = 0
        self.best_step = step
    else:
        self.not_update_count += 1
    exp_name = self.args.exp_name
    model_basename = self.dest_dir + '{}_epoch_{}_batch_{}'.format(exp_name, epoch, step)
    torch.save(self.model.state_dict(), model_basename + '.model')
    write_json(model_basename + '.json', vars(self.args))
    score_msg_tmpl = 'best score: step {} macro f1@{} {:.4f}'
    self.logger.info(score_msg_tmpl.format(self.best_step, self.eval_topn[-1], self.best_f1))
    self.logger.info('epoch {} step {}, model saved'.format(epoch, step))
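# Possible follow-up (an assumption; nothing below exists in the original
# class): not_update_count can drive early stopping once the validation F1
# has not improved for a while. `patience` is a hypothetical attribute
# standing in for whatever tolerance the trainer actually uses.
def should_stop_early(self):
    return self.not_update_count >= self.patience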
def train_func(self):
    step = 0
    plm_lr = self.args.plm_learning_rate
    rerank_lr = self.args.rank_learning_rate
    model = load_rerank_model(self.args)
    true_score_func = get_score_func(model, 'true', inference=False)
    false_score_func = get_score_func(model, 'false', inference=False)
    if torch.cuda.is_available():
        model.cuda()
    loss_fct = MarginRankingLoss(margin=1, reduction='mean')
    if self.args.separate_learning_rate:
        params = [(k, v) for k, v in model.named_parameters() if v.requires_grad]
        non_bert_params = {
            'params': [v for k, v in params if not k.startswith('plm_model.')]
        }
        bert_params = {
            'params': [v for k, v in params if k.startswith('plm_model.')],
            'lr': plm_lr
        }
        # optimizer = torch.optim.Adam([bert_params, non_bert_params], lr=rerank_lr)
        optimizer = AdamW([non_bert_params, bert_params], lr=rerank_lr)
    else:
        optimizer = AdamW(model.parameters(), plm_lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=self.args.scheduler_step,
                                          gamma=self.args.scheduler_gamma)
    accumulate_step = 0
    for epoch in range(1, self.args.epoch + 1):
        for batch in self.train_loader:
            model.train()
            true_scores = true_score_func(batch)
            false_scores = false_score_func(batch)
            # y all 1s to indicate positive should be higher
            y = torch.ones(len(true_scores)).float()
            if torch.cuda.is_available():
                y = y.cuda()
            loss = loss_fct(true_scores, false_scores, y)
            loss.backward()
            self.writer.add_scalar('loss', loss, step)
            accumulate_step += 1
            # torch.nn.utils.clip_grad_value_(model.parameters(), 0.01)
            stop_scheduler_step = self.args.scheduler_step * 8
            if accumulate_step % self.args.gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                # if self.args.scheduler_lr and step <= stop_scheduler_step:
                if self.args.scheduler_lr:  # and step <= stop_scheduler_step:
                    scheduler.step()
                accumulate_step = 0
            step += 1
            if step % self.args.save_model_step == 0:
                model_basename = self.args.dest_base_dir + self.args.exp_name
                model_basename += '_epoch_{}_step_{}'.format(epoch, step)
                torch.save(model.state_dict(), model_basename + '.model')
                write_json(model_basename + '.json', vars(self.args))
                map_top3 = self.evaluate(model, 5, model_basename)
                self.writer.add_scalar('map@3', map_top3, step)
                self.logger.info('step {} map@3 {:.4f}'.format(step, map_top3))
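# For reference, a minimal standalone illustration (values are made up) of the
# ranking objective above: with target y = 1, MarginRankingLoss penalizes any
# pair where the false candidate scores within `margin` of the true one,
# i.e. loss_i = max(0, -(true_i - false_i) + margin).
def _margin_ranking_demo():
    import torch
    from torch.nn import MarginRankingLoss
    loss_fct = MarginRankingLoss(margin=1, reduction='mean')
    true_scores = torch.tensor([2.0, 0.3])
    false_scores = torch.tensor([0.5, 0.8])
    y = torch.ones(2)
    return loss_fct(true_scores, false_scores, y)  # mean of [0.0, 1.5] -> 0.75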