def valid_epoch(self, data):
     pbar = ProgressBar(n_total=len(data))
     self.epoch_reset()
     self.model.eval()
     with torch.no_grad():
         for step, batch in enumerate(data):
             batch = tuple(t.to(self.device) for t in batch)
             input_ids, input_mask, segment_ids, label_ids = batch
             logits = self.model(input_ids, input_mask, segment_ids)
             self.outputs.append(logits.cpu().detach())
             self.targets.append(label_ids.cpu().detach())
             pbar.batch_step(step=step, info={}, bar_type='Evaluating')
         self.outputs = torch.cat(self.outputs, dim=0).cpu().detach()
         self.targets = torch.cat(self.targets, dim=0).cpu().detach()
         loss = self.criterion(target=self.targets, output=self.outputs)
         self.result['valid_loss'] = loss.item()
         print("------------- valid result --------------")
         if self.epoch_metrics:
             for metric in self.epoch_metrics:
                 metric(logits=self.outputs, target=self.targets)
                 value = metric.value()
                 if value:
                     self.result[f'valid_{metric.name()}'] = value
         if 'cuda' in str(self.device):
             torch.cuda.empty_cache()
         return self.result
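
These examples all rely on a small ProgressBar helper that is not shown in this listing. Below is a minimal sketch, assuming only the two call styles used here (calling the bar directly with a step, and batch_step with an info dict and a bar_type label); the project's actual implementation may render differently.

import sys
import time


class ProgressBar:
    """Minimal console progress bar (sketch; the real helper may differ)."""

    def __init__(self, n_total, desc='', width=30):
        self.n_total = n_total
        self.desc = desc
        self.width = width
        self.start = time.time()

    def _render(self, step, info, prefix):
        done = int(self.width * (step + 1) / self.n_total)
        bar = '=' * done + '.' * (self.width - done)
        extras = ' '.join(f'{k}: {v:.4f}' if isinstance(v, float) else f'{k}: {v}'
                          for k, v in (info or {}).items())
        sys.stdout.write(f'\r{prefix}[{bar}] {step + 1}/{self.n_total} {extras}')
        sys.stdout.flush()

    def __call__(self, step, info=None):
        # Used as pbar(step) or pbar(step, {'loss': ...}) in these examples.
        self._render(step, info, f'[{self.desc}] ' if self.desc else '')

    def batch_step(self, step, info=None, bar_type=''):
        # Used as pbar.batch_step(step=..., info={}, bar_type='Training').
        self._render(step, info, f'[{bar_type}] ' if bar_type else '')
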
def create_training_instances(input_file, tokenizer, max_seq_len,
                              short_seq_prob, max_ngram, masked_lm_prob,
                              max_predictions_per_seq):
    """Create `TrainingInstance`s from raw text."""
    all_documents = [[]]
    # Input file format:
    # (1) One sentence per line. These should ideally be actual sentences, not
    # entire paragraphs or arbitrary spans of text. (Because we use the
    # sentence boundaries for the "next sentence prediction" task).
    # (2) Blank lines between documents. Document boundaries are needed so
    # that the "next sentence prediction" task doesn't span between documents.
    with open(input_file, 'r') as f:
        lines = f.readlines()
    pbar = ProgressBar(n_total=len(lines), desc='read data')
    for line_cnt, line in enumerate(lines):
        line = line.strip()
        # Empty lines are used as document delimiters
        if not line:
            all_documents.append([])
        tokens = tokenizer.tokenize(line)
        if tokens:
            all_documents[-1].append(tokens)
        pbar(step=line_cnt)
    print(' ')
    # Remove empty documents
    all_documents = [x for x in all_documents if x]
    random.shuffle(all_documents)

    vocab_words = list(tokenizer.vocab.keys())
    instances = []
    pbar = ProgressBar(n_total=len(all_documents), desc='create instances')
    for document_index in range(len(all_documents)):
        instances.extend(
            create_instances_from_document(all_documents, document_index,
                                           max_seq_len, short_seq_prob,
                                           max_ngram, masked_lm_prob,
                                           max_predictions_per_seq,
                                           vocab_words))
        pbar(step=document_index)
    print(' ')
    ex_idx = 0
    while ex_idx < min(5, len(instances)):
        instance = instances[ex_idx]
        logger.info("-------------------------Example-----------------------")
        logger.info(f"id: {ex_idx}")
        logger.info(
            f"tokens: {' '.join([str(x) for x in instance['tokens']])}")
        logger.info(
            f"masked_lm_labels: {' '.join([str(x) for x in instance['masked_lm_labels']])}"
        )
        logger.info(
            f"segment_ids: {' '.join([str(x) for x in instance['segment_ids']])}"
        )
        logger.info(
            f"masked_lm_positions: {' '.join([str(x) for x in instance['masked_lm_positions']])}"
        )
        logger.info(f"is_random_next : {instance['is_random_next']}")
        ex_idx += 1
    random.shuffle(instances)
    return instances
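
The comment at the top of create_training_instances describes the expected input layout. A hypothetical corpus written in that format (file name and sentences invented for illustration) would look like this:

# Hypothetical pretraining corpus: one sentence per line, blank line between documents.
sample = """\
The cat sat on the mat.
It looked very pleased with itself.

Pretraining data needs document boundaries.
They keep next sentence prediction inside one document.
"""
with open("pretrain_corpus.txt", "w", encoding="utf-8") as f:
    f.write(sample)

# instances = create_training_instances("pretrain_corpus.txt", tokenizer,
#                                        max_seq_len=128, short_seq_prob=0.1,
#                                        max_ngram=3, masked_lm_prob=0.15,
#                                        max_predictions_per_seq=20)
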
Example #3
 def create_examples(self, lines, example_type, cached_examples_file):
     '''
     Creates examples for data
     '''
     pbar = ProgressBar(n_total=len(lines))
     if cached_examples_file.exists():
         logger.info("Loading examples from cached file %s",
                     cached_examples_file)
         examples = torch.load(cached_examples_file)
     else:
         examples = []
         for i, line in enumerate(lines):
             guid = '%s-%d' % (example_type, i)
             text_a = line[0]
             label = line[1]
             if isinstance(label, str):
                 label = [float(x) for x in label.split(",")]
             else:
                 label = [float(x) for x in list(label)]
             text_b = None
             example = InputExample(guid=guid,
                                    text_a=text_a,
                                    text_b=text_b,
                                    label=label)
             examples.append(example)
             pbar.batch_step(step=i, info={}, bar_type='create examples')
         logger.info("Saving examples into cached file %s",
                     cached_examples_file)
         torch.save(examples, cached_examples_file)
     return examples
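
The InputExample container used above is not defined in this listing. A minimal sketch that would satisfy this snippet (field names taken from the calls above; the real class may carry extra attributes such as seq_id, used in a later example):

from dataclasses import dataclass
from typing import List, Optional, Union


@dataclass
class InputExample:
    """Sketch of the example container assumed by create_examples."""
    guid: str                                         # e.g. 'train-42'
    text_a: str                                       # first text segment
    text_b: Optional[str] = None                      # optional second segment
    label: Optional[Union[int, List[float]]] = None   # int or multi-label float list
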
 def predict(self, data, thresh):
     pbar = ProgressBar(n_total=len(data))
     all_logits = None
     # y_true = torch.LongTensor()
     y_true = None
     self.model.eval()
     with torch.no_grad():
         for step, batch in enumerate(data):
             batch = tuple(t.to(self.device) for t in batch)
             input_ids, input_mask, segment_ids, label_ids = batch
             # y_true = torch.cat((y_true, label_ids), 0)
             if y_true is None:
                 y_true = label_ids.detach().cpu().numpy()
             else:
                 y_true = np.concatenate(
                     [y_true, label_ids.detach().cpu().numpy()], axis=0)
             logits = self.model(input_ids, segment_ids, input_mask)
             logits = logits.sigmoid()
             if all_logits is None:
                 all_logits = logits.detach().cpu().numpy()
             else:
                 all_logits = np.concatenate(
                     [all_logits, logits.detach().cpu().numpy()], axis=0)
             pbar.batch_step(step=step, info={}, bar_type='Testing')
     y_pred = (all_logits > thresh) * 1
     micro = f1_score(y_true, y_pred, average='micro')
     macro = f1_score(y_true, y_pred, average='macro')
     score = (micro + macro) / 2
     self.logger.info("\nScore: micro {}, macro {} Average {}".format(
         micro, macro, score))
     if 'cuda' in str(self.device):
         torch.cuda.empty_cache()
     return all_logits, y_pred
 def train_epoch(self, data):
     pbar = ProgressBar(n_total=len(data))
     tr_loss = AverageMeter()
     self.epoch_reset()
     for step, batch in enumerate(data):
         self.batch_reset()
         self.model.train()
         batch = tuple(t.to(self.device) for t in batch)
         input_ids, input_mask, segment_ids, label_ids = batch
         print("input_ids, input_mask, segment_ids, label_ids SIZE: \n")
         print(input_ids.size(), input_mask.size(), segment_ids.size(),
               label_ids.size())
         logits = self.model(input_ids, input_mask, segment_ids)
         print("logits and label ids size: ", logits.size(),
               label_ids.size())
         loss = self.criterion(output=logits, target=label_ids)
         if len(self.n_gpu) >= 2:
             loss = loss.mean()
         if self.gradient_accumulation_steps > 1:
             loss = loss / self.gradient_accumulation_steps
         if self.fp16:
             with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                 scaled_loss.backward()
             clip_grad_norm_(amp.master_params(self.optimizer),
                             self.grad_clip)
         else:
             loss.backward()
             clip_grad_norm_(self.model.parameters(), self.grad_clip)
         if (step + 1) % self.gradient_accumulation_steps == 0:
             self.optimizer.step()
             self.lr_scheduler.step()
             self.optimizer.zero_grad()
             self.global_step += 1
         if self.batch_metrics:
             for metric in self.batch_metrics:
                 metric(logits=logits, target=label_ids)
                 self.info[metric.name()] = metric.value()
         self.info['loss'] = loss.item()
         tr_loss.update(loss.item(), n=1)
         if self.verbose >= 1:
             pbar.batch_step(step=step, info=self.info, bar_type='Training')
         self.outputs.append(logits.cpu().detach())
         self.targets.append(label_ids.cpu().detach())
     print("\n------------- train result --------------")
     # epoch metric
     self.outputs = torch.cat(self.outputs, dim=0).cpu().detach()
     self.targets = torch.cat(self.targets, dim=0).cpu().detach()
     self.result['loss'] = tr_loss.avg
     if self.epoch_metrics:
         for metric in self.epoch_metrics:
             metric(logits=self.outputs, target=self.targets)
             value = metric.value()
             if value:
                 self.result[f'{metric.name()}'] = value
     if "cuda" in str(self.device):
         torch.cuda.empty_cache()
     return self.result
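
train_epoch accumulates the loss with an AverageMeter. A standard sketch of that helper, assuming the usual update(val, n) / avg interface used above:

class AverageMeter:
    """Tracks a running average (sketch of the helper assumed by train_epoch)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count if self.count else 0.0
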
 def create_examples(self, lines, example_type, cached_examples_file):
     '''
     Creates examples for data
     '''
     pbar = ProgressBar(n_total=len(lines), desc='create examples')
     if cached_examples_file.exists():
         logger.info("Loading examples from cached file %s",
                     cached_examples_file)
         examples = torch.load(cached_examples_file)
     else:
         examples = []
         for i, line in enumerate(lines):
             guid = '%s-%d' % (example_type, i)
             text_a = line[0]
             text_b = line[1]
             label = line[2]
             label = int(label)
             example = InputExample(guid=guid,
                                    text_a=text_a,
                                    text_b=text_b,
                                    label=label)
             examples.append(example)
             pbar(step=i)
         logger.info("Saving examples into cached file %s",
                     cached_examples_file)
         torch.save(examples, cached_examples_file)
     return examples
Example #7
def take_eval_steps(args, model, tokenizer, prune, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
    #eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)

    results = {}
    #for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
    for eval_task in eval_task_names:
        eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, data_type='dev')
        #if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        #    os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size,
                                     collate_fn=collate_fn)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        model = model.model
        model.eval()
        pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
        for step, batch in enumerate(eval_dataloader):
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'labels': batch[3]}
                #inputs['token_type_ids'] = batch[2]
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)
            pbar(step)
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
        eval_loss = eval_loss / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
    return results
Example #8
 def read_data_and_create_examples(self, example_type, cached_examples_file,
                                   input_file):
     if cached_examples_file.exists():
         logger.info("Loading examples from cached file %s",
                     cached_examples_file)
         examples = torch.load(cached_examples_file)
     else:
         # create examples
         df_dataset = pd.read_csv(input_file).fillna("")
         pbar = ProgressBar(n_total=len(df_dataset), desc='create examples')
         examples = []
         for i, row in df_dataset.iterrows():
             guid = '%s-%d' % (example_type, i)
             seq_id = row["id"]
             text_a = row["title"]
             text_b = row["content"]
             label = row["label"]
             label = int(label)
             example = InputExample(guid=guid,
                                    seq_id=seq_id,
                                    text_a=text_a,
                                    text_b=text_b,
                                    label=label)
             examples.append(example)
             pbar(step=i)
         logger.info("Saving examples into cached file %s",
                     cached_examples_file)
         torch.save(examples, cached_examples_file)
     return examples
def evaluate(args, model, eval_dataloader, metrics):
    # Eval!
    logger.info("  Num examples = %d", len(eval_dataloader))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = AverageMeter()
    metrics.reset()
    preds = []
    targets = []
    pbar = ProgressBar(n_total=len(eval_dataloader), desc='Evaluating')
    for bid, batch in enumerate(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            inputs['token_type_ids'] = batch[2]
            outputs = model(**inputs)
            loss, logits = outputs[:2]
            eval_loss.update(loss.item(), n=batch[0].size()[0])
        preds.append(logits.cpu().detach())
        targets.append(inputs['labels'].cpu().detach())
        pbar(bid)
    preds = torch.cat(preds, dim=0).cpu().detach()
    targets = torch.cat(targets, dim=0).cpu().detach()
    metrics(preds, targets)
    eval_log = {"eval_acc": metrics.value(), 'eval_loss': eval_loss.avg}
    return eval_log
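
evaluate() only assumes that `metrics` supports reset(), a call with (preds, targets), and value(). A minimal accuracy metric with that interface, as a sketch (the project's metric object may compute something else):

import torch


class Accuracy:
    """Sketch of a metric object matching the reset/__call__/value interface."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.correct = 0
        self.total = 0

    def __call__(self, logits, target):
        preds = torch.argmax(logits, dim=1)
        self.correct += (preds == target).sum().item()
        self.total += target.size(0)

    def value(self):
        return self.correct / max(self.total, 1)
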
Example #10
def predict(args, model, tokenizer, prefix=""):
    '''Model prediction'''
    pred_output_dir = args.output_dir
    if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(pred_output_dir)

    test_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type='test')
    # Note that DistributedSampler samples randomly
    test_sampler = SequentialSampler(
        test_dataset) if args.local_rank == -1 else DistributedSampler(
            test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=1,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running prediction %s *****", prefix)
    logger.info("  Num examples = %d", len(test_dataset))
    logger.info("  Batch size = %d", 1)

    results = []
    output_submit_file = os.path.join(pred_output_dir, prefix,
                                      "test_prediction.json")
    pbar = ProgressBar(n_total=len(test_dataloader), desc="Predicting")
    for step, batch in enumerate(test_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": None
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
        logits = outputs[0]
        preds = logits.detach().cpu().numpy()
        preds = np.argmax(preds, axis=2).tolist()
        preds = preds[0][1:-1]  # [CLS]XXXX[SEP]
        tags = [args.id2label[x] for x in preds]
        label_entities = get_entities(preds, args.id2label,
                                      args.markup)  # extract the entities
        json_d = {}
        json_d['id'] = step
        json_d['tag_seq'] = " ".join(tags)
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step)
    logger.info("\n")
    with open(output_submit_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')
Example #11
def predict(args, model, tokenizer, prefix=""):
    pred_output_dir = args.output_dir
    if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(pred_output_dir)
    test_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type='test')
    print(len(test_dataset))
    # Note that DistributedSampler samples randomly
    test_sampler = SequentialSampler(
        test_dataset) if args.local_rank == -1 else DistributedSampler(
            test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=1,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running prediction %s *****", prefix)
    logger.info("  Num examples = %d", len(test_dataset))
    logger.info("  Batch size = %d", 1)

    results = []
    output_submit_file = os.path.join(pred_output_dir, prefix,
                                      "test_prediction.json")
    pbar = ProgressBar(n_total=len(test_dataloader), desc="Predicting")
    for step, batch in enumerate(test_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "start_positions": None,
                "end_positions": None
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
        start_logits, end_logits = outputs[:2]
        R = bert_extract_item(start_logits, end_logits)
        if R:
            label_entities = [[args.id2label[x[0]], x[1], x[2]] for x in R]
        else:
            label_entities = []
        json_d = {}
        json_d['id'] = step
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step)
    print(" ")
    with open(output_submit_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')
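
This example decodes entity spans with bert_extract_item, which is not shown here. A sketch of the decoding it is assumed to perform: take the argmax label per position for the start and end heads, drop the [CLS]/[SEP] positions, and pair each predicted start with the nearest following end of the same label (label id 0 is assumed to mean "no entity"; the real function may differ).

import torch


def bert_extract_item(start_logits, end_logits):
    """Sketch: returns (label_id, start_idx, end_idx) triples for one sequence."""
    spans = []
    start_pred = torch.argmax(start_logits, -1).cpu().numpy()[0][1:-1]
    end_pred = torch.argmax(end_logits, -1).cpu().numpy()[0][1:-1]
    for i, s_label in enumerate(start_pred):
        if s_label == 0:          # assumption: 0 is the non-entity label
            continue
        for j, e_label in enumerate(end_pred[i:]):
            if s_label == e_label:
                spans.append((s_label, i, i + j))
                break
    return spans
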
Example #12
def predict(args, model, pred_dataloader, config):
    # Predict (without compute metrics)
    # args.predict_save_path = config['pred_dir'] / f'{args.pred_dir_name}'
    # args.predict_save_path.mkdir(exist_ok=True)

    logger.info("  Num examples = %d", len(pred_dataloader))
    logger.info("  Batch size = %d", args.eval_batch_size)
    seq_ids = []
    preds = []
    pbar = ProgressBar(n_total=len(pred_dataloader), desc='Predicting')
    for bid, batch in enumerate(pred_dataloader):
        model.eval()
        batch = tuple(
            t.to(args.device) if isinstance(t, torch.Tensor) else t
            for t in batch)
        seq_ids += list(batch[-1])
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            inputs['token_type_ids'] = batch[2]
            ##############
            # writer = SummaryWriter(config["output_dir"])
            # ips = {k: v[[0], ...] for k, v in inputs.items()}
            # ops = model(**ips)
            # model_graph_inputs = (
            #     ips["input_ids"], ips["attention_mask"], ips["token_type_ids"], [1,2], [3,4], ips["labels"])
            # writer.add_graph(model, model_graph_inputs)
            # writer.close()
            ##############
            outputs = model(**inputs)
            loss, logits = outputs[:2]
        preds.append(logits.cpu().detach())
        pbar(bid)
    preds = torch.cat(preds, dim=0).cpu().detach()
    preds_label = torch.argmax(preds, dim=1)
    result_label = DataFrame(data={
        "id": Series(seq_ids),
        "label": Series(preds_label)
    })
    result_label.to_csv(config["predict_result"], index=False)

    preds_softmax = torch.softmax(preds, dim=1)
    result_softmax = DataFrame(
        data={
            "id": Series(seq_ids),
            "label_0": Series(preds_softmax[:, 0]),
            "label_1": Series(preds_softmax[:, 1]),
            "label_2": Series(preds_softmax[:, 2])
        })
    result_softmax.to_csv(config["predict_softmax"], index=False)

    return result_label
    def create_features(self, examples, max_seq_len, cached_features_file):
        pbar = ProgressBar(n_total=len(examples))
        if cached_features_file.exists():
            logger.info("Loading features from cached file %s",
                        cached_features_file)
            features = torch.load(cached_features_file)
        else:
            features = []
            for ex_id, example in enumerate(examples):
                tokens = self.tokenizer.tokenize(example.text)
                label_ids = example.labels

                if len(tokens) > max_seq_len:
                    tokens = tokens[:max_seq_len]

                input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
                padding = [self.pad_id] * (max_seq_len - len(input_ids))
                input_len = len(input_ids)

                input_ids += padding

                assert len(input_ids) == max_seq_len

                if ex_id < 2:
                    logger.info("*** Example ***")
                    logger.info(f"guid: {example.guid}" % ())
                    logger.info(
                        f"tokens: {' '.join([str(x) for x in tokens])}")
                    logger.info(
                        f"input_ids: {' '.join([str(x) for x in input_ids])}")

                feature = InputFeature(input_ids=input_ids,
                                       label_ids=label_ids,
                                       input_len=input_len)
                features.append(feature)
                pbar.batch_step(step=ex_id,
                                info={},
                                bar_type='create features')
            logger.info("Saving features into cached file %s",
                        cached_features_file)
            torch.save(features, cached_features_file)
        return features
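
create_features builds InputFeature objects that are not defined in this listing. A sketch covering only the fields this snippet sets:

from dataclasses import dataclass
from typing import List


@dataclass
class InputFeature:
    """Sketch of the feature container assumed by create_features."""
    input_ids: List[int]   # token ids, padded to max_seq_len
    label_ids: List[int]   # label ids for the example
    input_len: int         # number of non-padding tokens
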
Example #14
def evaluate(args, model, tokenizer, prefix=""):
    metric = SpanEntityScore(args.id2label)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    eval_features = load_and_cache_examples(args, args.task_name, tokenizer, data_type='dev')
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Eval!
    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_features))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    pbar = ProgressBar(n_total=len(eval_features), desc="Evaluating")
    for step, f in enumerate(eval_features):
        input_lens = f.input_len
        input_ids = torch.tensor([f.input_ids[:input_lens]], dtype=torch.long).to(args.device)
        input_mask = torch.tensor([f.input_mask[:input_lens]], dtype=torch.long).to(args.device)
        segment_ids = torch.tensor([f.segment_ids[:input_lens]], dtype=torch.long).to(args.device)
        start_ids = torch.tensor([f.start_ids[:input_lens]], dtype=torch.long).to(args.device)
        end_ids = torch.tensor([f.end_ids[:input_lens]], dtype=torch.long).to(args.device)
        subjects = f.subjects
        model.eval()
        with torch.no_grad():
            inputs = {"input_ids": input_ids, "attention_mask": input_mask,
                      "start_positions": start_ids, "end_positions": end_ids}
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (segment_ids if args.model_type in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
        tmp_eval_loss, start_logits, end_logits = outputs[:3]
        R = bert_extract_item(start_logits, end_logits)
        T = subjects
        metric.update(true_subject=T, pred_subject=R)
        if args.n_gpu > 1:
            tmp_eval_loss = tmp_eval_loss.mean()  # mean() to average on multi-gpu parallel evaluating
        eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        pbar(step)
    logger.info("\n")
    eval_loss = eval_loss / nb_eval_steps
    eval_info, entity_info = metric.result()
    results = {f'{key}': value for key, value in eval_info.items()}
    results['loss'] = eval_loss
    logger.info("***** Eval results %s *****", prefix)
    info = "-".join([f' {key}: {value:.4f} ' for key, value in results.items()])
    logger.info(info)
    logger.info("***** Entity results %s *****", prefix)
    for key in sorted(entity_info.keys()):
        print("******* %s results ********" % key)
        info = "-".join([f' {key}: {value:.4f} ' for key, value in entity_info[key].items()])
        print(info)
    return results
Example #15
def predict(args, model, tokenizer, lines, prefix=""):
    pred_output_dir = args.output_dir
    if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(pred_output_dir)
    test_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           lines,
                                           data_type='test')
    # Note that DistributedSampler samples randomly
    test_sampler = SequentialSampler(
        test_dataset) if args.local_rank == -1 else DistributedSampler(
            test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=1,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running prediction %s *****", prefix)
    logger.info("  Num examples = %d", len(test_dataset))
    logger.info("  Batch size = %d", 1)

    results = []
    pbar = ProgressBar(n_total=len(test_dataloader), desc="Predicting")
    if isinstance(model, nn.DataParallel):
        model = model.module
    for step, batch in enumerate(test_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": None,
                'input_lens': batch[4]
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            logits = outputs[0]
            preds, _ = model.crf._obtain_labels(logits, args.id2label,
                                                inputs['input_lens'])
        preds = preds[0][1:-1]  # [CLS]XXXX[SEP]
        label_entities = get_entities(preds, args.id2label, args.markup)
        json_d = {}
        json_d['id'] = step
        json_d['tag_seq'] = " ".join(preds)
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step)
    print(results[:3])
Example #16
def predict(args, model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    pred_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
    pred_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)

    results = {}
    for pred_task, pred_output_dir in zip(pred_task_names, pred_outputs_dirs):
        pred_dataset = load_and_cache_examples(args, pred_task, tokenizer, data_type='test')
        if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(pred_output_dir)

        args.pred_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        pred_sampler = SequentialSampler(pred_dataset) if args.local_rank == -1 else DistributedSampler(pred_dataset)
        pred_dataloader = DataLoader(pred_dataset, sampler=pred_sampler, batch_size=args.pred_batch_size,
                                     collate_fn=collate_fn)

        logger.info("***** Running prediction {} *****".format(prefix))
        logger.info("  Num examples = %d", len(pred_dataset))
        logger.info("  Batch size = %d", args.pred_batch_size)
        nb_pred_steps = 0
        preds = None
        pbar = ProgressBar(n_total=len(pred_dataloader), desc="Predicting")
        for step, batch in enumerate(pred_dataloader):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'labels': batch[3]}
                if args.model_type != 'distilbert':
                    inputs['token_type_ids'] = batch[2] if (
                            'bert' in args.model_type or 'xlnet' in args.model_type) else None  # XLM, DistilBERT and RoBERTa don't use segment_ids
                outputs = model(**inputs)
                _, logits = outputs[:2]
            nb_pred_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            pbar(step)
        print(' ')
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        output_pred_file = os.path.join(pred_output_dir, prefix, "test_prediction.txt")
        with open(output_pred_file, "w") as writer:
            for pred in preds:
                writer.write(str(pred) + '\n')
    return results
Example #17
 def _create_examples(self, lines, example_type):
     '''
         Creates examples for data
     '''
     pbar = ProgressBar(n_total=len(lines), desc='create examples')
     examples = []
     for i, line in tqdm.tqdm(enumerate(lines)):
         id = line['query_id']
         context = line['passage']
         query = line['query']
         alternatives = line['alternatives'].split('|')
         random.shuffle(alternatives)
         if example_type == 'test':
             answer = None
         else:
             answer = self.get_anwser(line['answer'], alternatives)  # the test split has no answer field
         example = InputExample(example_id=id, question=query, contexts=context, endings=alternatives,
                                label=answer)
         examples.append(example)
         # pbar(step=i)
     return examples
Example #18
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)
    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            args.weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0
        },
    ]
    args.warmup_steps = int(t_total * args.warmup_proportion)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)
    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(
            args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt")):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)
    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)
    global_step = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path
                      ) and "checkpoint" in args.model_name_or_path:
        # set global_step to the global_step of the last saved checkpoint from the model path
        global_step = int(args.model_name_or_path.split("-")[-1].split("/")[0])
        epochs_trained = global_step // (len(train_dataloader) //
                                         args.gradient_accumulation_steps)
        steps_trained_in_current_epoch = global_step % (
            len(train_dataloader) // args.gradient_accumulation_steps)
        logger.info(
            "  Continuing training from checkpoint, will skip to saved global_step"
        )
        logger.info("  Continuing training from epoch %d", epochs_trained)
        logger.info("  Continuing training from global step %d", global_step)
        logger.info("  Will skip the first %d steps in the first epoch",
                    steps_trained_in_current_epoch)

    tr_loss, logging_loss = 0.0, 0.0
    if args.do_adv:
        fgm = FGM(model, emb_name=args.adv_name, epsilon=args.adv_epsilon)
    model.zero_grad()
    seed_everything(
        args.seed
    )  # Added here for reproducibility (even between python 2 and 3)
    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3]
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always tuple in pytorch-transformers (see doc)
            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            if args.do_adv:
                fgm.attack()
                loss_adv = model(**inputs)[0]
                if args.n_gpu > 1:
                    loss_adv = loss_adv.mean()
                loss_adv.backward()
                fgm.restore()
            pbar(step, {'loss': loss.item()})
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(
                        amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1
                if args.local_rank in [
                        -1, 0
                ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Log metrics
                    print(" ")
                    if args.local_rank == -1:
                        # Only evaluate when single GPU otherwise metrics may not average well
                        evaluate(args, model, tokenizer)

                if args.local_rank in [
                        -1, 0
                ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(
                        args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Take care of distributed/parallel training
                    model_to_save = (model.module
                                     if hasattr(model, "module") else model)
                    model_to_save.save_pretrained(output_dir)
                    torch.save(args,
                               os.path.join(output_dir, "training_args.bin"))
                    tokenizer.save_vocabulary(output_dir)
                    logger.info("Saving model checkpoint to %s", output_dir)
                    torch.save(optimizer.state_dict(),
                               os.path.join(output_dir, "optimizer.pt"))
                    torch.save(scheduler.state_dict(),
                               os.path.join(output_dir, "scheduler.pt"))
                    logger.info("Saving optimizer and scheduler states to %s",
                                output_dir)
        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
    return global_step, tr_loss / global_step
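
When args.do_adv is set, this training loop perturbs the word embeddings with FGM (Fast Gradient Method) adversarial training. A common sketch of such an FGM helper with the attack()/restore() interface used above; the project's version may differ in details such as the norm used.

import torch


class FGM:
    """Sketch of embedding-level adversarial training (Fast Gradient Method)."""

    def __init__(self, model, emb_name='word_embeddings', epsilon=1.0):
        self.model = model
        self.emb_name = emb_name  # substring identifying the embedding parameters
        self.epsilon = epsilon
        self.backup = {}

    def attack(self):
        # Add an epsilon-scaled, gradient-direction perturbation to the embeddings.
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name and param.grad is not None:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    param.data.add_(self.epsilon * param.grad / norm)

    def restore(self):
        # Put the original embedding weights back after the adversarial backward pass.
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name and name in self.backup:
                param.data = self.backup[name]
        self.backup = {}
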
Example #19
def evaluate(args, model, tokenizer, prefix=""):
    metric = SeqEntityScore(args.id2label, markup=args.markup)
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    eval_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type='dev')
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
    for step, batch in enumerate(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3]
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don't use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
        tmp_eval_loss, logits = outputs[:2]
        if args.n_gpu > 1:
            tmp_eval_loss = tmp_eval_loss.mean(
            )  # mean() to average on multi-gpu parallel evaluating
        eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        preds = np.argmax(logits.cpu().numpy(), axis=2).tolist()
        out_label_ids = inputs['labels'].cpu().numpy().tolist()
        for i, label in enumerate(out_label_ids):
            temp_1 = []
            temp_2 = []
            for j, m in enumerate(label):
                if j == 0:
                    continue
                elif out_label_ids[i][j] == args.label2id['[SEP]']:
                    metric.update(pred_paths=[temp_2], label_paths=[temp_1])
                    break
                else:
                    temp_1.append(args.id2label[out_label_ids[i][j]])
                    temp_2.append(preds[i][j])
        pbar(step)
    print(' ')
    eval_loss = eval_loss / nb_eval_steps
    eval_info, entity_info = metric.result()
    results = {f'{key}': value for key, value in eval_info.items()}
    results['loss'] = eval_loss
    logger.info("***** Eval results %s *****", prefix)
    info = "-".join(
        [f' {key}: {value:.4f} ' for key, value in results.items()])
    logger.info(info)
    logger.info("***** Entity results %s *****", prefix)
    for key in sorted(entity_info.keys()):
        logger.info("******* %s results ********" % key)
        info = "-".join([
            f' {key}: {value:.4f} ' for key, value in entity_info[key].items()
        ])
        logger.info(info)
    return results
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(
        train_dataset) if args.local_rank == -1 else DistributedSampler(
            train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    if args.max_steps > 0:
        num_training_steps = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        num_training_steps = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs
    args.warmup_steps = int(num_training_steps * args.warmup_proportion)
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    # optimizer = Lamb(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    optimizer = AdamW(params=optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=num_training_steps)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", num_training_steps)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    seed_everything(
        args.seed
    )  # Added here for reproducibility (even between python 2 and 3)
    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3],
                'token_type_ids': batch[2]
            }
            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

            if args.local_rank in [
                    -1, 0
            ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                # Log metrics
                if args.local_rank == -1:  # Only evaluate when single GPU otherwise metrics may not average well
                    evaluate(args, model, tokenizer)

            if args.local_rank in [
                    -1, 0
            ] and args.save_steps > 0 and global_step % args.save_steps == 0:
                # Save model checkpoint
                output_dir = os.path.join(args.output_dir,
                                          'checkpoint-{}'.format(global_step))
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                model_to_save = model.module if hasattr(
                    model, 'module'
                ) else model  # Take care of distributed/parallel training
                model_to_save.save_pretrained(output_dir)
                torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                logger.info("Saving model checkpoint to %s", output_dir)
            pbar(step, {'loss': loss.item()})
        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
    return global_step, tr_loss / global_step
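
Both training loops call seed_everything for reproducibility. A typical sketch of that helper, seeding Python, NumPy and PyTorch (CPU and CUDA):

import os
import random

import numpy as np
import torch


def seed_everything(seed=42):
    """Sketch: seed every RNG these examples rely on."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
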
def test(args, model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    test_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (
        args.task_name, )
    test_outputs_dirs = (args.output_dir, args.output_dir +
                         '-MM') if args.task_name == "mnli" else (
                             args.output_dir, )

    results = {}
    for test_task, test_output_dir in zip(test_task_names, test_outputs_dirs):
        test_dataset = load_and_cache_examples(args,
                                               test_task,
                                               tokenizer,
                                               data_type='test')
        if not os.path.exists(test_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(test_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        test_sampler = SequentialSampler(
            test_dataset) if args.local_rank == -1 else DistributedSampler(
                test_dataset)
        test_dataloader = DataLoader(test_dataset,
                                     sampler=test_sampler,
                                     batch_size=args.eval_batch_size,
                                     collate_fn=collate_fn)

        # Test!
        logger.info("***** Running test {} *****".format(prefix))
        logger.info("  Num examples = %d", len(test_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        pbar = ProgressBar(n_total=len(test_dataloader), desc="Testing")
        for step, batch in enumerate(test_dataloader):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'labels': batch[3],
                    'token_type_ids': batch[2]
                }
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)
            pbar(step)
        print(' ')
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
        eval_loss = eval_loss / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(test_task, preds, out_label_ids)
        results.update(result)
        logger.info("***** Test results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
        classreport = ClassReport([
            'Joint', 'Sequence', 'Progression', "Contrast", "Supplement",
            "Cause-Result", "Result-Cause", "Background", "Behavior-Purpose",
            "Purpose-Behavior", "Elaboration", "Summary", "Evaluation",
            "Statement-Illustration", "Illustration-Statement"
        ])
        classreport(preds, out_label_ids)
        logger.info("%s : %s", classreport.name(), classreport.value())

    return results
Example #22
def evaluate(args, model, tokenizer, label_lists, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,)
    eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,)

    results = {}
    logger.info("**** Evaluate *****")
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, data_type='dev')
        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size,
                                     collate_fn=collate_fn_ner)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
        for step, batch in enumerate(eval_dataloader):
            now = datetime.datetime.now()

            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'labels': batch[3]}
                inputs['token_type_ids'] = batch[2]
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = np.argmax(logits.detach().cpu().numpy(), axis=2)
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds_argmax = np.argmax(logits.detach().cpu().numpy(), axis=2)
                preds = collate_pred(preds, preds_argmax, label_lists)
                out_label_ids = collate_pred(out_label_ids, inputs['labels'].detach().cpu().numpy(), label_lists)
            pbar(step)

            delta = (datetime.datetime.now() - now).total_seconds() * 1000
            logger.info("*** Evaluating time cost: input length %d, %.1f ms",
                        len(batch[0]), delta)
        print(' ')
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()
        eval_loss = eval_loss / nb_eval_steps
        evaluater = NerAccuracyEvaluator(label_lists, "WORD")
        
        result = evaluater.evaluate(preds, out_label_ids, args.label_with_bi)
        results.update(result)
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))

        # Store evaluate results
        if args.do_eval and args.output_eval:
            print_eval_output(args.output_dir, preds, out_label_ids, label_lists)
    return results
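# NerAccuracyEvaluator and collate_pred are external helpers that are not shown in this
# snippet, so their exact behaviour is unknown. As a purely illustrative stand-in, a
# word-level accuracy over already aligned prediction/label id sequences could look like
# this (the ignore_id=-100 padding convention is an assumption):
import numpy as np


def word_level_accuracy(preds, labels, ignore_id=-100):
    """Fraction of non-ignored positions where the predicted id equals the gold id."""
    preds = np.asarray(preds).reshape(-1)
    labels = np.asarray(labels).reshape(-1)
    mask = labels != ignore_id
    if mask.sum() == 0:
        return 0.0
    return float((preds[mask] == labels[mask]).sum() / mask.sum())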
Example #23
def predict(args, model, tokenizer, prefix=""):
    pred_output_dir = args.output_dir
    if not os.path.exists(pred_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(pred_output_dir)
    test_dataset = load_and_cache_examples(args,
                                           args.task_name,
                                           tokenizer,
                                           data_type='test')
    # Note that DistributedSampler samples randomly
    test_sampler = SequentialSampler(
        test_dataset) if args.local_rank == -1 else DistributedSampler(
            test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=test_sampler,
                                 batch_size=1,
                                 collate_fn=collate_fn)
    # Eval!
    logger.info("***** Running prediction %s *****", prefix)
    logger.info("  Num examples = %d", len(test_dataset))
    logger.info("  Batch size = %d", 1)
    results = []
    output_predict_file = os.path.join(pred_output_dir, prefix,
                                       "test_prediction.json")
    pbar = ProgressBar(n_total=len(test_dataloader), desc="Predicting")

    if isinstance(model, nn.DataParallel):
        model = model.module
    for step, batch in enumerate(test_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": None
            }
            if args.model_type != "distilbert":
                # XLM and RoBERTa don"t use segment_ids
                inputs["token_type_ids"] = (batch[2] if args.model_type
                                            in ["bert", "xlnet"] else None)
            outputs = model(**inputs)
            logits = outputs[0]
            tags = model.crf.decode(logits, inputs['attention_mask'])
            tags = tags.squeeze(0).cpu().numpy().tolist()
        preds = tags[0][1:-1]  # drop the [CLS] and [SEP] positions
        label_entities = get_entities(preds, args.id2label, args.markup)
        json_d = {}
        json_d['id'] = step
        json_d['tag_seq'] = " ".join([args.id2label[x] for x in preds])
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step)
    logger.info("\n")
    with open(output_predict_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')
    if args.task_name == 'cluener':
        output_submit_file = os.path.join(pred_output_dir, prefix,
                                          "test_submit.json")
        test_text = []
        with open(os.path.join(args.data_dir, "test.json"), 'r') as fr:
            for line in fr:
                test_text.append(json.loads(line))
        test_submit = []
        for x, y in zip(test_text, results):
            json_d = {}
            json_d['id'] = x['id']
            json_d['label'] = {}
            entities = y['entities']
            words = list(x['text'])
            if len(entities) != 0:
                for subject in entities:
                    tag = subject[0]
                    start = subject[1]
                    end = subject[2]
                    word = "".join(words[start:end + 1])
                    if tag in json_d['label']:
                        if word in json_d['label'][tag]:
                            json_d['label'][tag][word].append([start, end])
                        else:
                            json_d['label'][tag][word] = [[start, end]]
                    else:
                        json_d['label'][tag] = {}
                        json_d['label'][tag][word] = [[start, end]]
            test_submit.append(json_d)
        json_to_text(output_submit_file, test_submit)
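# json_to_text is an external helper. A minimal sketch that matches how it is called
# above (one JSON record per line, keeping non-ASCII text readable); this is an
# assumption about its behaviour, not the original implementation:
import json


def json_to_text(file_path, json_data):
    with open(file_path, 'w', encoding='utf-8') as fw:
        for record in json_data:
            fw.write(json.dumps(record, ensure_ascii=False) + '\n')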
Example #24
def train(args, train_dataloader, eval_dataloader, metrics, model):
    """ Train the model """

    t_total = (len(train_dataloader) // args.gradient_accumulation_steps
               * args.num_train_epochs)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    args.warmup_steps = t_total * args.warmup_proportion
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=args.warmup_steps,
                                     t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    best_acc = 0
    model.zero_grad()
    seed_everything(args.seed)
    for epoch in range(int(args.num_train_epochs)):
        tr_loss = AverageMeter()
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            inputs['token_type_ids'] = batch[2]
            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always tuple in transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)
            tr_loss.update(loss.item(), n=1)
            pbar(step, info={"loss": loss.item()})
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

        train_log = {'loss': tr_loss.avg}
        eval_log = evaluate(args, model, eval_dataloader, metrics)
        logs = dict(train_log, **eval_log)
        show_info = f'\nEpoch: {epoch} - ' + "-".join(
            [f' {key}: {value:.4f} ' for key, value in logs.items()])
        logger.info(show_info)

        if logs['eval_acc'] > best_acc:
            logger.info(
                f"\nEpoch {epoch}: eval_acc improved from {best_acc} to {logs['eval_acc']}"
            )
            logger.info("save model to disk.")
            best_acc = logs['eval_acc']
            print("Valid Entity Score: ")
            model_to_save = model.module if hasattr(
                model, 'module') else model  # Only save the model it-self
            output_file = args.model_save_path
            output_file.mkdir(exist_ok=True)
            output_model_file = output_file / WEIGHTS_NAME
            torch.save(model_to_save.state_dict(), output_model_file)
            output_config_file = output_file / CONFIG_NAME
            with open(str(output_config_file), 'w') as f:
                f.write(model_to_save.config.to_json_string())
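# AverageMeter is referenced in train() above but not defined in this snippet. A minimal
# sketch of the usual running-average helper, matching the .update(value, n)/.avg usage:
class AverageMeter:
    """Keeps a running sum and average of a scalar value."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count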
Example #25
def evaluate(args, model, eval_dataloader, metrics):
    # Eval!
    logger.info("  Number of examples = %d", len(eval_dataloader))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = AverageMeter()
    metrics.reset()
    preds = []
    targets = []
    pbar = ProgressBar(n_total=len(eval_dataloader), desc='Evaluating')
    # pdb.set_trace()
    # (Pdb) a
    # args = Namespace(adam_epsilon=1e-08, albert_config_path=
    # 'pretrain/pytorch/albert_base_zh/albert_config_base.json',
    # arch='albert_base', bert_dir='pretrain/pytorch/albert_base_zh',
    # device=device(type='cuda'), do_eval=False, do_lower_case=False,
    # do_test=True, do_train=False, eval_all_checkpoints=False,
    # eval_batch_size=16, eval_max_seq_len=64, evaluate_during_training=False,
    # fp16=False, fp16_opt_level='O1', gradient_accumulation_steps=1, learning_rate=2e-05,
    # local_rank=-1, max_grad_norm=5.0, model_save_path=PosixPath('outputs/checkpoints/albert_base'),
    # n_gpu=1, no_cuda=False, num_train_epochs=3.0, overwrite_cache=False,
    # overwrite_output_dir=False, seed=42, server_ip='', server_port='',
    # share_type='all', task_name='lcqmc', train_batch_size=32, train_max_seq_len=64,
    # warmup_proportion=0.1, weight_decay=0.1)
    #
    # model = AlbertForSequenceClassification(
    #   (bert): AlbertModel(
    #     (embeddings): AlbertEmbeddings(
    #       (word_embeddings): Embedding(21128, 128, padding_idx=0)
    #       (word_embeddings_2): Linear(in_features=128, out_features=768, bias=False)
    #       (position_embeddings): Embedding(512, 768)
    #       (token_type_embeddings): Embedding(2, 768)
    #       (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    #       (dropout): Dropout(p=0.0, inplace=False)
    #     )
    #     (encoder): AlbertEncoder(
    #       (layer_shared): AlbertLayer(
    #         (attention): AlbertAttention(
    #           (self): AlbertSelfAttention(
    #             (query): Linear(in_features=768, out_features=768, bias=True)
    #             (key): Linear(in_features=768, out_features=768, bias=True)
    #             (value): Linear(in_features=768, out_features=768, bias=True)
    #             (dropout): Dropout(p=0.0, inplace=False)
    #           )
    #           (output): AlbertSelfOutput(
    #             (dense): Linear(in_features=768, out_features=768, bias=True)
    #             (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    #             (dropout): Dropout(p=0.0, inplace=False)
    #           )
    #         )
    #         (intermediate): AlbertIntermediate(
    #           (dense): Linear(in_features=768, out_features=3072, bias=True)
    #         )
    #         (output): AlbertOutput(
    #           (dense): Linear(in_features=3072, out_features=768, bias=True)
    #           (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    #           (dropout): Dropout(p=0.0, inplace=False)
    #         )
    #       )
    #     )
    #     (pooler): AlbertPooler(
    #       (dense): Linear(in_features=768, out_features=768, bias=True)
    #       (activation): Tanh()
    #     )
    #   )
    #   (dropout): Dropout(p=0.2, inplace=False)
    #   (classifier): Linear(in_features=768, out_features=2, bias=True)
    # )
    # eval_dataloader = <torch.utils.data.dataloader.DataLoader object at 0x7f113f07d668>
    # metrics = <common.metrics.Accuracy object at 0x7f11a904fa90>

    for bid, batch in enumerate(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3]
            }
            # inputs['token_type_ids'] = batch[2]
            outputs = model(**inputs)
            loss, logits = outputs[:2]
            eval_loss.update(loss.item(), n=batch[0].size()[0])
        preds.append(logits.cpu().detach())
        targets.append(inputs['labels'].cpu().detach())
        pbar(bid)
        # pdb.set_trace()
        # (Pdb) pp batch[0].size(), batch[1].size(), batch[2].size(), batch[3].size()
        # (torch.Size([16, 64]), torch.Size([16, 64]), torch.Size([16, 64]), torch.Size([16]))
        # (Pdb) inputs['input_ids'][0]
        # tensor([ 101, 6443, 3300, 4312,  676, 6821, 2476, 7770, 3926, 4638,  102, 6821,
        #         2476, 7770, 3926, 1745, 8024, 6443, 3300,  102,    0,    0,    0,    0,
        #            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        #            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        #            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
        #            0,    0,    0,    0], device='cuda:0')
        # (Pdb) inputs['attention_mask'][0]
        # tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        #         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        #         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
        # (Pdb) inputs['token_type_ids'][0]
        # tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        #         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        #         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')
        # (Pdb) inputs['labels']
        # tensor([0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1], device='cuda:0')

    preds = torch.cat(preds, dim=0).cpu().detach()
    targets = torch.cat(targets, dim=0).cpu().detach()
    metrics(preds, targets)
    eval_log = {"eval_acc": metrics.value(), 'eval_loss': eval_loss.avg}
    return eval_log
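# The `metrics` object used above is an Accuracy metric (see the pdb dump). A minimal
# sketch of a compatible implementation, inferred from the reset()/__call__()/value()
# usage; illustrative only, not the original common.metrics.Accuracy:
import torch


class Accuracy:
    def __init__(self):
        self.reset()

    def reset(self):
        self.correct = 0
        self.total = 0

    def __call__(self, logits, target):
        # logits: [N, num_classes], target: [N]
        pred = torch.argmax(logits, dim=1)
        self.correct = (pred == target).sum().item()
        self.total = target.size(0)

    def value(self):
        return self.correct / self.total if self.total else 0.0

    def name(self):
        return 'accuracy'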
    def create_features(self, examples, max_seq_len, cached_features_file):
        '''
        # The convention in BERT is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids:   0   0   0   0  0     0   0
        '''
        pbar = ProgressBar(n_total=len(examples), desc='create features')
        if cached_features_file.exists():
            logger.info("Loading features from cached file %s",
                        cached_features_file)
            features = torch.load(cached_features_file)
        else:
            features = []
            for ex_id, example in enumerate(examples):
                tokens_a = self.tokenizer.tokenize(example.text_a)
                tokens_b = None
                label_id = example.label

                if example.text_b:
                    tokens_b = self.tokenizer.tokenize(example.text_b)
                    # Modifies `tokens_a` and `tokens_b` in place so that the total
                    # length is less than the specified length.
                    # Account for [CLS], [SEP], [SEP] with "- 3"
                    self.truncate_seq_pair(tokens_a,
                                           tokens_b,
                                           max_length=max_seq_len - 3)
                else:
                    # Account for [CLS] and [SEP] with '-2'
                    if len(tokens_a) > max_seq_len - 2:
                        tokens_a = tokens_a[:max_seq_len - 2]
                tokens = ['[CLS]'] + tokens_a + ['[SEP]']
                segment_ids = [0] * len(tokens)
                if tokens_b:
                    tokens += tokens_b + ['[SEP]']
                    segment_ids += [1] * (len(tokens_b) + 1)

                input_ids = self.tokenizer.convert_tokens_to_ids(tokens)
                input_mask = [1] * len(input_ids)
                padding = [0] * (max_seq_len - len(input_ids))
                input_len = len(input_ids)

                input_ids += padding
                input_mask += padding
                segment_ids += padding

                assert len(input_ids) == max_seq_len
                assert len(input_mask) == max_seq_len
                assert len(segment_ids) == max_seq_len

                if ex_id < 2:
                    logger.info("*** Example ***")
                    logger.info(f"guid: {example.guid}" % ())
                    logger.info(
                        f"tokens: {' '.join([str(x) for x in tokens])}")
                    logger.info(
                        f"input_ids: {' '.join([str(x) for x in input_ids])}")
                    logger.info(
                        f"input_mask: {' '.join([str(x) for x in input_mask])}"
                    )
                    logger.info(
                        f"segment_ids: {' '.join([str(x) for x in segment_ids])}"
                    )
                    logger.info(f"label id : {label_id}")

                feature = InputFeature(input_ids=input_ids,
                                       input_mask=input_mask,
                                       segment_ids=segment_ids,
                                       label_id=label_id,
                                       input_len=input_len)
                features.append(feature)
                pbar(step=ex_id)
            logger.info("Saving features into cached file %s",
                        cached_features_file)
            torch.save(features, cached_features_file)
        return features
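    # truncate_seq_pair is called in create_features above but not shown. A sketch of the
    # standard BERT pair-truncation helper (drop a token from the longer sequence until
    # the pair fits); defined as a method here to match the self.truncate_seq_pair call:
    def truncate_seq_pair(self, tokens_a, tokens_b, max_length):
        while len(tokens_a) + len(tokens_b) > max_length:
            if len(tokens_a) > len(tokens_b):
                tokens_a.pop()
            else:
                tokens_b.pop()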
Example #27
def train(args, train_features, model, tokenizer, use_crf):
    """ Train the model """
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    bert_param_optimizer = list(model.bert.named_parameters())

    if args.model_encdec == 'bert2crf':
        crf_param_optimizer = list(model.crf.named_parameters())
        linear_param_optimizer = list(model.classifier.named_parameters())
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in bert_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in bert_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in crf_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in crf_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.crf_learning_rate
        }]

    elif args.model_encdec == 'bert2gru':
        gru_param_optimizer = list(model.decoder.named_parameters())
        linear_param_optimizer = list(model.clsdense.named_parameters())
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in bert_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in bert_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in gru_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in gru_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.crf_learning_rate
        }]

    elif args.model_encdec == 'bert2soft':
        linear_param_optimizer = list(model.classifier.named_parameters())
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in bert_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in bert_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.crf_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.crf_learning_rate
        }]

    elif args.model_encdec == 'multi2point':
        # gru_param_optimizer = list(model.decoder.named_parameters())
        linear_param_optimizer = list(model.pointer.named_parameters())
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in bert_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in bert_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            args.weight_decay,
            'lr':
            args.point_learning_rate
        }, {
            'params': [
                p for n, p in linear_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0,
            'lr':
            args.point_learning_rate
        }]

    t_total = len(train_features) // args.batch_size * args.num_train_epochs
    args.warmup_steps = int(t_total * args.warmup_proportion)

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=t_total)
    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(
            args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
                os.path.join(args.model_name_or_path, "scheduler.pt")):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(
            torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_features))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.batch_size *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    # logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    pre_result = {}
    model.zero_grad()
    seed_everything(args.seed)  # Added here for reproducibility (even between python 2 and 3)
    total_step = 0
    best_spanf = -1

    test_results = {}
    for ep in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_features) // args.batch_size,
                           desc='Training')
        if ep == int(args.num_train_epochs) - 1:
            eval_features = load_and_cache_examples(args,
                                                    args.data_type,
                                                    tokenizer,
                                                    data_type='dev')
            train_features.extend(eval_features)

        step = 0
        for batch in batch_generator(features=train_features,
                                     batch_size=args.batch_size,
                                     use_crf=use_crf,
                                     answer_seq_len=args.answer_seq_len):
            (batch_input_ids, batch_input_mask, batch_segment_ids,
             batch_label_ids, batch_multi_span_label, batch_context_mask,
             batch_start_position, batch_end_position, batch_raw_labels,
             _, batch_example) = batch
            model.train()
            if args.model_encdec in ('bert2crf', 'bert2gru', 'bert2soft'):
                batch_inputs = tuple(t.to(args.device) for t in batch[0:6])
                inputs = {
                    "input_ids": batch_inputs[0],
                    "attention_mask": batch_inputs[1],
                    "token_type_ids": batch_inputs[2],
                    "context_mask": batch_inputs[5],
                    "labels": batch_inputs[3],
                    "testing": False
                }

            elif args.model_encdec == 'multi2point':
                batch_inputs = tuple(t.to(args.device) for t in batch[0:5])
                inputs = {
                    "input_ids": batch_inputs[0],
                    "attention_mask": batch_inputs[1],
                    "token_type_ids": batch_inputs[2],
                    "span_label": batch_inputs[4],
                    "testing": False
                }

            outputs = model(**inputs)
            loss = outputs[
                0]  # model outputs are always tuple in pytorch-transformers (see doc)
            if args.n_gpu > 1:
                loss = loss.mean(
                )  # mean() to average on multi-gpu parallel training
            loss.backward()
            if step % 15 == 0:
                pbar(step, {'epoch': ep, 'loss': loss.item()})
            step += 1
            tr_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1
            if args.local_rank in [
                    -1, 0
            ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                # Log metrics
                print("start evalue")
                if args.local_rank == -1:
                    # Only evaluate when single GPU otherwise metrics may not average well
                    results = evaluate(args=args,
                                       model=model,
                                       tokenizer=tokenizer,
                                       prefix="dev",
                                       use_crf=use_crf)
                    span_f = results['span_f']
                    if span_f > best_spanf:
                        output_dir = os.path.join(args.output_dir,
                                                  "checkpoint-bestf")
                        if os.path.exists(output_dir):
                            shutil.rmtree(output_dir)
                            print('removed previous best checkpoint:', output_dir)
                        print('\n\n eval results:', results)
                        test_results = evaluate(args=args,
                                                model=model,
                                                tokenizer=tokenizer,
                                                prefix="test",
                                                use_crf=use_crf)
                        print('\n\n test results', test_results)
                        print('\n epoch = :', ep)

                        best_spanf = span_f
                        os.makedirs(output_dir)
                        # print('dir = ', output_dir)
                        model_to_save = (
                            model.module if hasattr(model, "module") else model
                        )  # Take care of distributed/parallel training
                        model_to_save.save_pretrained(output_dir)
                        torch.save(
                            args, os.path.join(output_dir,
                                               "training_args.bin"))
                        logger.info("Saving model checkpoint to %s",
                                    output_dir)
                        tokenizer.save_vocabulary(output_dir)
                        torch.save(optimizer.state_dict(),
                                   os.path.join(output_dir, "optimizer.pt"))
                        torch.save(scheduler.state_dict(),
                                   os.path.join(output_dir, "scheduler.pt"))
                        logger.info(
                            "Saving optimizer and scheduler states to %s",
                            output_dir)

        np.random.seed()
        np.random.shuffle(train_features)
        logger.info("\n")
        # if 'cuda' in str(args.device):
        torch.cuda.empty_cache()
    return global_step, tr_loss / global_step, test_results
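# The four nearly identical parameter-group blocks in train() above could be factored
# into a small helper. This is a refactoring sketch; the helper name is illustrative and
# not from the original code:
def decay_groups(named_params, no_decay, weight_decay, lr):
    """Split a module's parameters into weight-decay / no-weight-decay groups."""
    named_params = list(named_params)
    return [{
        'params': [p for n, p in named_params
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': weight_decay,
        'lr': lr
    }, {
        'params': [p for n, p in named_params
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
        'lr': lr
    }]

# e.g. the 'bert2crf' branch would reduce to:
# optimizer_grouped_parameters = (
#     decay_groups(model.bert.named_parameters(), no_decay, args.weight_decay, args.learning_rate)
#     + decay_groups(model.crf.named_parameters(), no_decay, args.weight_decay, args.crf_learning_rate)
#     + decay_groups(model.classifier.named_parameters(), no_decay, args.weight_decay, args.crf_learning_rate))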
Example #28
def evaluate(args, model, tokenizer, prefix="dev", use_crf=False):
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)
    eval_features = load_and_cache_examples(args,
                                            args.data_type,
                                            tokenizer,
                                            data_type=prefix)
    processor = processors[args.data_type]()

    logger.info("***** Running evaluation %s *****", prefix)
    logger.info("  Num examples = %d", len(eval_features))
    logger.info("  Batch size = %d", args.batch_size)
    eval_loss = 0.0

    pbar = ProgressBar(n_total=len(eval_features), desc="Evaluating " + prefix)
    if isinstance(model, nn.DataParallel):
        model = model.module

    pre_labels, tru_labels, eval_examples = [], [], []
    step = 0
    for batch in batch_generator(features=eval_features,
                                 batch_size=args.batch_size,
                                 use_crf=use_crf,
                                 answer_seq_len=args.answer_seq_len):

        (batch_input_ids, batch_input_mask, batch_segment_ids,
         batch_label_ids, batch_multi_span_label, batch_context_mask,
         batch_start_position, batch_end_position, batch_raw_labels,
         _, batch_example) = batch
        model.eval()
        if args.model_encdec in ('bert2crf', 'bert2gru', 'bert2soft'):
            batch_inputs = tuple(t.to(args.device) for t in batch[0:6])
            inputs = {
                "input_ids": batch_inputs[0],
                "attention_mask": batch_inputs[1],
                "token_type_ids": batch_inputs[2],
                "context_mask": batch_inputs[5],
                "testing": True
            }

        elif args.model_encdec == 'multi2point':
            batch_inputs = tuple(t.to(args.device) for t in batch[0:5])
            inputs = {
                "input_ids": batch_inputs[0],
                "attention_mask": batch_inputs[1],
                "token_type_ids": batch_inputs[2],
                "testing": True
            }

        outputs = model(**inputs)
        eval_examples.extend(batch_example)
        out_label_ids = batch[8].tolist()
        batch_lens = torch.sum(batch_context_mask, -1).cpu().numpy().tolist()

        if args.model_encdec == 'bert2crf':
            logits = outputs[0]
            tags = model.crf.decode(logits, inputs['attention_mask'])
            tags = tags.squeeze(0).cpu().numpy().tolist()

            for len_doc, cu_tags, cu_trus, exam in zip(batch_lens, tags,
                                                       out_label_ids,
                                                       batch_example):
                emotion_len = exam.emotion_len
                pre_labels.append(cu_tags[1:len_doc + 1])
                tru_labels.append(cu_trus[1:len_doc + 1])

        elif args.model_encdec == 'multi2point':

            start_label, end_label = outputs  # [batch, ans_len]
            start_label = start_label.cpu().numpy().tolist()
            end_label = end_label.cpu().numpy().tolist()

            pres_batch = []
            for s_num, e_num in zip(start_label, end_label):
                pre_tag = [0] * args.max_seq_length
                for s, e in zip(s_num, e_num):
                    if s < e - 1:
                        pre_tag[s] = 1
                        pre_tag[s + 1:e] = [2] * (e - s - 1)
                    elif s == e - 1:
                        pre_tag[s] = 1
                pres_batch.append(pre_tag)

            for len_doc, cu_tags, cu_trus, exam in zip(batch_lens, pres_batch,
                                                       out_label_ids,
                                                       batch_example):
                emotion_len = exam.emotion_len
                pre_labels.append(cu_tags[1:len_doc + 1])
                tru_labels.append(cu_trus[1:len_doc + 1])

        elif args.model_encdec in ('bert2gru', 'bert2soft'):
            tags = outputs.detach().cpu().numpy()
            tags = tags.tolist()

            for len_doc, cu_tags, cu_trus, exam in zip(batch_lens, tags,
                                                       out_label_ids,
                                                       batch_example):
                pre_labels.append(cu_tags[1:len_doc + 1])
                tru_labels.append(cu_trus[1:len_doc + 1])

        step += 1

        if step % 20 == 0:
            pbar(step)

    logger.info("\n")
    results = get_prf(pre_labels, tru_labels, eval_examples)

    logger.info("***** Eval results %s *****", prefix)
    info = "-".join(
        [f' {key}: {value:.4f} ' for key, value in results.items()])
    logger.info(info)

    return results
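# get_prf is an external helper that is not shown here. As an illustrative stand-in (not
# the original), the spans encoded as 0/1/2 (outside/begin/inside) by the decoding above
# can be scored with micro span-level precision/recall/F1; 'span_f' is the key train()
# reads from the returned dict:
def _spans(seq):
    spans, start = set(), None
    for i, t in enumerate(seq):
        if t == 1:  # a begin tag opens a span (and closes any open one)
            if start is not None:
                spans.add((start, i))
            start = i
        elif t != 2 and start is not None:  # anything but inside closes an open span
            spans.add((start, i))
            start = None
    if start is not None:
        spans.add((start, len(seq)))
    return spans


def get_prf(pre_labels, tru_labels, examples=None):
    tp = fp = fn = 0
    for pred_seq, true_seq in zip(pre_labels, tru_labels):
        pred_spans, true_spans = _spans(pred_seq), _spans(true_seq)
        tp += len(pred_spans & true_spans)
        fp += len(pred_spans - true_spans)
        fn += len(true_spans - pred_spans)
    p = tp / (tp + fp) if tp + fp else 0.0
    r = tp / (tp + fn) if tp + fn else 0.0
    f = 2 * p * r / (p + r) if p + r else 0.0
    return {'span_p': p, 'span_r': r, 'span_f': f}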
    def _test(self, args, model, prefix=""):
        # Loop to handle MNLI double evaluation (matched, mis-matched)
        test_task_names = ("mnli",
                           "mnli-mm") if args.task_name == "mnli" else (
                               args.task_name, )
        test_outputs_dirs = (args.output_dir, args.output_dir +
                             '-MM') if args.task_name == "mnli" else (
                                 args.output_dir, )

        results = {}
        for test_task, test_output_dir in zip(test_task_names,
                                              test_outputs_dirs):
            test_dataset = self.dataset["test_dataset"]
            if not os.path.exists(test_output_dir) and args.local_rank in [
                    -1, 0
            ]:
                os.makedirs(test_output_dir)

            args.eval_batch_size = args.per_gpu_eval_batch_size * max(
                1, args.n_gpu)
            # Note that DistributedSampler samples randomly
            test_dataloader = DataLoader(test_dataset,
                                         batch_size=args.eval_batch_size)

            # Test!
            print("***** Running test {} *****".format(prefix))
            print("  Num examples = %d", len(test_dataset))
            print("  Batch size = %d", args.eval_batch_size)
            eval_loss = 0.0
            nb_eval_steps = 0
            preds = None
            out_label_ids = None
            pbar = ProgressBar(n_total=len(test_dataloader), desc="Testing")
            for step, batch in enumerate(test_dataloader):
                model.eval()
                with torch.no_grad():
                    inputs = {
                        'input_ids': batch["input_ids"].to(args.device),
                        'attention_mask':
                        batch['attention_mask'].to(args.device),
                        'token_type_ids':
                        batch['token_type_ids'].to(args.device),
                        "labels": batch["labels"].to(args.device)
                    }
                    outputs = model(**inputs)
                    tmp_eval_loss, logits = outputs.loss, outputs.logits
                    eval_loss += tmp_eval_loss.mean().item()
                nb_eval_steps += 1
                if preds is None:
                    preds = logits.detach().cpu().numpy()
                    out_label_ids = inputs['labels'].detach().cpu().numpy()
                else:
                    preds = np.append(preds,
                                      logits.detach().cpu().numpy(),
                                      axis=0)
                    out_label_ids = np.append(
                        out_label_ids,
                        inputs['labels'].detach().cpu().numpy(),
                        axis=0)
                pbar(step)
            print(' ')
            if 'cuda' in str(args.device):
                torch.cuda.empty_cache()
            self._save_result(args.model_type + str(prefix), preds,
                              out_label_ids)
            preds = np.argmax(preds, axis=1)
            result = acc_and_f1(preds, out_label_ids, average="macro")
            results.update(result)
        return results
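# acc_and_f1 is not defined in this snippet. A minimal sketch of a compatible helper,
# assuming scikit-learn is available; illustrative, the real helper may differ:
from sklearn.metrics import accuracy_score, f1_score


def acc_and_f1(preds, labels, average="macro"):
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average=average)
    return {"acc": acc, "f1": f1, "acc_and_f1": (acc + f1) / 2}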
Example #30
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size,
                                  collate_fn=collate_fn)

    if args.max_steps > 0:
        num_training_steps = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        num_training_steps = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs
    args.warmup_steps = int(num_training_steps * args.warmup_proportion)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    optimizer = AdamW(params=optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=num_training_steps)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.per_gpu_train_batch_size)
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", num_training_steps)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    seed_everything(args.seed)  # Added here for reproducibility (even between python 2 and 3)
    for _ in range(int(args.num_train_epochs)):
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
        for step, batch in enumerate(train_dataloader):
            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            inputs['token_type_ids'] = batch[2]
            outputs = model(**inputs)
            loss = outputs[0]

            if args.n_gpu > 1:
                loss = loss.mean()
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1

            if args.local_rank in [
                    -1, 0
            ] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                # Log metrics
                if args.local_rank == -1:
                    evaluate(args, model, tokenizer)

            # if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
            #     output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
            #     if not os.path.exists(output_dir):
            #         os.makedirs(output_dir)
            #     model_to_save = model.module if hasattr(model,
            #                                             'module') else model
            #     model_to_save.save_pretrained(output_dir)
            #     torch.save(args, os.path.join(output_dir, 'training_args.bin'))
            #     logger.info("Saving model checkpoint to %s", output_dir)
            pbar(step, {'loss': loss.item()})

        print(" ")
        if 'cuda' in str(args.device):
            torch.cuda.empty_cache()

    # if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
    output_dir = os.path.join(args.output_dir,
                              'checkpoint-{}'.format(global_step))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir)
    torch.save(args, os.path.join(output_dir, 'training_args.bin'))
    logger.info("Saving model checkpoint to %s", output_dir)

    return global_step, tr_loss / global_step
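# A hedged usage sketch for train() above: build the model and tokenizer with the
# transformers library, load the training set, and run training. get_args() is a
# placeholder for whatever argument parsing the full script uses; load_and_cache_examples
# and logger come from the surrounding code:
if __name__ == "__main__":
    from transformers import BertTokenizer, BertForSequenceClassification

    args = get_args()  # hypothetical argument parser, not shown in this snippet
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path)
    model = BertForSequenceClassification.from_pretrained(args.model_name_or_path)
    model.to(args.device)
    train_dataset = load_and_cache_examples(args, args.task_name, tokenizer,
                                            data_type='train')
    global_step, avg_loss = train(args, train_dataset, model, tokenizer)
    logger.info("global_step = %s, average loss = %s", global_step, avg_loss)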