Example #1
    def post_processing_function(examples,
                                 features,
                                 predictions,
                                 stage="eval"):
        # Post-processing: we match the start logits and end logits to answers in the original context.
        predictions = postprocess_qa_predictions(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=args.version_2_with_negative,
            n_best_size=args.n_best_size,
            max_answer_length=args.max_answer_length,
            null_score_diff_threshold=args.null_score_diff_threshold,
            output_dir=args.output_dir,
            prefix=stage,
        )
        # Format the result to the format the metric expects.
        if args.version_2_with_negative:
            formatted_predictions = [{
                "id": k,
                "prediction_text": v,
                "no_answer_probability": 0.0
            } for k, v in predictions.items()]
        else:
            formatted_predictions = [{
                "id": k,
                "prediction_text": v
            } for k, v in predictions.items()]

        references = [{
            "id": ex["id"],
            "answers": ex[answer_column_name]
        } for ex in examples]
        return EvalPrediction(predictions=formatted_predictions,
                              label_ids=references)
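A function like this is typically consumed together with a SQuAD-style metric: the formatted EvalPrediction feeds straight into metric.compute(). A minimal usage sketch, assuming the evaluate library and names such as eval_examples, eval_dataset and raw_predictions coming from the surrounding run_qa-style script:

import evaluate

metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")

# eval_examples / eval_dataset / raw_predictions are assumed to come from the
# surrounding script; post_processing_function is the function defined above.
eval_pred = post_processing_function(eval_examples, eval_dataset, raw_predictions)
eval_metric = metric.compute(predictions=eval_pred.predictions,
                             references=eval_pred.label_ids)
print(eval_metric)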
Example #2
 def post_processing_function(examples, features, predictions):
     # Post-processing: we match the start logits and end logits to answers in the original context.
     predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
         examples=examples,
         features=features,
         predictions=predictions,
         version_2_with_negative=data_args.version_2_with_negative,
         n_best_size=data_args.n_best_size,
         max_answer_length=data_args.max_answer_length,
         start_n_top=model.config.start_n_top,
         end_n_top=model.config.end_n_top,
         output_dir=training_args.output_dir,
         is_world_process_zero=trainer.is_world_process_zero(),
     )
     # Format the result to the format the metric expects.
     if data_args.version_2_with_negative:
          formatted_predictions = [{
              "id": k,
              "prediction_text": v,
              "no_answer_probability": scores_diff_json[k]
          } for k, v in predictions.items()]
     else:
         formatted_predictions = [{
             "id": k,
             "prediction_text": v
         } for k, v in predictions.items()]
     references = [{
         "id": ex["id"],
         "answers": ex[answer_column_name]
     } for ex in datasets["validation"]]
     return EvalPrediction(predictions=formatted_predictions,
                           label_ids=references)
Example #3
 def _post_processing_function(self, examples, features, predictions,
                               output_dir):
     # Post-processing: we match the start logits and end logits to answers in the original context.
     data_args = self.data_args
     training_args = self.training_args
     predictions = postprocess_qa_predictions(
         examples=examples,
         features=features,
         predictions=predictions,
         version_2_with_negative=data_args.version_2_with_negative,
         n_best_size=data_args.n_best_size,
         max_answer_length=data_args.max_answer_length,
         null_score_diff_threshold=data_args.null_score_diff_threshold,
         output_dir=output_dir,
         is_world_process_zero=self.trainer.is_world_process_zero(),
     )
     # Format the result to the format the metric expects.
     if data_args.version_2_with_negative:
         formatted_predictions = [{
             "id": k,
             "prediction_text": v,
             "no_answer_probability": 0.0
         } for k, v in predictions.items()]
     else:
         formatted_predictions = [{
             "id": k,
             "prediction_text": v
         } for k, v in predictions.items()]
     references = [{
         "id": ex["id"],
         "answers": ex[self.answer_column_name]
     } for ex in self.datasets["validation"]]
     return EvalPrediction(predictions=formatted_predictions,
                           label_ids=references)
Example #4
    def _post_processing_function(self, examples, features, predictions,
                                  training_args):
        pred_results = postprocess_qa_predictions(
            examples=examples,
            features=features,
            predictions=predictions,
            training_args=training_args,
            topk=self.args.retriever.topk,
            max_answer_length=self.args.data.max_answer_length,
            output_dir=training_args.output_dir,
            prefix="test" if self.args.train.do_predict else "valid",
        )

        for k in pred_results.keys():
            assert k in ["predictions", "pororo_predictions"]

        formatted_predictions = [{
            "id": k,
            "prediction_text": v
        } for k, v in pred_results["predictions"].items()]

        if training_args.do_predict:
            return formatted_predictions

        if not training_args.do_eval:  # reaching this point with both do_predict and do_eval False should not happen
            raise KeyError("run evaluate with do_predict or do_eval")

        references = [{
            "id": ex["id"],
            "answers": ex[self.answer_column_name]
        } for ex in self.eval_answers]

        valid_results = {}
        valid_results["predictions"] = EvalPrediction(
            predictions=formatted_predictions, label_ids=references)

        if training_args.pororo_prediction:
            formatted_pororo_predictions = [{
                "id": k,
                "prediction_text": v
            } for k, v in pred_results["pororo_predictions"].items()]

            valid_results["pororo_predictions"] = EvalPrediction(
                predictions=formatted_pororo_predictions, label_ids=references)

        return valid_results
Example #5
def self_test():
    pred = EvalPrediction(
        label_ids=np.array([[-100, 1, -100], [2, -100, -100], [-100, -100, 3],
                            [-100, -100, 4]]),
        predictions=np.array([
            [-100, 1, -100],  # 1 true positive
            [2, -100, -100],  # 1 true positive
            [2, 6, 8],  # 1 false positive, irrelevant pos will be ignored
            [1, 7, 4]  # 1 true positive, irrelevant pos will be ignored
        ]))
    m = compute_metrics(pred)
    print(f"recall={m['recall']}")
    assert m['recall'] == 0.75
    print("Looks like it is working!")
Example #6
 def post_processing_function_for_eval(examples, features, predictions, train_args):
     # Post-processing: we match the start logits and end logits to answers in the original context.
     predictions = postprocess_qa_predictions(
         examples=examples,
         features=features,
         predictions=predictions,
         max_answer_length=token_args.max_answer_length,
         output_dir=train_args.output_dir,
     )
     # Format the result to the format the metric expects.
     formatted_predictions = [
         {"id": k, "prediction_text": v} for k, v in predictions.items()
     ]
     references = [
         {"id": ex["id"], "answers": ex[answer_column_name]}
         for ex in datasets["validation"]
     ]
     return EvalPrediction(predictions=formatted_predictions, label_ids=references)
Example #7
 def post_proc(self, xs, features, preds, stage="eval"):
     ps = self.params
     ys = proc_tests(
         examples=xs,
         features=features,
         predictions=preds,
         version_2_with_negative=ps.version_2_with_negative,
         n_best_size=ps.n_best_size,
         max_answer_length=ps.max_answer_length,
         null_score_diff_threshold=ps.null_score_diff_threshold,
         out_dir=ps.out_dir,
         prefix=stage,
     )
     if ps.version_2_with_negative:
         ys = [
             {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in ys.items()
         ]
     else:
         ys = [{"id": k, "prediction_text": v} for k, v in ys.items()]
     ids = [{"id": x["id"], "answers": x[self.cols[EACH][2]]} for x in xs]
     return EvalPrediction(predictions=ys, label_ids=ids)
Example #8
def post_processing_function(examples, features, predictions, text_data,
                             data_args, training_args):
    '''Converts the model's predictions into text form.'''
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        max_answer_length=data_args.max_answer_length,
        output_dir=training_args.output_dir,
    )

    formatted_predictions = [{
        "id": k,
        "prediction_text": last_processing(v)
    } for k, v in predictions.items()]
    if training_args.do_predict:
        return formatted_predictions

    references = [{
        "id": ex["id"],
        "answers": ex["answers"]
    } for ex in text_data["validation"]]
    return EvalPrediction(predictions=formatted_predictions,
                          label_ids=references)
Example #9
 def post_proc(self, xs, features, preds, stage="eval"):
     ps = self.params
     ys, diff = proc_preds(
         examples=xs,
         features=features,
         predictions=preds,
         version_2_with_negative=ps.version_2_with_negative,
         n_best_size=ps.n_best_size,
         max_answer_length=ps.max_answer_length,
         start_n_top=self.model.config.start_n_top,
         end_n_top=self.model.config.end_n_top,
         out_dir=ps.out_dir,
         prefix=stage,
     )
     if ps.version_2_with_negative:
         ys = [{
             "id": k,
             "prediction_text": v,
             "no_answer_probability": diff[k]
         } for k, v in ys.items()]
     else:
         ys = [{"id": k, "prediction_text": v} for k, v in ys.items()]
     ids = [{"id": x["id"], "answers": x[self.cols[EACH][2]]} for x in xs]
     return EvalPrediction(predictions=ys, label_ids=ids)
Example #10
    def prediction_loop(self,
                        dataloader: DataLoader,
                        description: str,
                        prediction_loss_only: Optional[bool] = None,
                        extract_path: Optional[str] = None,
                        cache_path: Optional[str] = None) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

        Works both with or without labels.
        """
        prediction_loss_only = (prediction_loss_only
                                if prediction_loss_only is not None else
                                self.args.prediction_loss_only)

        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        else:
            model = self.model
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

        batch_size = dataloader.batch_size
        eval_losses: List[float] = []
        hidden_states: torch.tensor = None
        preds: torch.Tensor = None
        label_ids: torch.Tensor = None
        model.eval()

        if self.args.past_index >= 0:
            self._past = None

        # Unfortunate, but we'll run through the dataloader once to count the number of tokens (or this could be pre-processed)
        if extract_path is not None:
            stimulus_mask = lambda tokens: (tokens != 101) & (tokens != 102) & (tokens != 0)
            cached_masks = None
            if osp.exists(f"{cache_path}.npy"):
                # np instead of torch, something's funky with Vivek's env.
                cached_masks = torch.from_numpy(np.load(f"{cache_path}.npy"))
            else:
                all_masks = None
                limit_tokens = self.custom_cfg.TASK.EXTRACT_TOKENS_LIMIT
                # Calculate the random ratio of tokens to grab (we specify number of tokens to extract)
                total_tokens = 0
                for inputs in dataloader:
                    tokens = inputs["input_ids"]
                    total_tokens += stimulus_mask(tokens).sum()
                subset_ratio = torch.true_divide(limit_tokens, total_tokens)

        # Seed, we want to be sure that we're finding the same stimuli
        disable_tqdm = not self.is_local_process_zero() or self.args.disable_tqdm
        samples_count = 0
        for inputs in tqdm(dataloader, desc=description, disable=disable_tqdm):
            loss, logits, labels, states = self.prediction_step(
                model,
                inputs,
                prediction_loss_only,
                output_hidden_states=extract_path is not None)
            batch_size = inputs[list(inputs.keys())[0]].shape[0]
            if loss is not None:
                eval_losses.append(loss * batch_size)
            if states is not None:
                # L + 1 [ Batch x Length x Hidden ] (layers and embedding)
                if cached_masks is not None:
                    cached_masks = cached_masks.to(logits.device)
                    mask = cached_masks[samples_count:samples_count +
                                        inputs["input_ids"].shape[0]]  # B x T
                    mask = mask[:, :inputs["input_ids"].shape[1]]  # Dynamic padding
                else:
                    subset_mask = torch.full(inputs["input_ids"].shape,
                                             subset_ratio,
                                             device=logits.device)
                    mask = (torch.bernoulli(subset_mask).long() &
                            stimulus_mask(inputs["input_ids"])).bool()  # B X T
                    if all_masks is None:
                        all_masks = mask
                    else:
                        all_masks = nested_concat(all_masks,
                                                  mask,
                                                  padding_index=-100)  # B x T
                # [1:] to drop embedding layer
                states = torch.stack(states)[1:].permute(1, 2, 0,
                                                         3)  # B x T x L x H
                target_tokens = states[mask]  # M x L x H
                if hidden_states is None:
                    hidden_states = target_tokens
                else:
                    hidden_states = torch.cat([hidden_states, target_tokens],
                                              dim=0)
            samples_count += batch_size
            if logits is not None:
                preds = logits if preds is None else nested_concat(
                    preds, logits, padding_index=-100)
            if labels is not None:
                label_ids = labels if label_ids is None else nested_concat(
                    label_ids, labels, padding_index=-100)

        if extract_path is not None:
            os.makedirs(osp.split(extract_path)[0], exist_ok=True)
            np.save(extract_path,
                    hidden_states.half().cpu().numpy())  # half to save memory
            if cached_masks is None:
                os.makedirs(osp.split(cache_path)[0], exist_ok=True)
                np.save(cache_path, all_masks.cpu().numpy())

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        if self.args.local_rank != -1:
            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds = self.distributed_concat(
                    preds, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids = self.distributed_concat(
                    label_ids,
                    num_total_examples=self.num_examples(dataloader))

        # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = preds.cpu().numpy()
        if label_ids is not None:
            label_ids = label_ids.cpu().numpy()

        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(
                EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            metrics["eval_loss"] = np.sum(eval_losses) / samples_count

        # Prefix all keys with eval_
        for key in list(metrics.keys()):
            if not key.startswith("eval_"):
                metrics[f"eval_{key}"] = metrics.pop(key)

        return PredictionOutput(predictions=preds,
                                label_ids=label_ids,
                                metrics=metrics)
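An illustrative call on an instance of the custom Trainer subclass this method belongs to (the paths are hypothetical); passing extract_path and cache_path enables the hidden-state extraction branch above, otherwise the loop behaves like the stock prediction loop:

# Hypothetical usage; "outputs/hidden_states" and "outputs/token_mask" are
# made-up paths, and eval_dataloader comes from the surrounding Trainer setup.
output = trainer.prediction_loop(
    eval_dataloader,
    description="Evaluation",
    extract_path="outputs/hidden_states",
    cache_path="outputs/token_mask",
)
print(output.metrics)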
Example #11
def main():

    #_use_cuda()
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=3,
    )

    # Set seed
    set_seed(training_args.seed)

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
    )
    model = AutoModelForMultitaskSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        config=config,
    )

    # print(model.state_dict())
    # Fetch Datasets
    train_set = SarcArgDataset(_load_data(data_args), tokenizer) if training_args.do_train else None
    eval_dataset = SarcArgDataset(_load_data(data_args, evaluate=True), tokenizer) if training_args.do_eval else None

    def compute_metrics(p: EvalPrediction) -> Dict:
        preds = np.argmax(p.predictions, axis=1)
        return f1(preds, p.label_ids)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_set,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
        )
        trainer.save_model()
        tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    results = {}
    if training_args.do_eval and training_args.local_rank in [-1, 0]:
        logger.info("*** Evaluate ***")

        eval_datasets = [eval_dataset]
        for eval_dataset in eval_datasets:
            result_set = trainer.evaluate(eval_dataset=eval_dataset)
            result = result_set[0].metrics

            output_eval_file = os.path.join(
                training_args.output_dir, f"eval_results_.txt"
            )
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results *****")
                for key, value in result.items():
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))

            results.update(result)

            preds_t1, label_ids_t1 = result_set[0].predictions, result_set[0].label_ids
            preds_t2, label_ids_t2 = result_set[1].predictions, result_set[1].label_ids
            preds_t1, labels_t1 = store_preds(EvalPrediction(predictions=preds_t1, label_ids=label_ids_t1))
            preds_t2, labels_t2 = store_preds(EvalPrediction(predictions=preds_t2, label_ids=label_ids_t2))

            data = _load_data(data_args, evaluate=True)
            context, reply = [], []
            for example in data:
                ctx, rpl = example.split('\t')[0:2]
                context.append(ctx)
                reply.append(rpl)

            output_score_file_t1 = os.path.join(
                training_args.output_dir, f"eval_preds_t1.txt"
            )

            output_score_file_t2 = os.path.join(
                training_args.output_dir, f"eval_preds_t2.txt"
            )

            with open(output_score_file_t1, "w") as writer:
                for i in range(len(context)):
                    writer.write("%s\t%s\t%s\t%s\n" % (context[i], reply[i], labels_t1[i], preds_t1[i]))

            with open(output_score_file_t2, "w") as writer:
                for i in range(len(context)):
                    writer.write("%s\t%s\t%s\t%s\n" % (context[i], reply[i], labels_t2[i], preds_t2[i]))

    return results
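The f1 helper consumed by compute_metrics above is defined elsewhere in the project; a minimal stand-in with the same (preds, labels) -> Dict signature, assuming a macro-averaged scikit-learn F1 over the three classes, might be:

from typing import Dict

import numpy as np
from sklearn.metrics import f1_score

def f1(preds: np.ndarray, labels: np.ndarray) -> Dict[str, float]:
    # Hypothetical helper: macro-averaged F1 over the num_labels=3 classes.
    return {"f1": f1_score(labels, preds, average="macro")}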
Example #12
    def _prediction_loop(
            self,
            dataloader: DataLoader,
            description: str,
            prediction_loss_only: Optional[bool] = None) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by `evaluate()` and `predict()`.
        Works both with or without labels.
        """
        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        else:
            model = self.model
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.
        batch_size = dataloader.batch_size
        LOG.info("***** Running %s *****", description)
        LOG.info("  Num examples = %d", self.num_examples(dataloader))
        LOG.info("  Batch size = %d", batch_size)
        eval_losses: List[float] = []
        preds: torch.Tensor = None
        label_ids: torch.Tensor = None
        model.eval()
        # if is_torch_tpu_available():
        #     dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)
        if self.args.past_index >= 0:
            past = None
        for inputs in tqdm(dataloader, desc=description):
            has_labels = any(
                inputs.get(k) is not None
                for k in ["labels", "lm_labels", "masked_lm_labels"])
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(self.args.device)
            if self.args.past_index >= 0:
                inputs["mems"] = past

            with torch.no_grad():
                # if self.args.predict_from_generate:
                if True:
                    max_length = model.config.max_length
                    logits_out = model.generate(
                        inputs["input_ids"],
                        attention_mask=inputs["attention_mask"])
                    # in case the batch is shorter then max length, the output should be padded
                    logits = model.config.eos_token_id * torch.ones(
                        (logits_out.shape[0], max_length),
                        dtype=logits_out.dtype,
                        device=logits_out.device)
                    logits[:, :logits_out.shape[-1]] = logits_out

                    if has_labels:
                        outputs = model(**inputs)
                        step_eval_loss = outputs[0]
                        eval_losses += [step_eval_loss.mean().item()]
                else:
                    outputs = model(**inputs)

                    if has_labels:
                        step_eval_loss, logits = outputs[:2]
                        eval_losses += [step_eval_loss.mean().item()]
                    else:
                        logits = outputs[0]
                    if self.args.past_index >= 0:
                        past = outputs[self.args.past_index if has_labels else
                                       self.args.past_index - 1]
            if not prediction_loss_only:
                if preds is None:
                    preds = logits.detach()
                else:
                    preds = torch.cat((preds, logits.detach()), dim=0)
                if inputs.get("labels") is not None:
                    if label_ids is None:
                        label_ids = inputs["labels"].detach()
                    else:
                        label_ids = torch.cat(
                            (label_ids, inputs["labels"].detach()), dim=0)
        if self.args.local_rank != -1:
            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds = self.distributed_concat(
                    preds, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids = self.distributed_concat(
                    label_ids,
                    num_total_examples=self.num_examples(dataloader))
    # elif is_torch_tpu_available():
    #     # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
    #     if preds is not None:
    #         preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
    #     if label_ids is not None:
    #         label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)
    # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = preds.cpu().numpy()
        if label_ids is not None:
            label_ids = label_ids.cpu().numpy()
        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(
                EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            metrics["eval_loss"] = np.mean(eval_losses)
        # Prefix all keys with eval_
        for key in list(metrics.keys()):
            if not key.startswith("eval_"):
                metrics[f"eval_{key}"] = metrics.pop(key)
        return PredictionOutput(predictions=preds,
                                label_ids=label_ids,
                                metrics=metrics)
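Because this loop collects model.generate outputs (padded with eos_token_id) rather than class logits, the matching compute_metrics usually has to decode token ids before scoring. A minimal sketch, assuming a tokenizer in scope and simple exact-match scoring (both are assumptions, not part of the original code):

import numpy as np
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction) -> dict:
    # Hypothetical metric: decode generated ids and label ids, then exact match.
    labels = np.where(p.label_ids != -100, p.label_ids, tokenizer.pad_token_id)
    pred_texts = tokenizer.batch_decode(p.predictions, skip_special_tokens=True)
    label_texts = tokenizer.batch_decode(labels, skip_special_tokens=True)
    exact = [pt.strip() == lt.strip() for pt, lt in zip(pred_texts, label_texts)]
    return {"exact_match": float(np.mean(exact))}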
Example #13
    def train_session(self, model_tts: ForwardTacotron,
                      model_asr: Wav2Vec2ForCTC, optimizer_tts: Optimizer,
                      tts_session: ForwardSession, asr_session: ASRSession,
                      asr_trainer, optimizer_asr) -> None:
        # print(tts_session.path)
        # exit()
        asr_trainer_state = {'logs': []}
        current_step = model_tts.get_step()
        tts_training_steps = tts_session.max_step - current_step
        try:
            _, asr_current_step = get_last_checkpoint(
                './checkpoints/sme_speech_tts.asr_forward/', 'model_at')
            asr_training_steps = tts_session.max_step - asr_current_step
        except:
            asr_current_step = 0
            asr_training_steps = tts_training_steps

        total_iters = len(tts_session.train_set)
        epochs = tts_training_steps // total_iters + 1
        simple_table([
            ('TTS Steps', str(tts_training_steps // 1000) + 'k Steps'),
            ('ASR Steps', str(asr_training_steps // 1000) + 'k Steps'),
            ('Batch Size TTS', tts_session.bs),
            ('Learning Rate', tts_session.lr)
        ])

        for g in optimizer_tts.param_groups:
            g['lr'] = tts_session.lr

        m_loss_avg = Averager()
        dur_loss_avg = Averager()
        duration_avg = Averager()

        device = next(model_tts.parameters()).device  # use same device as model parameters
        warnings.filterwarnings('ignore', category=UserWarning)
        for e in range(1, epochs + 1):

            #tts train loop for epoch
            for i, (x, m, ids, x_lens, mel_lens,
                    dur) in enumerate(tts_session.train_set, 1):
                start = time.time()
                model_tts.train()
                x, m, dur, x_lens, mel_lens = x.to(device), m.to(device), dur.to(device),\
                                                     x_lens.to(device), mel_lens.to(device)

                m1_hat, m2_hat, dur_hat = model_tts(x, m, dur, mel_lens)

                m1_loss = self.l1_loss(m1_hat, m, mel_lens)
                m2_loss = self.l1_loss(m2_hat, m, mel_lens)

                dur_loss = self.l1_loss(dur_hat.unsqueeze(1), dur.unsqueeze(1),
                                        x_lens)

                tts_s_loss = m1_loss + m2_loss + 0.1 * dur_loss
                optimizer_tts.zero_grad()
                # tts_s_loss.backward()
                torch.nn.utils.clip_grad_norm_(model_tts.parameters(),
                                               hp.tts_clip_grad_norm)
                # optimizer_tts.step()
                m_loss_avg.add(m1_loss.item() + m2_loss.item())
                dur_loss_avg.add(dur_loss.item())
                step = model_tts.get_step()
                k = step // 1000

                duration_avg.add(time.time() - start)
                # pitch_loss_avg.add(pitch_loss.item())

                speed = 1. / duration_avg.get()
                msg_tts = f'| TTS MODEL (supervised training ): '\
                      f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                      f'| Dur Loss: {dur_loss_avg.get():#.4} ' \
                      f'| {speed:#.2} steps/s | Step: {k}k | '

                if step % hp.forward_checkpoint_every == 0:
                    ckpt_name = f'forward_step{k}K'
                    save_checkpoint('forward',
                                    self.paths,
                                    model_tts,
                                    optimizer_tts,
                                    name=ckpt_name,
                                    is_silent=True)

                if step % hp.forward_plot_every == 0:

                    self.generate_plots(model_tts, tts_session)

                self.writer.add_scalar('Mel_Loss/train', m1_loss + m2_loss,
                                       model_tts.get_step())
                self.writer.add_scalar('Duration_Loss/train', dur_loss,
                                       model_tts.get_step())
                self.writer.add_scalar('Params/batch_size', tts_session.bs,
                                       model_tts.get_step())
                self.writer.add_scalar('Params/learning_rate', tts_session.lr,
                                       model_tts.get_step())

                stream(msg_tts)
                # print(msg_tts)
            # print(torch.cuda.memory_allocated(device=device))
            # model_tts = model_tts.to('cpu')

            for step, inputs in enumerate(asr_session.train_set):

                optimizer_asr.zero_grad()

                model_asr.to(device)
                for k, v in inputs.items():
                    if isinstance(v, torch.Tensor):
                        inputs[k] = v.to(device)
                model_asr.train()
                outputs = model_asr(**inputs)
                asr_s_loss = outputs["loss"] if isinstance(
                    outputs, dict) else outputs[0]
                # asr_s_loss = asr_s_loss.mean()

                msg_asr =  f'| ASR MODEL (supervised training) : '\
                            f'| Epoch: {e}/{epochs} ({step}/{len(asr_session.train_set)}) | Loss ASR: {asr_s_loss:#.4} '\
                            f' ||||||||||||||||||||||'

                stream(msg_asr)
            # # model_asr.to('cuda')

            m_val_loss, dur_val_loss = self.evaluate(model_tts,
                                                     tts_session.val_set)
            eval_tts_msg = f'| TTS MODEL (supervised eval ): '\
                        f'| Epoch: {e}/{epochs} | Val Loss: {m_val_loss:#.4} ' \
                        f'| Dur Val Loss: {dur_val_loss:#.4} ' \

            stream(eval_tts_msg)
            tts_eval_loss = m_val_loss + dur_val_loss
            #     print(eval_tts_msg)

            # ASR eval supervised
            print('\nEvaluating ASR model ...')
            # model_asr.to('cpu')
            asr_eval_loss = 0
            eval_wer = 0

            for step, inputs in enumerate(asr_session.test_set):
                asr_eval_loss_i, logits_a, labels_a = asr_trainer.prediction_step(
                    model_asr, inputs, False)
                asr_eval_loss += asr_eval_loss_i
                logits_a.to('cpu')
                eval_wer_i = asr_trainer.compute_metrics(
                    EvalPrediction(predictions=logits_a, label_ids=labels_a))
                eval_wer += eval_wer_i['wer']
                # print(eval_wer)
            eval_wer = eval_wer / step
            asr_eval_loss = asr_eval_loss / step

            msg_asr_eval = f'| ASR MODEL (supervised eval) : Epoch {e}/{epochs} | Loss ASR: {asr_eval_loss:#.4} | WER: {eval_wer} |||||||||||||||||||||||||||||||||||||||||||||||||||||'
            stream(msg_asr_eval)

            # dual transformation loop
            # tts_s_loss = 3
            # asr_s_loss = 1
            tts_u_loss, asr_u_loss = self.dual_transform(
                model_tts, model_asr, optimizer_tts, optimizer_asr,
                asr_session.test_set, m_loss_avg, dur_loss_avg, device,
                asr_current_step, e, epochs, duration_avg, total_iters,
                tts_s_loss, asr_s_loss, tts_session.lr, tts_session.path)
            step += 1
            asr_path = f'checkpoint-27364'
            modelasr_folder = './checkpoints/sme_speech_tts.asr_forward/'
            new_check = modelasr_folder + asr_path
            os.makedirs(new_check, exist_ok=True)

            # asr_path, asr_step = get_last_checkpoint(modelasr_folder, modelasr_name)

            save_checkpoint('forward',
                            self.paths,
                            model_tts,
                            optimizer_tts,
                            is_silent=True)

            # asr_u_loss = 2

            if "logs" not in asr_trainer_state:
                asr_trainer_state['logs'] = []
            asr_trainer_state['logs'].append({
                'step': step,
                'epoch': e,
                'asr_s_loss': int(asr_s_loss),
                'asr_u_loss': int(asr_u_loss),
                'tts_s_loss': int(tts_s_loss),
                'tts_u_loss': int(tts_u_loss),
                'tts_eval_loss': int(tts_eval_loss),
                'asr_eval_loss': int(asr_eval_loss),
                'eval_wer': eval_wer
            })

            with open(f'{modelasr_folder+ asr_path}/dt_trainer_state.json',
                      'w') as f:
                json.dump(asr_trainer_state, f)

            model_asr.save_pretrained(f'{new_check}')

            torch.save(optimizer_asr.state_dict(), f'{new_check}/optimizer.pt')

            print("Exiting due to cuda OOM!")
            exit(11)
Example #14
    def _prediction_loop(
        self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by `evaluate()` and `predict()`.

        Works both with or without labels.
        """

        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only

        # multi-gpu eval
        if self.args.n_gpu > 1 and not isinstance(self.model, torch.nn.DataParallel):
            model = torch.nn.DataParallel(self.model)
        else:
            model = self.model
        model.to(self.args.device)

        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", len(dataloader.dataset))
        logger.info("  Batch size = %d", dataloader.batch_size)
        eval_losses: List[float] = []
        preds: np.ndarray = None
        label_ids: np.ndarray = None
        model.eval()

        for inputs in tqdm(dataloader, desc=description):
            has_labels = any(inputs.get(k) is not None for k in ["labels", "masked_lm_labels"])

            for k, v in inputs.items():
                inputs[k] = v.to(self.args.device)

            with torch.no_grad():
                outputs = model(**inputs)
                if has_labels:
                    step_eval_loss, logits = outputs[:2]
                    eval_losses += [step_eval_loss.mean().item()]
                else:
                    logits = outputs[0]

            if not prediction_loss_only:
                if self.args.classify_or_insertion == 'classify':
                    padding_max_length = self.model.config.max_position_embeddings
                elif self.args.classify_or_insertion == 'insertion':
                    padding_max_length = self.model.encoder.config.max_position_embeddings
                mode = self.args.classify_or_insertion
                if preds is None:
                    preds = self.padding_ndarray(
                                        ndarray=self.convert_to_np_array(logits, description=mode),
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                else:
                    tmp_preds = self.padding_ndarray(
                                        ndarray=self.convert_to_np_array(logits, description=mode),
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                    preds = np.append(preds, tmp_preds, axis=0)
                if inputs.get("labels") is not None:
                    if label_ids is None:
                        label_ids = self.padding_ndarray(
                                        ndarray=inputs["labels"].detach().cpu().numpy(), 
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                    else:
                        tmp_label_ids = self.padding_ndarray(
                                        ndarray=inputs["labels"].detach().cpu().numpy(), 
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                        label_ids = np.append(label_ids, tmp_label_ids, axis=0)
                elif inputs.get("masked_lm_labels") is not None:
                    if label_ids is None:
                        label_ids = self.padding_ndarray(
                                        ndarray=inputs["masked_lm_labels"].detach().cpu().numpy(), 
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                    else:
                        tmp_label_ids = self.padding_ndarray(
                                        ndarray=inputs["masked_lm_labels"].detach().cpu().numpy(), 
                                        padding_max_length=padding_max_length,
                                        axis=1, 
                                        padding_id=-100
                                        )
                        label_ids = np.append(label_ids, tmp_label_ids, axis=0)
        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(self.args, EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            metrics["loss"] = np.mean(eval_losses)
        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
Example #15
    def post_processing_function(examples,
                                 features,
                                 predictions,
                                 stage="eval"):
        # Post-processing: we match the start logits and end logits to
        # answers in the original context.

        if data_args.beam_search:
            predictions, scores_diff_json = \
                postprocess_qa_predictions_with_beam_search(
                    examples=examples,
                    features=features,
                    predictions=predictions,
                    version_2_with_negative=data_args.version_2_with_negative,
                    n_best_size=data_args.n_best_size,
                    max_answer_length=data_args.max_answer_length,
                    start_n_top=model.config.start_n_top,
                    end_n_top=model.config.end_n_top,
                    output_dir=training_args.output_dir,
                    # log_level=log_level,
                    prefix=stage,
                )

        else:
            predictions = postprocess_qa_predictions(
                examples=examples,
                features=features,
                predictions=predictions,
                version_2_with_negative=data_args.version_2_with_negative,
                n_best_size=data_args.n_best_size,
                max_answer_length=data_args.max_answer_length,
                output_dir=training_args.output_dir,
                prefix=stage,
            )

        if data_args.version_2_with_negative:
            if data_args.beam_search:
                formatted_predictions = [
                    {
                        "id": k,
                        "prediction_text": v,
                        "no_answer_probability": scores_diff_json[k]
                    }  # noqa E501
                    for k, v in predictions.items()
                ]
            else:
                formatted_predictions = [
                    {
                        "id": k,
                        "prediction_text": v,
                        "no_answer_probability": 0.0
                    } for k, v in predictions.items()  # noqa E501
                ]
        else:
            formatted_predictions = [{
                "id": k,
                "prediction_text": v
            } for k, v in predictions.items()]  # noqa E501

        references = [{
            "id": ex["id"],
            "answers": ex[answer_column_name]
        } for ex in examples]  # noqa E501
        return EvalPrediction(predictions=formatted_predictions,
                              label_ids=references)
Example #16
    def prediction_loop(
        self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None, use_tqdm: Optional[bool] = True,
        reduce_other_outputs:Callable[[Tuple[torch.tensor]], Any] = None, 
    ) -> PredictionOutput:
        """
        Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

        Works both with or without labels.
        """
        if hasattr(self, "_prediction_loop"):
            warnings.warn(
                "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
                FutureWarning,
            )
            return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)

        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only

        model = self.model
        # multi-gpu eval
        if self.args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        else:
            model = self.model
        # Note: in torch.distributed mode, there's no point in wrapping the model
        # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

        batch_size = dataloader.batch_size
        logger.info("***** Running %s *****", description)
        logger.info("  Num examples = %d", self.num_examples(dataloader))
        logger.info("  Batch size = %d", batch_size)
        eval_losses: List[float] = []
        preds: torch.Tensor = None
        label_ids: torch.Tensor = None
        other_outputs: Tuple[torch.Tensor] = None
        model.eval()

        if is_torch_tpu_available():
            dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

        if self.args.past_index >= 0:
            self._past = None

        data_iterators = tqdm(dataloader, desc=description) if use_tqdm else dataloader
        reduce_other_outputs = reduce_other_outputs if reduce_other_outputs is not None else self.reduce_other_outputs

        for inputs in data_iterators:
            loss, logits, labels, other_outputs_ = self.prediction_step(model, inputs, prediction_loss_only)
            if loss is not None:
                eval_losses.append(loss)
            if logits is not None:
                preds = logits if preds is None else torch.cat((preds, logits), dim=0)
            if labels is not None:
                label_ids = labels if label_ids is None else torch.cat((label_ids, labels), dim=0)
            if other_outputs_ is not None:
                #print(list(o.size() for o in other_outputs_))
                #if other_outputs is not None:
                #    print(list(o.size() for o in others))
                if reduce_other_outputs is not None:
                    other_outputs = other_outputs_ if other_outputs is None else tuple(
                        reduce_other_outputs(output, output_) for output, output_ in zip(other_outputs, other_outputs_)
                        )  # without tuple(), a plain () comprehension would just be a generator

        if self.args.past_index and hasattr(self, "_past"):
            # Clean the state at the end of the evaluation loop
            delattr(self, "_past")

        if self.args.local_rank != -1:
            # In distributed mode, concatenate all results from all nodes:
            if preds is not None:
                preds = self.distributed_concat(preds, num_total_examples=self.num_examples(dataloader))
            if label_ids is not None:
                label_ids = self.distributed_concat(label_ids, num_total_examples=self.num_examples(dataloader))
            #if other_outputs is not None: # [TODO] maybe error!!! not familiar with distributed training
            #    other_outputs = tuple(self.distributed_concat(o, num_total_examples=self.num_examples(dataloader)) for o in other_outputs)
        elif is_torch_tpu_available():
            # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
            if preds is not None:
                preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
            if label_ids is not None:
                label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)
            #if others is not None: # [TODO] maybe error!!! not familiar with TPU training, not handled here
            #others = tuple(xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)
        
        # Finally, turn the aggregated tensors into numpy arrays.
        if preds is not None:
            preds = preds.cpu().numpy()
        if label_ids is not None:
            label_ids = label_ids.cpu().numpy()
        if other_outputs is not None: 
            other_outputs = other_outputs  # assume everything was already handled in self.reduce_other_outputs
        if self.compute_metrics is not None and preds is not None and label_ids is not None:
            metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
        else:
            metrics = {}
        if len(eval_losses) > 0:
            metrics[f"{description}_loss"] = np.mean(eval_losses)

        # Prefix all keys with eval_
        #for key in list(metrics.keys()):
        #    if not key.startswith("eval_"):
        #        metrics[f"eval_{key}"] = metrics.pop(key)
        for key in list(metrics.keys()):
            if not key.startswith(description):
                tqdm_prefix = ""
                new_key = key
                if key[0] == "_":
                    tqdm_prefix = "_"
                    new_key = key[1:]
                
                metrics[tqdm_prefix+description+"_"+new_key] = metrics.pop(key)

        return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics, other_outputs=other_outputs)