Ejemplo n.º 1
0
    def train_one_epoch(self):
        """Run one full training pass over ``self.train_loader``.

        Updates model weights batch-by-batch and logs the running loss
        and accuracy for the epoch via ``print_and_log``.
        """
        self.model.train()
        loss_meter = AverageMeter()
        acc_meter = AverageMeter()
        # Show a progress bar only in verbose mode.
        loader = tqdm(self.train_loader) if self.verbose else self.train_loader
        for inputs, targets in loader:
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)

            preds = self.model(inputs)
            batch_loss = self.loss(preds, targets)

            # Standard step: clear grads, backprop, update weights.
            self.optimizer.zero_grad()
            batch_loss.backward()
            self.optimizer.step()
            loss_meter.update(batch_loss.item())

            preds_np = preds.detach().cpu().numpy()
            targets_np = targets.cpu().numpy()
            acc_meter.update(get_accuracy(preds_np, targets_np),
                             targets_np.shape[0])

        msg = ('Training epoch {} | loss: {} - accuracy: '
               '{}'.format(self.cur_epoch, round(loss_meter.val, 5),
                           round(acc_meter.val, 5)))
        print_and_log(self.logger, msg)
Ejemplo n.º 2
0
    def train_one_epoch(self):
        """Train the model for a single epoch over ``self.train_loader``.

        Inputs are cast to float before being moved to the device; epoch
        loss and accuracy are logged through ``self.logger``.
        """
        self.model.train()
        loss_meter = AverageMeter()
        acc_meter = AverageMeter()

        for batch_x, batch_y in self.train_loader:
            batch_x = batch_x.float().to(self.device)
            batch_y = batch_y.to(self.device)

            preds = self.model(batch_x)
            batch_loss = self.loss(preds, batch_y)

            # Clear stale grads, backprop this batch, apply the update.
            self.optimizer.zero_grad()
            batch_loss.backward()
            self.optimizer.step()

            loss_meter.update(batch_loss.item())
            acc_meter.update(get_accuracy(preds, batch_y), batch_y.shape[0])

        msg = ('Training epoch {} | loss: {} - accuracy: '
               '{}'.format(self.cur_epoch, round(loss_meter.val, 5),
                           round(acc_meter.val, 5)))
        self.logger.info(msg)
Ejemplo n.º 3
0
    def validate(self):
        """Evaluate the model on ``self.val_loader``.

        Returns:
            tuple: ``(accuracy, loss)`` averaged over the validation set.
        """
        # Local import keeps this block self-contained; torch is assumed
        # available since the model/optimizer are torch objects.
        import torch

        self.model.eval()

        loss = AverageMeter()
        acc = AverageMeter()
        # Fix: the original ran validation with autograd enabled, building a
        # computation graph for every batch and wasting memory. Validation
        # needs no gradients (cf. the project's other validate(), which
        # already wraps its loop in torch.no_grad()).
        with torch.no_grad():
            for x, y in self.val_loader:
                x = x.float()

                x = x.to(self.device)
                y = y.to(self.device)

                output = self.model(x)
                current_loss = self.loss(output, y)

                loss.update(current_loss.item())
                accuracy = get_accuracy(output, y)
                acc.update(accuracy, y.shape[0])

        s = ('Validating epoch {} | loss: {} - accuracy: '
             '{}'.format(self.cur_epoch, round(loss.val, 5), round(acc.val,
                                                                   5)))
        self.logger.info(s)

        return acc.val, loss.val
Ejemplo n.º 4
0
    def show_report(self, title, y_true, y_pred):
        """Print a titled evaluation report for predictions vs. ground truth.

        Emits a classification report, accuracy score, confusion matrix and
        the project's own accuracy metrics to stdout.
        """
        divider = "========================"
        # Pre-compute every metric, then print in a fixed layout.
        report = classification_report(y_true, y_pred)
        score = accuracy_score(y_true, y_pred)
        matrix = confusion_matrix(y_true, y_pred)
        metrics = get_accuracy(y_true, y_pred)

        print(divider)
        print(title)

        print("")
        print("Classification Report: ")
        print(report)
        print("")
        print("Accuracy Score: ", score)

        print("Confusion Matrix: ")
        print(matrix)

        print("Accuracy Metrics: ")
        print(metrics)
        print(divider)
Ejemplo n.º 5
0
    def train_one_epoch(self):
        """Train for one epoch with gradient accumulation.

        Gradients are accumulated over ``self.accumulation_steps`` batches;
        the optimizer and scheduler step only once per accumulation window,
        with the per-batch loss scaled down so the accumulated gradient
        matches a single large batch.
        """
        self.optimizer.zero_grad()
        self.model.train()
        loss = AverageMeter()
        acc = AverageMeter()
        if self.verbose:
            iterator = enumerate(tqdm(self.train_loader))
        else:
            iterator = enumerate(self.train_loader)
        for i, (x, y) in iterator:
            # Attend only to non-zero token ids (presumably 0 is the padding
            # id here — TODO confirm against the tokenizer config).
            attention_mask = (x > 0).float().to(self.device)
            x = x.to(self.device)
            y = y.to(self.device)
            # Model returns (loss, logits) when labels are supplied
            # (transformers-style forward signature).
            current_loss, output = self.model(x,
                                              attention_mask=attention_mask,
                                              labels=y)
            # Scale the loss so gradients summed over the accumulation
            # window equal those of one batch of the combined size.
            current_loss = current_loss / self.accumulation_steps
            current_loss.backward()
            # NOTE: the meter records the *scaled* loss, so logged values are
            # 1/accumulation_steps of the raw per-batch loss.
            loss.update(current_loss.detach().item())
            # MAX_GRAD_NORM = 1.0
            # nn.utils.clip_grad_norm_(self.model.parameters(),
            # 						 MAX_GRAD_NORM)
            # Step/zero only at window boundaries.
            # NOTE(review): trailing batches of an epoch that don't complete a
            # window never trigger a step; their grads are cleared at the next
            # epoch's opening zero_grad() — confirm this is intended.
            if (i + 1) % self.accumulation_steps == 0:
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()

            output = output.detach().cpu().numpy()
            y = y.cpu().numpy()
            accuracy = get_accuracy(output, y)
            acc.update(accuracy, y.shape[0])

        s = ('Training epoch {} | loss: {} - accuracy: '
             '{}'.format(self.cur_epoch, round(loss.val, 5), round(acc.val,
                                                                   5)))
        print_and_log(self.logger, s)
Ejemplo n.º 6
0
    def validate(self):
        """Evaluate the model on ``self.val_loader`` without gradients.

        Returns:
            tuple: ``(accuracy, loss)`` averaged over the validation set.
        """
        self.model.eval()
        loss_meter = AverageMeter()
        acc_meter = AverageMeter()
        # No gradients needed for evaluation.
        with torch.no_grad():
            for batch_x, batch_y in self.val_loader:
                # Attention only on non-zero (non-padding) token positions.
                mask = (batch_x > 0).float().to(self.device)
                batch_x = batch_x.to(self.device)
                batch_y = batch_y.to(self.device)

                batch_loss, logits = self.model(
                    batch_x, attention_mask=mask, labels=batch_y)
                loss_meter.update(batch_loss.detach().item())

                logits_np = logits.detach().cpu().numpy()
                labels_np = batch_y.cpu().numpy()
                acc_meter.update(get_accuracy(logits_np, labels_np),
                                 labels_np.shape[0])

        msg = ('Validating epoch {} | loss: {} - accuracy: '
               '{}'.format(self.cur_epoch, round(loss_meter.val, 5),
                           round(acc_meter.val, 5)))
        print_and_log(self.logger, msg)

        return acc_meter.val, loss_meter.val
Ejemplo n.º 7
0
def train(train_dataloader, eval_dataloader, model, config, neptune_project, freeze_model=False):
    """
    Train ``model`` with gradient accumulation, optional fp16/multi-GPU,
    checkpointing, and Neptune metric logging.

    :param train_dataloader: iterator on the training set
    :param eval_dataloader: iterator on the test set
    :param model: instance of the model to train
    :param config: dict-like Config holding hyper-parameters and run options
    :param neptune_project: Neptune project used to create/retrieve the experiment
    :param freeze_model: whether or not to freeze BERT
    :return: (global_step, mean training loss per optimization step)
    """
    if config["resume_training"]:
        # retrieving and updating already existing experiment
        exp = neptune_project.get_experiments(id=config["neptune_id"])[0]
    else:
        # creating a neptune experiment
        exp = neptune_project.create_experiment(name="{}_{}".format(config["model_type"], str(datetime.now())),
                                                params=config,
                                                upload_source_files=['*.py', "models/", "utils/"],
                                                tags=[config["model_type"]] + config["tags"])

    num_training_steps = len(train_dataloader) * config['num_train_epochs'] // config['gradient_accumulation_steps']

    # Exclude bias and LayerNorm weights from weight decay (standard BERT recipe).
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': config['weight_decay']},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    warmup_steps = math.ceil(num_training_steps * config['warmup_ratio'])
    # NOTE(review): this keeps the ratio-derived value only when
    # config['warmup_steps'] is already truthy, and otherwise forces 0 —
    # the condition looks like it was meant to test 'warmup_ratio'; confirm.
    config['warmup_steps'] = warmup_steps if config['warmup_steps'] else 0

    optimizer = AdamW(optimizer_grouped_parameters, lr=config['learning_rate'], eps=config['adam_epsilon'])
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=config['warmup_steps'],
                                                num_training_steps=num_training_steps)

    # will freeze all the model parameters except the classification part
    if freeze_model:
        model.freeze_bert_encoder()

    # optimization
    if config['fp16']:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=config['fp16_opt_level'])

    # if running on multiple GPUs
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)

    logging.info("***** Running training *****")
    logging.info("  Num examples = %d", len(train_dataloader))
    logging.info("  Num Epochs = %d", config['num_train_epochs'])
    logging.info("  Total train batch size  = %d", config['train_batch_size'])
    logging.info("  Gradient Accumulation steps = %d", config['gradient_accumulation_steps'])
    logging.info("  Total optimization steps = %d", num_training_steps)

    tr_loss, train_acc, logging_loss = 0.0, 0.0, 0.0
    model.zero_grad()
    # When resuming, start from the epoch containing the saved checkpoint.
    start_epoch = int(config.get("previous_checkpoint", 0) / len(train_dataloader))
    global_step = start_epoch * len(train_dataloader)
    train_iterator = trange(start_epoch, int(config['num_train_epochs']), desc="Epoch")

    # starting training
    for epoch in train_iterator:
        epoch_losses = []
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            # avoiding to feed already seen batches when resuming training
            if config["resume_training"] and global_step < config.get("previous_checkpoint") + 1:
                global_step += 1
                continue

            model.train()

            if 'distilbert' not in config['model_type']:
                inputs = {'input_ids': batch["input_ids"].to(device),
                          'attention_mask': batch["input_mask"].to(device),
                          'token_type_ids': batch["token_type_ids"].to(device) if config['model_type'] in
                                                                                ['bert', 'xlnet'] else None,
                          # Fix: labels were left on CPU in this branch while the
                          # distilbert branch moved them to the device.
                          'labels': batch["labels"].to(device)}
            else:
                inputs = {'input_ids': batch["input_ids"].to(device),
                          'attention_mask': batch["input_mask"].to(device),
                          'labels': batch["labels"].to(device)}

            outputs = model(**inputs)
            loss, logits = outputs[:2]  # model outputs are always tuple in pytorch-transformers (see doc)

            # handle multi-gpus run
            if config["n_gpu"] > 1:
                loss = loss.mean()
            print("\r%f" % loss, end='')
            epoch_losses.append(loss.item())

            if config['task_name'] == "multi-label":
                with torch.no_grad():
                    logits = logits.sigmoid()
                train_acc += accuracy_thresh(logits, inputs["labels"])
            else:
                # Fix: `batch` is a dict keyed by strings everywhere else in this
                # loop; `batch[3]` (tuple-style indexing) would raise a KeyError.
                train_acc += get_accuracy(logits.detach().cpu().numpy(),
                                          batch["labels"].detach().cpu().numpy())

            # gradient accumulation
            if config['gradient_accumulation_steps'] > 1:
                loss = loss / config['gradient_accumulation_steps']

            # optimization
            if config['fp16']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), config['max_grad_norm'])
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), config['max_grad_norm'])

            tr_loss += loss.item()
            if (step + 1) % config['gradient_accumulation_steps'] == 0:
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if config['logging_steps'] > 0 and global_step % config['logging_steps'] == 0:
                    exp.log_metric(log_name='lr', y=scheduler.get_lr()[0], x=global_step)
                    exp.log_metric(log_name='train_loss', y=(tr_loss - logging_loss) / config['logging_steps'],
                                   x=global_step)
                    # NOTE(review): train_acc accumulates once per micro-batch but
                    # is normalized by logging_steps (optimizer steps); with
                    # gradient_accumulation_steps > 1 this over-counts — confirm.
                    exp.log_metric(log_name='train_acc', y=train_acc / config['logging_steps'], x=global_step)
                    logging_loss = tr_loss
                    train_acc = 0.0

                if config['save_steps'] > 0 and global_step % config['save_steps'] == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(config['output_dir'], 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model,
                                                            'module') else model  # Take care of distributed/parallel training
                    model_to_save.save_pretrained(output_dir)
                    logging.info("Saving model checkpoint to %s", output_dir)
                    exp.log_artifact(os.path.join(output_dir, "pytorch_model.bin"),
                                     "pytorch_model_{}.bin".format(global_step))

        # Log metrics
        if config['evaluate_during_training']:
            results = evaluate(eval_dataloader, model, config, epoch)
            for key, value in results["scalars"].items():
                exp.log_metric(log_name='eval_{}'.format(key), y=value, x=epoch)

            if "labels_probs" in results["arrays"].keys():
                labels_probs = results["arrays"]["labels_probs"]
                for i in range(labels_probs.shape[0]):
                    fig = plt.figure(figsize=(15, 15))
                    sns.distplot(labels_probs[i], kde=False, bins=100)
                    plt.title("Probability boxplot for label {}".format(i))
                    log_chart(name="dist_label_{}_epoch_{}".format(i, epoch), chart=fig, experiment=exp)
                    plt.close("all")

    return global_step, tr_loss / global_step