Code Example #1
File: TrainerGae.py  Project: DiegoKoz/scisci
    def fit(self, optimizer, patience, num_epochs=200):

        liveloss = PlotLosses()
        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=patience,
                                       verbose=True,
                                       metric='auc')

        for epoch in tqdm(range(num_epochs)):
            logs = {}
            self.train(optimizer)
            val_auc, val_ap = self.evaluate(validation=True, test=False)

            logs['val_auc'] = val_auc
            logs['val_ap'] = val_ap

            liveloss.update(logs)
            liveloss.send()

            self.writer.add_scalar('val_auc', val_auc, epoch)
            self.writer.add_scalar('val_ap', val_ap, epoch)

            ### Add Early stop implementation
            # early_stopping needs the validation metric to check if it has improved,
            # and if it has, it will make a checkpoint of the current model
            early_stopping(val_auc, self.model)

            if early_stopping.early_stop:
                print("Early stopping")
                break
        # load the last checkpoint with the best model
        self.model.load_state_dict(torch.load('checkpoint.pt'))
        return self.model
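
Note: Example #1 depends on an EarlyStopping helper that is not part of livelossplot. A minimal sketch of such a helper, assuming a metric that should increase (such as AUC) and the hard-coded checkpoint.pt path used above, might look like this:

import numpy as np
import torch


class EarlyStopping:
    """Minimal sketch: stop when the monitored metric has not improved for `patience` epochs."""
    def __init__(self, patience=7, verbose=False, metric='auc'):
        self.patience = patience
        self.verbose = verbose
        self.metric = metric  # assumed: higher is better (e.g. AUC)
        self.best_score = -np.inf
        self.counter = 0
        self.early_stop = False

    def __call__(self, score, model):
        if score > self.best_score:
            # improvement: checkpoint the model and reset the counter
            self.best_score = score
            torch.save(model.state_dict(), 'checkpoint.pt')
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True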
Code Example #2
def test_neptune():
    neptune_logger = NeptuneLogger(
        api_token="ANONYMOUS", project_qualified_name="shared/colab-test-run", tags=['livelossplot', 'github-actions']
    )

    plotlosses = PlotLosses(outputs=[neptune_logger])

    assert neptune_logger.experiment.state == 'running'

    for i in range(3):
        plotlosses.update(
            {
                'acc': 1 - np.random.rand() / (i + 2.),
                'val_acc': 1 - np.random.rand() / (i + 0.5),
                'loss': 1. / (i + 2.),
                'val_loss': 1. / (i + 0.5)
            }
        )
        plotlosses.send()

    assert neptune_logger.experiment.state == 'running'

    neptune_logger.close()

    assert neptune_logger.experiment.state == 'succeeded'

    url = neptune.project._get_experiment_link(neptune_logger.experiment)

    assert len(url) > 0
Code Example #3
def test_extrema_print():
    """Test if plugin object cache contains valid values"""
    groups = {'accuracy': ['acc', 'val_acc'], 'log-loss': ['loss', 'val_loss']}
    plugin = ExtremaPrinter()
    outputs = (plugin, )
    liveplot = PlotLosses(outputs=outputs, groups=groups)
    liveplot.update({'acc': 0.5, 'val_acc': 0.4, 'loss': 1.2, 'val_loss': 1.1})
    liveplot.update({
        'acc': 0.55,
        'val_acc': 0.45,
        'loss': 1.1,
        'val_loss': 1.0
    })
    liveplot.update({
        'acc': 0.65,
        'val_acc': 0.35,
        'loss': 0.5,
        'val_loss': 0.9
    })
    liveplot.update({
        'acc': 0.65,
        'val_acc': 0.55,
        'loss': 1.0,
        'val_loss': 0.9
    })
    liveplot.send()
    assert len(plugin.extrema_cache['log-loss']) == 2
    assert len(plugin.extrema_cache['log-loss']['training ']) == 3
    assert plugin.extrema_cache['accuracy']['validation ']['min'] == 0.35
    assert plugin.extrema_cache['accuracy']['validation ']['max'] == 0.55
    assert plugin.extrema_cache['accuracy']['validation ']['current'] == 0.55
Code Example #4
def test_minus_from_step():
    """Test from_step < 0"""
    out = CheckOutput(target_log_history_length=6)
    loss_plotter = PlotLosses(outputs=[out], from_step=-5)
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
    loss_plotter.send()
Code Example #5
def test_default_from_step():
    """Test without from_step"""
    out = CheckOutput(target_log_history_length=10)
    loss_plotter = PlotLosses(outputs=[out])
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
    loss_plotter.send()
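
Note: the CheckOutput class used in the two from_step tests above (and in example #10) is a test helper defined elsewhere. A minimal sketch of such an output plugin, assuming livelossplot's plugin interface in which send() receives the main logger and grouped_log_history() already applies the from_step window, could be:

from livelossplot.outputs import BaseOutput


class CheckOutput(BaseOutput):
    """Minimal sketch: assert that every plotted series has the expected number of steps."""
    def __init__(self, target_log_history_length: int):
        self.target_log_history_length = target_log_history_length

    def send(self, logger):
        # assumption: grouped_log_history() maps group name -> series name -> list of logged steps
        for group_name, series in logger.grouped_log_history().items():
            for series_name, history in series.items():
                assert len(history) == self.target_log_history_length, (
                    '{}/{}: expected {} steps, got {}'.format(
                        group_name, series_name,
                        self.target_log_history_length, len(history)))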
Code Example #6
File: neptune.py  Project: zwq1230/livelossplot
def main():
    api_token = os.environ.get('NEPTUNE_API_TOKEN')
    project_qualified_name = os.environ.get('NEPTUNE_PROJECT_NAME')
    logger = NeptuneLogger(api_token=api_token,
                           project_qualified_name=project_qualified_name)
    liveplot = PlotLosses(outputs=[logger])
    for i in range(20):
        liveplot.update({
            'accuracy': 1 - np.random.rand() / (i + 2.),
            'val_accuracy': 1 - np.random.rand() / (i + 0.5),
            'mse': 1. / (i + 2.),
            'val_mse': 1. / (i + 0.5)
        })
        liveplot.send()
        sleep(.5)
Code Example #7
def test_bokeh_plot():
    logger = BokehPlot()

    liveplot = PlotLosses(outputs=[logger], mode='script')

    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()

    assert os.path.isfile(logger.output_file)
Code Example #8
def test_tensorboard():
    groups = {
        'accuracy': ['acc', 'val_acc'],
        'log-loss': ['loss', 'val_loss']
    }
    logger = TensorboardTFLogger()

    liveplot = PlotLosses(groups=groups, outputs=(logger, ))

    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()

    assert all([
        f.startswith('events.out.tfevents.') for f in os.listdir(logger._path)
    ])
Code Example #9
    def fit(self, train_loader):
        liveloss = PlotLosses()
        logs = {}

        for epoch in range(self.epoch_num):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data.float()).to(
                    self.device), Variable(target.float()).to(self.device)
                data = data.view(-1, self.input_layer_size)
                target = target.view(-1, self.input_layer_size)
                self.optimizer.zero_grad()
                net_out = self.model(data)
                loss = self.criterion(net_out, target)
                loss.backward()
                self.optimizer.step()
            epoch_loss = loss.detach()
            logs['MSE loss'] = epoch_loss.item()
            liveloss.update(logs)
            liveloss.send()

        print("Number of weight coefficients:",
              self.model.number_of_weight_coefficients)
Code Example #10
def test_plot_losses():
    """Test basic usage"""
    loss_plotter = PlotLosses(outputs=(CheckOutput(), ))
    loss_plotter.update({
        'acc': 0.5,
        'val_acc': 0.4,
        'loss': 1.2,
        'val_loss': 1.1
    })
    loss_plotter.update({
        'acc': 0.55,
        'loss': 1.1,
    })
    loss_plotter.update({
        'acc': 0.65,
        'val_acc': 0.55,
        'loss': 1.0,
        'val_loss': 0.9
    })
    loss_plotter.update({
        'acc': 0.55,
        'loss': 1.1,
    })
    loss_plotter.send()
Code Example #11
def test_extrema_print():
    """Test if plugin object cache contains valid values"""
    groups = {'accuracy': ['acc', 'val_acc'], 'log-loss': ['loss', 'val_loss']}
    plugin = ExtremaPrinter()
    outputs = (plugin, )
    liveplot = PlotLosses(outputs=outputs, groups=groups)
    liveplot.update({'acc': 0.5, 'val_acc': 0.4, 'loss': 1.2, 'val_loss': 1.1})
    liveplot.update({
        'acc': 0.55,
        'val_acc': 0.45,
        'loss': 1.1,
        'val_loss': 1.0
    })
    liveplot.update({
        'acc': 0.65,
        'val_acc': 0.35,
        'loss': 0.5,
        'val_loss': 0.9
    })
    liveplot.update({
        'acc': 0.65,
        'val_acc': 0.55,
        'loss': 1.0,
        'val_loss': 0.9
    })
    liveplot.send()
    message = liveplot.outputs[0].last_message
    ref_message = '\n'.join([
        'accuracy',
        '\ttraining         \t (min:    0.500, max:    0.650, cur:    0.650)',
        '\tvalidation       \t (min:    0.350, max:    0.550, cur:    0.550)',
        'log-loss',
        '\ttraining         \t (min:    0.500, max:    1.200, cur:    1.000)',
        '\tvalidation       \t (min:    0.900, max:    1.100, cur:    0.900)'
    ])
    assert message == ref_message
Code Example #12
def train_eval_loop(
    model: Module,
    train_dataset: Dataset,
    val_dataset: Dataset,
    lr: float = 1e-4,
    epoch_n: int = 10,
    batch_size: int = 32,
    device=None,
    early_stopping_patience: int = 10,
    l2_reg_alpha: float = 0,
    max_batches_per_epoch_train: int = 10000,
    max_batches_per_epoch_val: int = 1000,
    optimizer_ctor: Optimizer = None,
    lr_scheduler_ctor=None,
    shuffle_train=True,
    dataloader_workers_n: int = 0,
    verbose_batch: bool = False,
    verbose_liveloss=True,
    prev_loss: Dict[str, List[float]] = {}
) -> Tuple[float, Module, Dict[str, List[float]]]:
    """
    Цикл для обучения модели. После каждой эпохи качество модели оценивается по отложенной выборке.
    :param prev_loss: лоссы от предыдущего цикла обучения
    :param verbose_batch:
    :param model: torch.nn.Module - обучаемая модель
    :param train_dataset: torch.utils.data.Dataset - данные для обучения
    :param val_dataset: torch.utils.data.Dataset - данные для оценки качества
    :param criterion: функция потерь для настройки модели
    :param lr: скорость обучения
    :param epoch_n: максимальное количество эпох
    :param batch_size: количество примеров, обрабатываемых моделью за одну итерацию
    :param device: cuda/cpu - устройство, на котором выполнять вычисления
    :param early_stopping_patience: наибольшее количество эпох, в течение которых допускается
        отсутствие улучшения модели, чтобы обучение продолжалось.
    :param l2_reg_alpha: коэффициент L2-регуляризации
    :param max_batches_per_epoch_train: максимальное количество итераций на одну эпоху обучения
    :param max_batches_per_epoch_val: максимальное количество итераций на одну эпоху валидации
    :param optimizer_ctor
    :param optimizer_params
    :param lr_scheduler_ctor
    :param shuffle_train
    :param dataloader_workers_n
    :return: кортеж из двух элементов:
        - среднее значение функции потерь на валидации на лучшей эпохе
        - лучшая модель
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    model.to(device)

    if optimizer_ctor is None:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=l2_reg_alpha)
    else:
        optimizer = optimizer_ctor(model.parameters())

    if lr_scheduler_ctor is not None:
        lr_scheduler = lr_scheduler_ctor(optimizer)
    else:
        lr_scheduler = None

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=shuffle_train,
                                  num_workers=dataloader_workers_n)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=dataloader_workers_n)

    best_val_loss = float('inf')
    best_epoch_i = 0
    best_model = copy.deepcopy(model)

    losses = {
        'train_loss': prev_loss.get('train_loss', []),
        'valid_loss': prev_loss.get('val_loss', [])
    }
    if verbose_liveloss:
        liveloss = PlotLosses()
    for epoch_i in range(epoch_n):
        try:
            epoch_start = datetime.datetime.now()
            print('Epoch {}'.format(epoch_i))

            model.train()
            mean_train_loss = 0
            train_batches_n = 0
            for batch_i, (batch_x, batch_y) in enumerate(train_dataloader):
                start_batch = time.time()
                if batch_i > max_batches_per_epoch_train:
                    break

                mask = (batch_x[:, :, 1] != 0)

                batch_x = copy_data_to_device(batch_x, device)
                batch_y = copy_data_to_device(batch_y, device)
                mask = copy_data_to_device(mask, device)
                # set_trace()
                pred = model(batch_x)

                loss = -model.crf(pred.permute(0, 2, 1), batch_y,
                                  mask) / batch_size
                # loss = criterion(pred, batch_y)

                model.zero_grad()
                loss.backward()

                optimizer.step()

                mean_train_loss += float(loss)
                train_batches_n += 1
                if verbose_batch:
                    print(f"Batch {batch_i} finished in {time.time() - start_batch:.2f} seconds")

            mean_train_loss /= train_batches_n
            print('Epoch: {} iterations, {:0.2f} sec'.format(
                train_batches_n,
                (datetime.datetime.now() - epoch_start).total_seconds()))
            print('Mean training loss', mean_train_loss)
            losses['train_loss'].append(mean_train_loss)

            model.eval()
            mean_val_loss = 0
            val_batches_n = 0

            with torch.no_grad():
                for batch_i, (batch_x, batch_y) in enumerate(val_dataloader):
                    if batch_i > max_batches_per_epoch_val:
                        break

                    mask = (batch_x[:, :, 1] != 0)

                    batch_x = copy_data_to_device(batch_x, device)
                    batch_y = copy_data_to_device(batch_y, device)
                    mask = copy_data_to_device(mask, device)

                    pred = model(batch_x)
                    loss = -model.crf(pred.permute(0, 2, 1), batch_y,
                                      mask) / batch_size

                    mean_val_loss += float(loss)
                    val_batches_n += 1

            mean_val_loss /= val_batches_n
            print('Mean validation loss', mean_val_loss)
            losses['valid_loss'].append(mean_val_loss)

            logs = {'log loss': mean_train_loss, 'val_log loss': mean_val_loss}

            if mean_val_loss < best_val_loss:
                best_epoch_i = epoch_i
                best_val_loss = mean_val_loss
                best_model = copy.deepcopy(model)
                print('New best model!')
            elif epoch_i - best_epoch_i > early_stopping_patience:
                print('Model has not improved in the last {} epochs, '
                      'stopping training'.format(early_stopping_patience))
                break

            if lr_scheduler is not None:
                lr_scheduler.step(mean_val_loss)

            print()
        except KeyboardInterrupt:
            print('Training interrupted by the user')
            break
        except Exception as ex:
            print('Error during training: {}\n{}'.format(ex, traceback.format_exc()))
            break
        if verbose_liveloss:
            liveloss.update(logs)
            liveloss.send()

    return best_val_loss, best_model, losses
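
Note: example #12 relies on a copy_data_to_device helper that is not shown. A minimal sketch, assuming it simply moves tensors (and tuples or lists of tensors) to the target device, might be:

import torch


def copy_data_to_device(data, device):
    """Minimal sketch: recursively move tensors and containers of tensors to a device."""
    if torch.is_tensor(data):
        return data.to(device)
    if isinstance(data, (list, tuple)):
        return type(data)(copy_data_to_device(item, device) for item in data)
    raise ValueError('Unexpected data type: {}'.format(type(data)))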
Code Example #13
    def _train(self, ckpt=None, is_retrain=False, plot_verbosity=True):
        print(
            "Note that the sparsity regularizations are not implemented yet..."
        )
        if (ckpt):
            """in case training needs to be started from a checkpoint (Eg.: Case training a pre-trained model)"""
            self.optimizer.load_state_dict(ckpt['optimizer_state_dict'])
            if self.lr_scheduler is not None:
                self.lr_scheduler.load_state_dict(
                    ckpt['lr_scheduler_state_dict'])
            self.model.load_state_dict(ckpt['model_state_dict'])

        args = self.args
        n_epochs = args.retrain_epochs if is_retrain else args.train_epochs
        best_ep, best_loss = 0, np.inf
        best_ckpt_path = args.model_path + '_best' + ("_retrain"
                                                      if is_retrain else "")
        liveloss = PlotLosses()
        loss_history = []

        if (not is_retrain):
            """Get the rewind epoch details for checkpointing."""
            nB = len(self.train_dataloader.dataset) / args.batch_size
            rewind_epochs = args.rewind_epoch
            rewind_ep = int(rewind_epochs)
            rewind_residual_batch = nB * (rewind_epochs - rewind_ep)

        for ep in range(n_epochs):
            # TRAINING
            epoch_train_loss = 0.
            self.model.train()
            for i, batch in tqdm(enumerate(self.train_dataloader)):
                if (not is_retrain) and (ep == rewind_ep
                                         and i >= rewind_residual_batch):
                    # if (args.retrain_mode=='weight-rewinding')
                    # Checkpoint optimizer, lr_scheduler, and weights after 1.4 epochs for weight/ lr rewinding purposes
                    w_rewind_ckpt = {
                        "model_state_dict":
                        self.model.state_dict(),
                        "optimizer_state_dict":
                        self.optimizer.state_dict(),
                        "lr_scheduler_state_dict":
                        self.lr_scheduler.state_dict()
                        if self.lr_scheduler is not None else None,
                        "epoch":
                        rewind_epochs
                    }
                    torch.save(w_rewind_ckpt, self.rewind_ckpt_path)
                # perform the training
                loss = self.model(
                    batch
                )  # model performs the output computation and the loss computation
                self.optimizer.zero_grad()
                if self.args.sparsity_reg == 'l1':
                    loss += self._l1_reg()
                loss.backward()
                self.optimizer.step()
                # store the loss for logging
                epoch_train_loss += loss.cpu().data.item() * len(batch[0])
                # step learning rate
                if self.lr_scheduler is not None:
                    self.lr_scheduler.step()
            epoch_train_loss /= len(self.train_dataloader.dataset)
            # VALIDATION
            with torch.no_grad():
                epoch_val_loss = 0.
                self.model.eval()
                for batch in tqdm(self.val_dataloader):
                    loss = self.model(batch)
                    epoch_val_loss += loss.cpu().data.item() * len(batch[0])
                epoch_val_loss /= len(self.val_dataloader.dataset)
            # PLOT THE METRICS
            if plot_verbosity:
                plot_dict = {
                    "loss": epoch_train_loss,
                    "val_loss": epoch_val_loss
                }
                if self.compute_val_performance is not None:
                    plot_dict.update({
                        "val_performance":
                        self.compute_val_performance(self.model,
                                                     self.val_dataloader,
                                                     self.device)
                    })
                liveloss.update(plot_dict)
                liveloss.send()
            loss_history.append((epoch_train_loss, epoch_val_loss))
            # DO THE EARLY STOPPING
            if (args.use_early_stop):
                if (epoch_train_loss > best_loss):
                    if (args.patience + best_ep < ep):
                        break
                else:
                    best_ep = ep
                    best_loss = epoch_train_loss
                    best_ckpt = {
                        "model_state_dict":
                        self.model.state_dict(),
                        "optimizer_state_dict":
                        self.optimizer.state_dict(),
                        "lr_scheduler_state_dict":
                        self.lr_scheduler.state_dict()
                        if self.lr_scheduler is not None else None,
                        "epoch":
                        best_ep
                    }
                    torch.save(best_ckpt, best_ckpt_path)
        if not (ep == best_ep):
            best_ckpt = torch.load(best_ckpt_path)
            self.model.load_state_dict(best_ckpt['model_state_dict'])
            self.lr_scheduler.load_state_dict(
                best_ckpt['lr_scheduler_state_dict'])
            self.optimizer.load_state_dict(best_ckpt['optimizer_state_dict'])
        return loss_history
Code Example #14
def trainer(classifier,
            optimizer,
            scheduler,
            epochs,
            early_stop,
            train_dataloader,
            validation_dataloader,
            save_file,
            seed_val=0,
            accumulation_steps=1):
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        classifier = nn.DataParallel(classifier)
    classifier.to(device)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    best = (np.inf, -1, -np.inf, None, None)

    liveloss = PlotLosses()
    LossHistory = []
    for epoch_i in range(0, epochs):
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')
        classifier.train()
        epoch_loss = 0.
        start = time.time()
        classifier.zero_grad()
        logs = {}
        for step, batch in enumerate(train_dataloader):
            b_inputs = batch[0].to(device)
            b_labels = batch[1].to(device)
            b_mask = batch[2].to(device)
            b_ids = batch[3].to(device)

            loss, logits = classifier(input_ids=b_inputs,
                                      attention_mask=b_mask,
                                      token_type_ids=b_ids,
                                      labels=b_labels)

            if torch.cuda.device_count() > 1:
                loss = loss.sum()
            loss.backward()
            if (step + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                classifier.zero_grad()

            batch_loss = loss.cpu().item()
            epoch_loss += loss.cpu().item()

            if (step % 1000 == 0):
                print("Step %i with loss %f elapsed time %f" %
                      (step, batch_loss, time.time() - start))

        print('Evaluating...')
        classifier.eval()
        dev_loss = 0.
        total_eval_accuracy = 0.
        y_preds = None
        y_true = None
        for batch in validation_dataloader:
            b_inputs = batch[0].to(device)
            b_labels = batch[1].to(device)
            b_mask = batch[2].to(device)
            b_ids = batch[3].to(device)

            with torch.no_grad():
                loss, logits = classifier(input_ids=b_inputs,
                                          attention_mask=b_mask,
                                          token_type_ids=b_ids,
                                          labels=b_labels)
                if torch.cuda.device_count() > 1:
                    loss = loss.sum()

            dev_loss += loss.cpu().item()
            label_ids = b_labels.cpu().numpy()
            logits = logits.detach().cpu().numpy()
            total_eval_accuracy += flat_accuracy(logits, label_ids)
            if y_preds is None:
                y_preds = np.argmax(logits, axis=1)
                y_true = label_ids
            else:
                y_preds = np.concatenate((y_preds, np.argmax(logits, axis=1)))
                y_true = np.concatenate((y_true, label_ids))

        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        f1_score_1 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary")
        f1_score_0 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary",
                                                     pos_label=0)

        print("Epoch %i with dev loss %f and dev accuracy %f" %
              (epoch_i, dev_loss, avg_val_accuracy))
        logs["val_loss"] = dev_loss / len(validation_dataloader)
        logs["loss"] = epoch_loss / len(train_dataloader)
        logs["val_accuracy"] = avg_val_accuracy
        liveloss.update(logs)
        LossHistory.append(logs["loss"])
        liveloss.send()

        if (epoch_i - best[1] >= early_stop and best[0] < dev_loss):
            print("early_stopping, epoch:", epoch_i + 1)
            break
        elif (best[0] > dev_loss):
            best = (dev_loss, epoch_i, avg_val_accuracy, f1_score_1,
                    f1_score_0)
            torch.save(classifier.state_dict(), save_file)

    print("Final dev loss %f Final Train Loss %f Final dev accuracy %f" %
          (dev_loss, epoch_loss, avg_val_accuracy))
    print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2]))
    print("F1_score Sarcasm ", f1_score_1)
    print("F1_score Non-Sarcasm ", f1_score_0)

    return classifier, LossHistory
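
Note: this example (and example #23 below) uses a flat_accuracy helper that is not included in the listing. A minimal sketch of the usual implementation, assuming logits of shape (batch, n_classes) and integer labels, is:

import numpy as np


def flat_accuracy(logits, labels):
    """Minimal sketch: fraction of rows whose argmax prediction matches the label."""
    preds_flat = np.argmax(logits, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(preds_flat == labels_flat) / len(labels_flat)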
Code Example #15
def train_clean(net, optimizer, dataloader, args):
    liveloss = PlotLosses()
    if (args['USE_SCHEDULER']):
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, args['sched_milestones'], gamma=args['sched_gamma'])

    for e in range(args['N_EPOCHS']):
        logs = {}
        for phase in ['train', 'val']:

            prefix = ''
            if (phase == 'train'):
                net.train()
            else:
                net.eval()
                prefix = 'val_'

            n_samples = len(dataloader[phase].dataset)
            n_batches = len(dataloader[phase])
            running_loss = 0.0
            running_acc = 0.0
            for batch_id, (data, target) in enumerate(tqdm(dataloader[phase])):

                if (args['USE_CUDA']):
                    data, target = data.cuda(), target.cuda()
                output, reconstructions, masked = net(data)
                loss = net.loss(data, output, target, reconstructions)
                if (phase == 'train'):
                    if (batch_id == n_batches - 1):
                        img1 = data[0].reshape(28, 28).detach().cpu().numpy()
                        img2 = reconstructions[0].reshape(
                            28, 28).detach().cpu().numpy()
                        weight = net.decoder.reconstraction_layers[0].weight[
                            0][:3]
                        grad = net.decoder.reconstraction_layers[
                            0].weight.grad[0][:3].data

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                running_acc += torch.sum(masked == target).item()
                running_loss += loss.item()

            logs[prefix + 'loss'] = running_loss / float(n_samples)
            logs[prefix + 'accuracy'] = running_acc / float(n_samples)

            #Scheduler
            if (args['USE_SCHEDULER'] and phase == 'train'):
                scheduler.step()
                for param in optimizer.param_groups:
                    print("LR for the epoch is:", param['lr'])

        liveloss.update(logs)
        liveloss.send()
        f, axarr = plt.subplots(1, 2)
        axarr[0].imshow(img1)
        axarr[1].imshow(img2)
        plt.show()
        print("Weights of Reconstruction Layer:", weight)
        print("Grads of Reconstruction Layer:", grad)
Code Example #16
    def train_advanced(self, data_loaders, show_plot=True):
        liveloss = PlotLosses()

        how_near = 0.2

        for epoch in range(self.num_epochs):
            logs = {}

            for phase in ['train', 'validation']:
                if phase == 'train':
                    self.train()
                else:
                    self.eval()

                running_loss = 0.0

                for inputs, labels in data_loaders[phase]:

                    inputs = T.DoubleTensor(inputs).to(self.device)
                    targets = T.DoubleTensor(inputs).to(self.device)

                    #inputs = T.DoubleTensor(inputs)
                    inputs = Variable(inputs).to(self.device)
                    targets = Variable(targets).to(self.device)

                    #self.optimizer.zero_grad()
                    #outputs = self.forward(inputs)
                    outputs = self.encoder(inputs)
                    outputs = self.decoder(outputs)
                    loss = self.criterion(outputs, inputs)

                    if phase == 'train':
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                    preds = outputs.view(len(inputs), self.original_dim)
                    targets = targets.view(len(inputs), self.original_dim)
                    #preds = preds.detach().cpu().numpy()
                    #targets = targets.detach().cpu().numpy()

                    #print(preds)
                    #print('---')
                    #print(targets)
                    #preds = outputs.item()

                    running_loss += loss.detach() * inputs.size(0)
                    '''
                    if T.sum((T.abs(preds - targets) < T.abs(how_near*preds))):
                        n_corrects += 1
                    else:
                        n_wrongs += 1
                    '''
                epoch_loss = running_loss / len(data_loaders[phase].dataset)
                #epoch_acc = (n_corrects*100) / len(data_loaders[phase].dataset)
                epoch_acc = self.accuracy(targets, preds)

                prefix = ''
                if phase == 'validation':
                    prefix = 'val_'

                    #print('[Model] epoch=%s, loss=%s , acc=%s' % ( epoch, loss.item(), epoch_acc.item))
                    print('[Model] epoch=%s, loss1=%s, loss2=%s  acc=%s' %
                          (epoch, loss.item(), epoch_loss.item(), epoch_acc))

                logs[prefix + 'log loss'] = epoch_loss.item()
                logs[prefix + 'accuracy'] = epoch_acc

            if show_plot:
                liveloss.update(logs)
                liveloss.send()
Code Example #17
# plot
liveloss = PlotLosses()

# train loop
for ep in range(epoch):
    s_time = time.time()
    p_loss_v = 0
    print(f'start ep: {ep}')

    for it, (batch_x, batch_y) in enumerate(train_loader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        predict = model(batch_x)
        p_loss = loss(predict, batch_y)
        p_loss_v = p_loss.item()
        p_loss.backward()
        optimizer.step()

        # plot
        if it % 50 == 0:
            liveloss.update({'loss': p_loss_v})
            liveloss.send()

    print(f'end ep: {ep} @ {time.time()-s_time:.3f}s')

    if (ep + 1) % 2 == 0:
        torch.save(model.state_dict(), f'save/ep_{ep+1}.pth')
Code Example #18
File: utils.py  Project: liuziyuan827/test1
class Logger():
    def __init__(self, n_epochs, batches_epoch, out_dir, start_epoch=1):
        # self.viz = Visdom()
        self.n_epochs = n_epochs
        self.batches_epoch = batches_epoch
        self.epoch = start_epoch
        self.batch = 1
        self.prev_time = time.time()
        self.mean_period = 0
        self.losses = {}
        self.loss_windows = {}
        self.image_windows = {}
        self.out_dir = out_dir
        self.to_image = transforms.ToPILImage()
        self.liveloss = PlotLosses()

    def log(self, losses=None, images=None):
        self.mean_period += (time.time() - self.prev_time)
        self.prev_time = time.time()

        sys.stdout.write(
            '\rEpoch %03d/%03d [%04d/%04d] -- ' %
            (self.epoch, self.n_epochs, self.batch, self.batches_epoch))

        plots = {}

        for i, loss_name in enumerate(losses.keys()):
            if loss_name not in self.losses:
                self.losses[loss_name] = losses[loss_name].data
            else:
                self.losses[loss_name] += losses[loss_name].data

            if (i + 1) == len(losses.keys()):
                sys.stdout.write(
                    '%s: %.4f -- ' %
                    (loss_name, self.losses[loss_name] / self.batch))
            else:
                sys.stdout.write(
                    '%s: %.4f | ' %
                    (loss_name, self.losses[loss_name] / self.batch))

        batches_done = self.batches_epoch * (self.epoch - 1) + self.batch
        batches_left = self.batches_epoch * (
            self.n_epochs - self.epoch) + self.batches_epoch - self.batch
        sys.stdout.write('ETA: %s' % (datetime.timedelta(
            seconds=batches_left * self.mean_period / batches_done)))

        if self.batch % 10 == 0:
            # Save images
            plt.ioff()
            fig = plt.figure(figsize=(100, 50))
            for i, (image_name, tensor) in enumerate(images.items()):
                ax = plt.subplot(1, len(images), i + 1)
                ax.imshow(self.to_image(tensor.cpu().data[0]))
            fig.savefig(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch))
            plt.close(fig)
            # self.to_image(images["composed"].cpu().data[0]).save(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch))
            # plt.close(fig)

        # End of epoch
        if (self.batch % self.batches_epoch) == 0:
            # Plot losses
            for i, (loss_name, loss) in enumerate(self.losses.items()):
                #         if loss_name not in self.loss_windows:
                #             self.loss_windows[loss_name] = self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]),
                #                                                             opts={'xlabel': 'epochs', 'ylabel': loss_name, 'title': loss_name})
                #         else:
                #             self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]), win=self.loss_windows[loss_name], update='append')

                plots[loss_name] = self.losses[loss_name] / self.batch

                # Reset losses for next epoch
                self.losses[loss_name] = 0.0

            self.liveloss.update(plots)
            self.liveloss.send()

            self.epoch += 1
            self.batch = 1
            sys.stdout.write('\n')
        else:
            self.batch += 1
Code Example #19
        ind.fitness.values = fit
        
    # new population
    population[:] = offspring
        
    # take the best and worst individuals to build the plot
    top = tools.selBest(population, k=1)
    worst = tools.selWorst(population, k=1)
    
    avg_h = avg_h/len(population)
    top_h = nqueen_fitness(top[0])[0]
    worst_h = nqueen_fitness(worst[0])[0]
    plotlosses.update({'top': top_h,
                       'average': avg_h,
                       'worst': worst_h})
    plotlosses.send()
    
    # evaluate the stopping criterion
    if(nqueen_fitness(top[0])[0] == 0): 
        print(top[0])
        resultado = binToDec(top[0],log_N)
        #dataframe
        eixos = [i for i in range(N)]
        estado_inicial  = pd.DataFrame(index=(eixos),columns=(eixos))
        estadoInicial = list(random.randrange(N) for i in range(N))
        for i in range(len(estadoInicial)):
            estado_inicial[eixos[i]][resultado[i]] = 'rainha'
            

        break
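
Note: this fragment calls nqueen_fitness and binToDec, which are defined elsewhere in the source notebook. Purely as an illustration, binToDec presumably decodes a flat binary chromosome into N board positions of log_N bits each; a hypothetical sketch:

def binToDec(individual, log_n):
    """Hypothetical sketch: decode a flat bit list into integers of log_n bits each."""
    return [
        int(''.join(str(bit) for bit in individual[i:i + log_n]), 2)
        for i in range(0, len(individual), log_n)
    ]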
Code Example #20
class Trainer(object):
    def __init__(
        self,
        model=None,
        data_loader=None,
        train_times=1000,
        lr=1e-3,
        alpha=0.5,
        use_gpu=True,
        opt_method="sgd",
        save_steps=None,
        checkpoint_dir=None,
    ):

        self.work_threads = 8
        self.train_times = train_times

        self.opt_method = opt_method
        self.optimizer = None
        self.lr_decay = 0
        self.weight_decay = 0
        self.alpha = alpha
        self.lr = lr

        self.model = model
        self.data_loader = data_loader
        self.use_gpu = use_gpu
        self.save_steps = save_steps
        self.checkpoint_dir = checkpoint_dir

        self.liveplot = PlotLosses()

    def train_one_step(self, data, stage=1):
        self.optimizer.zero_grad()
        self.model.zero_grad()
        loss = self.model({
            'batch_h': self.to_var(data['batch_h'], self.use_gpu),
            'batch_t': self.to_var(data['batch_t'], self.use_gpu),
            'batch_r': self.to_var(data['batch_r'], self.use_gpu),
            'batch_y': self.to_var(data['batch_y'], self.use_gpu),
            'mode': data['mode'],
            'stage': stage
        })

        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), 2)
        self.optimizer.step()
        return loss.item()

    def run(self,
            lr=None,
            alpha=None,
            weight_decay=None,
            train_times=None,
            stage=1,
            multiplier=1):
        if lr:
            self.lr = lr
        if alpha:
            self.alpha = alpha
        if weight_decay:
            self.weight_decay = weight_decay
        if train_times:
            self.train_times = train_times
        if self.use_gpu:
            self.model.cuda()

        if self.optimizer is not None:
            pass
        elif self.opt_method == "Adagrad" or self.opt_method == "adagrad":
            self.optimizer = optim.Adagrad(
                self.model.parameters(),
                lr=self.lr,
                lr_decay=self.lr_decay,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "Adadelta" or self.opt_method == "adadelta":
            self.optimizer = optim.Adadelta(
                self.model.parameters(),
                lr=self.lr,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "Adam" or self.opt_method == "adam":
            self.optimizer = optim.Adam(
                self.model.parameters(),
                lr=self.lr,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "ranger":
            if not lr:
                self.optimizer = Ranger(self.model.parameters(),
                                        lr=self.lr,
                                        alpha=self.alpha)
            else:
                self.optimizer = Ranger(self.model.parameters(),
                                        lr=lr,
                                        alpha=self.alpha)
        elif self.opt_method == "rangerva":
            self.optimizer = RangerVA(self.model.parameters(), lr=self.lr)
        else:
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=self.alpha,
                weight_decay=self.weight_decay,
            )
        print("Finish initializing...")

        # training_range = tqdm.tqdm(range(self.train_times))
        training_range = tqdm.trange(self.train_times)
        # training_range = range(self.train_times)
        for epoch in training_range:
            res = 0.0
            for data in self.data_loader:
                loss = multiplier * self.train_one_step(data, stage)
                res += loss
            self.liveplot.update({'loss': res})
            self.liveplot.send()
            if self.save_steps and self.checkpoint_dir and (
                    epoch + 1) % self.save_steps == 0:
                print("Epoch %d has finished, saving..." % (epoch))
                self.model.save_checkpoint(
                    os.path.join(self.checkpoint_dir + "-" + str(epoch) +
                                 ".ckpt"))

    def set_model(self, model):
        self.model = model

    def to_var(self, x, use_gpu):
        if use_gpu:
            return Variable(torch.from_numpy(x).cuda())
        else:
            return Variable(torch.from_numpy(x))

    def set_use_gpu(self, use_gpu):
        self.use_gpu = use_gpu

    def set_alpha(self, alpha):
        self.alpha = alpha

    def set_lr_decay(self, lr_decay):
        self.lr_decay = lr_decay

    def set_weight_decay(self, weight_decay):
        self.weight_decay = weight_decay

    def set_opt_method(self, opt_method):
        self.opt_method = opt_method

    def set_train_times(self, train_times):
        self.train_times = train_times

    def set_save_steps(self, save_steps, checkpoint_dir=None):
        self.save_steps = save_steps
        if not self.checkpoint_dir:
            self.set_checkpoint_dir(checkpoint_dir)

    def set_checkpoint_dir(self, checkpoint_dir):
        self.checkpoint_dir = checkpoint_dir
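
Note: example #20 only defines the Trainer class. A hypothetical usage, where my_model and my_train_loader are placeholders for objects matching the interface assumed above (the model must accept the batch dict and expose save_checkpoint, the loader must yield those dicts):

trainer = Trainer(model=my_model, data_loader=my_train_loader,
                  train_times=1000, opt_method='adam', alpha=0.5,
                  save_steps=100, checkpoint_dir='./checkpoint')
trainer.run(lr=1e-3, weight_decay=1e-5)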
Code Example #21
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by
            user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined
            by item_id and the item feature columns.
        """

        del users_df, items_df

        # Shift item ids and user ids so that they are consecutive

        unique_item_ids = interactions_df['item_id'].unique()
        self.item_id_mapping = dict(
            zip(unique_item_ids, list(range(len(unique_item_ids)))))
        self.item_id_reverse_mapping = dict(
            zip(list(range(len(unique_item_ids))), unique_item_ids))
        unique_user_ids = interactions_df['user_id'].unique()
        self.user_id_mapping = dict(
            zip(unique_user_ids, list(range(len(unique_user_ids)))))
        self.user_id_reverse_mapping = dict(
            zip(list(range(len(unique_user_ids))), unique_user_ids))

        interactions_df = interactions_df.copy()
        interactions_df.replace(
            {
                'item_id': self.item_id_mapping,
                'user_id': self.user_id_mapping
            },
            inplace=True)

        # Get the number of items and users

        self.interactions_df = interactions_df
        n_users = np.max(interactions_df['user_id']) + 1
        n_items = np.max(interactions_df['item_id']) + 1

        # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works)
        r = np.zeros(shape=(n_users, n_items))
        for idx, interaction in interactions_df.iterrows():
            r[int(interaction['user_id'])][int(interaction['item_id'])] = 1

        self.r = r

        # Generate negative interactions
        negative_interactions = []

        i = 0
        while i < self.n_neg_per_pos * len(interactions_df):
            sample_size = 1000
            user_ids = self.rng.choice(np.arange(n_users), size=sample_size)
            item_ids = self.rng.choice(np.arange(n_items), size=sample_size)

            j = 0
            while j < sample_size and i < self.n_neg_per_pos * len(
                    interactions_df):
                if r[user_ids[j]][item_ids[j]] == 0:
                    negative_interactions.append([user_ids[j], item_ids[j], 0])
                    i += 1
                j += 1

        interactions_df = pd.concat([
            interactions_df,
            pd.DataFrame(negative_interactions,
                         columns=['user_id', 'item_id', 'interacted'])
        ])

        # Initialize user and item embeddings as random vectors (from Gaussian distribution)

        self.user_repr = self.rng.normal(0,
                                         1,
                                         size=(r.shape[0], self.embedding_dim))
        self.item_repr = self.rng.normal(0,
                                         1,
                                         size=(r.shape[1], self.embedding_dim))

        # Initialize losses and loss visualization

        if self.print_type is not None and self.print_type == 'live':
            liveloss = PlotLosses()

        training_losses = deque(maxlen=50)
        training_avg_losses = []
        training_epoch_losses = []
        validation_losses = deque(maxlen=50)
        validation_avg_losses = []
        validation_epoch_losses = []
        last_training_total_loss = 0.0
        last_validation_total_loss = 0.0

        # Split the data

        interaction_ids = self.rng.permutation(len(interactions_df))
        train_validation_slice_idx = int(
            len(interactions_df) * (1 - self.validation_set_size))
        training_ids = interaction_ids[:train_validation_slice_idx]
        validation_ids = interaction_ids[train_validation_slice_idx:]

        # Train the model

        for epoch in range(self.n_epochs):
            if self.print_type is not None and self.print_type == 'live':
                logs = {}

            # Train

            training_losses.clear()
            training_total_loss = 0.0
            batch_idx = 0
            for idx in training_ids:
                user_id = int(interactions_df.iloc[idx]['user_id'])
                item_id = int(interactions_df.iloc[idx]['item_id'])

                e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id],
                                                    self.item_repr[item_id])
                self.user_repr[user_id] = self.user_repr[user_id] \
                    + self.lr * (e_ui * self.item_repr[item_id] - self.reg_l * self.user_repr[user_id])
                self.item_repr[item_id] = self.item_repr[item_id] \
                    + self.lr * (e_ui * self.user_repr[user_id] - self.reg_l * self.item_repr[item_id])

                loss = e_ui**2
                training_total_loss += loss

                if self.print_type is not None and self.print_type == 'text':
                    print(
                        "\rEpoch: {}\tBatch: {}\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}"
                        .format(epoch, batch_idx, last_training_total_loss,
                                last_validation_total_loss, loss),
                        end="")

                batch_idx += 1

                training_losses.append(loss)
                training_avg_losses.append(np.mean(training_losses))

            # Validate

            validation_losses.clear()
            validation_total_loss = 0.0
            for idx in validation_ids:
                user_id = int(interactions_df.iloc[idx]['user_id'])
                item_id = int(interactions_df.iloc[idx]['item_id'])

                e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id],
                                                    self.item_repr[item_id])

                loss = e_ui**2
                validation_total_loss += loss

                validation_losses.append(loss)
                validation_avg_losses.append(np.mean(validation_losses))

            # Save and print epoch losses

            training_last_avg_loss = training_total_loss / len(training_ids)
            training_epoch_losses.append(training_last_avg_loss)
            validation_last_avg_loss = validation_total_loss / len(
                validation_ids)
            validation_epoch_losses.append(validation_last_avg_loss)

            if self.print_type is not None and self.print_type == 'live' and epoch >= 3:
                # A bound on epoch prevents showing extremely high losses in the first epochs
                # noinspection PyUnboundLocalVariable
                logs['loss'] = training_last_avg_loss
                logs['val_loss'] = validation_last_avg_loss
                # noinspection PyUnboundLocalVariable
                liveloss.update(logs)
                liveloss.send()

        # Find the most popular items for the cold start problem

        offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby(
            by='item_id').count()
        offers_count = offers_count.sort_values('user_id', ascending=False)
        self.most_popular_items = offers_count.index
Code Example #22
def trainer(cfg, train_id=None, num_workers=15, device=None):
    
    device = device or 'cuda:0' ##
    train_id = train_id or cfg['train_id']
    use_pretrained_vgg=cfg["use_pretrained_vgg"]
    batch_size=cfg["batch_size"]
    lr=cfg["lr"]
    num_epochs=cfg["num_epochs"]   
   
    model = ternausnet.models.UNet11(pretrained=use_pretrained_vgg)
    
    if cfg.get('first_freeze_layers', None) is not None:
        for i in range(cfg['first_freeze_layers']):
            for param in model.encoder[i].parameters():
                param.requires_grad = False
    
    if cfg['pretrained_model'] is not None:
        model.load_state_dict(torch.load(cfg['pretrained_model']))
    model = model.to(device)

    loss = nn.BCEWithLogitsLoss()
   
    optimizer = Adam(filter(lambda x: x.requires_grad, model.parameters()), lr)

    d_train = WaterDataset(cfg['train_img_list'], train_transform)
    d_val = WaterDataset(cfg['test_img_list'], test_transform)
    
    print(d_val[0][0].shape)
    

    dl_train = DataLoader(d_train, batch_size, shuffle=True, num_workers=num_workers)
    dl_val = DataLoader(d_val, batch_size, shuffle=False, num_workers=num_workers)

        
    metrics = {
        'val_acc': AccuracyMetric(0.5),
        'train_acc': AccuracyMetric(0.5),
        'val_loss': LossMetric(),
        'train_loss': LossMetric(),
        'train_lake_acc': LakeAccuracyMetric(0.5),
        'val_lake_acc': LakeAccuracyMetric(0.5),
        'train_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_miou': MIOUMetric(0.5),
        'train_miou': MIOUMetric(0.5),
        'val_f1': F1Metric(0.5),
        'train_f1': F1Metric(0.5)
    }
    
    groups = {
        'accuracy': ['train_acc', 'val_acc'], 
        'bce-loss': ['train_loss', 'val_loss'], 
        'lake-acc': ['train_lake_acc', 'val_lake_acc'],
        'nolake_acc': ['train_nolake_acc', 'val_nolake_acc'],
        'miou': ['train_miou', 'val_miou'],
        'f1': ['train_f1', 'val_f1']
    }
    plotlosses = PlotLosses(groups=groups)

    topk_val_losses = {}

    for epoch in range(num_epochs):
        print('train step')
        for name, metric in metrics.items():
            metric.reset()

        model.train()
        for idx, (im, gt) in enumerate(dl_train):
            im = im.to(device)
            gt = gt.to(device)
            optimizer.zero_grad()

            pred = model(im)
            L = loss(pred, gt)
            L.backward()
            assert pred.shape == gt.shape
            metrics['train_acc'].append(pred, gt)
            metrics['train_lake_acc'].append(pred, gt)
            metrics['train_nolake_acc'].append(pred, gt)
            metrics['train_miou'].append(pred, gt)
            metrics['train_f1'].append(pred, gt)
            metrics['train_loss'].append(L)
            optimizer.step()
        
        torch.cuda.empty_cache()
        
        model.eval()
        print('eval step')
        with torch.no_grad():
            for idx, (im, gt) in enumerate(dl_val):
                im = im.to(device)
                gt = gt.to(device)
                pred = model(im)
                L = loss(pred, gt)
                metrics['val_acc'].append(pred, gt)
                metrics['val_lake_acc'].append(pred, gt)
                metrics['val_nolake_acc'].append(pred, gt)
                metrics['val_miou'].append(pred, gt)
                metrics['val_f1'].append(pred, gt)
                metrics['val_loss'].append(L)
        torch.cuda.empty_cache()
        
        results = {key: metrics[key].result() for key in metrics}
        plotlosses.update(results)
        plotlosses.send()

        for name, metric in metrics.items():
            metric.history()

            
        history = {key: metrics[key].hist for key in metrics}
        
        
        save_models(model, topk_val_losses, metrics['val_loss'].result(), epoch, train_id, save_num_models=3)
    torch.save(model.state_dict(), 'model-latest.pth')
    
    with open(f'history-{train_id}.json', "w") as write_file:
        json.dump(history, write_file, indent=4)
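
Note: example #22 calls a save_models helper that keeps only the best few checkpoints by validation loss; it is not part of the listing. The signature below is taken from the call site, while the body is an assumed sketch:

import os
import torch


def save_models(model, topk_val_losses, val_loss, epoch, train_id, save_num_models=3):
    """Minimal sketch: keep checkpoints for the save_num_models lowest validation losses."""
    if len(topk_val_losses) < save_num_models or val_loss < max(topk_val_losses.values()):
        path = 'model-{}-ep{}.pth'.format(train_id, epoch)
        torch.save(model.state_dict(), path)
        topk_val_losses[path] = val_loss
        # once over budget, drop the checkpoint with the worst (highest) validation loss
        if len(topk_val_losses) > save_num_models:
            worst_path = max(topk_val_losses, key=topk_val_losses.get)
            del topk_val_losses[worst_path]
            if os.path.exists(worst_path):
                os.remove(worst_path)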
Code Example #23
def trainer(classifier,
            optimizer,
            scheduler,
            epochs,
            early_stop,
            train_dataloader,
            validation_dataloader,
            save_file,
            seed_val=0,
            accumulation_steps=1):
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        classifier = nn.DataParallel(classifier)
    classifier.to(device)

    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    embedder = AlbertModel.from_pretrained('albert-base-v2')
    embedder.to(device)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    best = (np.inf, -1, -np.inf, None, None)

    liveloss = PlotLosses()
    LossHistory = []
    val_step = 0
    for epoch_i in range(0, epochs):
        logs = {}
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print("Global Learning Rate", optimizer.param_groups[0]["lr"])
        print('Training...')
        classifier.train()
        epoch_loss = 0.
        start = time.time()
        classifier.zero_grad()
        for step, batch in enumerate(train_dataloader):
            b_inputs_c = batch[0].to(device)
            b_inputs_r = batch[1].to(device)
            b_mask_c = batch[2].to(device)
            b_mask_r = batch[3].to(device)
            b_labels = batch[4].to(device)

            x_c = embedder(input_ids=b_inputs_c, attention_mask=b_mask_c)[0]
            x_r = embedder(input_ids=b_inputs_r, attention_mask=b_mask_r)[0]
            loss, logits = classifier(x_c.permute(1, 0, 2),
                                      x_r.permute(1, 0, 2), b_labels)

            if torch.cuda.device_count() > 1:
                loss = loss.sum()

            loss.backward()
            if (step + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                classifier.zero_grad()

            batch_loss = loss.cpu().item()
            epoch_loss += loss.cpu().item()

            if (step % 100) == 0:
                print("Step %i with loss %.3f elapsed time %.3f" %
                      (step, epoch_loss / (step + 1), time.time() - start))
                # writer.add_scalar("Loss/train", epoch_loss/(step+1), global_step)
                # writer.flush()

        print('Evaluating...')
        classifier.eval()
        dev_loss = 0.
        total_eval_accuracy = 0.
        y_preds = None
        y_true = None
        for batch in validation_dataloader:
            b_inputs_c = batch[0].to(device)
            b_inputs_r = batch[1].to(device)
            b_mask_c = batch[2].to(device)
            b_mask_r = batch[3].to(device)
            b_labels = batch[4].to(device)

            with torch.no_grad():
                x_c = embedder(input_ids=b_inputs_c,
                               attention_mask=b_mask_c)[0]
                x_r = embedder(input_ids=b_inputs_r,
                               attention_mask=b_mask_r)[0]
                loss, logits = classifier(x_c.permute(1, 0, 2),
                                          x_r.permute(1, 0, 2), b_labels)
                if torch.cuda.device_count() > 1:
                    loss = loss.sum()

            dev_loss += loss.cpu().item()
            label_ids = b_labels.cpu().numpy()
            logits = logits.detach().cpu().numpy()
            total_eval_accuracy += flat_accuracy(logits, label_ids)
            if y_preds is None:
                y_preds = np.argmax(logits, axis=1)
                y_true = label_ids
            else:
                y_preds = np.concatenate((y_preds, np.argmax(logits, axis=1)))
                y_true = np.concatenate((y_true, label_ids))

        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        f1_score_1 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary")
        f1_score_0 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary",
                                                     pos_label=0)

        print("Epoch %i with dev loss %f and dev accuracy %f" %
              (epoch_i + 1, dev_loss, avg_val_accuracy))

        logs["val_loss"] = dev_loss / len(validation_dataloader)
        logs["loss"] = epoch_loss / len(train_dataloader)
        logs["val_accuracy"] = avg_val_accuracy
        liveloss.update(logs)
        LossHistory.append(logs["loss"])
        liveloss.send()

        if (val_step - best[1] >= early_stop and best[0] < dev_loss):
            print("early_stopping, epoch:", epoch_i + 1)
            print(
                "Final dev loss %f Final Train Loss %f Final dev accuracy %f" %
                (dev_loss, epoch_loss, avg_val_accuracy))
            print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2]))
            print("F1_score Sarcasm ", f1_score_1)
            print("F1_score Non-Sarcasm ", f1_score_0)

            return classifier
        elif (best[0] > dev_loss):
            best = (dev_loss, val_step, avg_val_accuracy, f1_score_1,
                    f1_score_0)
            torch.save(classifier.state_dict(), save_file)

        val_step += 1
        classifier.train()

    print("Final dev loss %f Final Train Loss %f Final dev accuracy %f" %
          (dev_loss, epoch_loss, avg_val_accuracy))
    print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2]))
    print("F1_score Sarcasm ", f1_score_1)
    print("F1_score Non-Sarcasm ", f1_score_0)

    return classifier
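
Note: the example above interleaves gradient accumulation with per-epoch PlotLosses logging. Below is a minimal, self-contained sketch of that pattern; the model, data loaders, and hyperparameters are placeholders, not the objects used in the original script.

import torch
import torch.nn.functional as F
from livelossplot import PlotLosses


def train_with_accumulation(model, train_loader, val_loader, optimizer,
                            epochs=3, accumulation_steps=4, device="cpu"):
    liveloss = PlotLosses()
    model.to(device)
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        train_loss = 0.0
        for step, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            loss = F.cross_entropy(model(x), y)
            loss.backward()
            # Step the optimizer only every `accumulation_steps` mini-batches.
            if (step + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                optimizer.zero_grad()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                val_loss += F.cross_entropy(model(x), y).item()

        liveloss.update({
            "loss": train_loss / len(train_loader),
            "val_loss": val_loss / len(val_loader),
        })
        liveloss.send()
    return model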
Code example #24
    def train(self):
        """ Train the model
        """

        # initial setup
        epoch = 1
        loss_val_best = 100
        num_epochs_increased = 0
        epoch_best = 1
        liveloss = PlotLosses()
        logs = {}

        # Perform training
        while True:

            # Run one iteration of SGD
            t0 = time.time()
            loss_train = self.train_epoch()
            loss_train_eval = self.compute_loss(self.loader_train_eval)
            loss_val = self.compute_loss(self.loader_val)
            time_epoch = time.time() - t0
            self.logger.add_entry({
                'loss_train': loss_train,
                'loss_train_eval': loss_train_eval,
                'loss_val': loss_val
            })

            # run learning rate scheduler
            if self.scheduler:
                self.scheduler.step(loss_val)

            # save logger info
            if self.save_dir:
                self.logger.append(os.path.join(self.save_dir, 'log.txt'))

            # change in loss_val
            d_loss_val = (loss_val - loss_val_best) / loss_val_best * 100

            # display results
            logs['loss'] = loss_train_eval
            logs['val_loss'] = loss_val
            logs['percent improvement'] = (
                loss_val - loss_train_eval) / loss_train_eval * 100
            # set the validation improvement before update() so it is plotted
            # for the current epoch rather than one epoch late
            logs['val_percent improvement'] = d_loss_val
            liveloss.update(logs)
            liveloss.send()
            print(
                'E: {:} / Train: {:.3e} / Valid: {:.3e} / Diff Valid: {:.2f}% / Diff Valid-Train: {:.1f}% / Time: {:.2f}'
                .format(epoch, loss_train_eval, loss_val, d_loss_val,
                        (loss_val - loss_train_eval) / loss_train_eval * 100,
                        time_epoch))

            # if validation loss improves
            if d_loss_val < 0:
                num_epochs_increased = 0

                # record epoch and loss
                epoch_best = epoch
                loss_val_best = loss_val

                # save model weights
                if self.save_dir:
                    print('Validation loss improved. Saving model.')
                    torch.save(self.model.state_dict(),
                               os.path.join(self.save_dir, 'model.dat'))

            else:
                num_epochs_increased = num_epochs_increased + 1

            # stop training if we lose patience:
            if num_epochs_increased > self.patience:
                break

            # advance epoch counter
            epoch = epoch + 1
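
Note: Code example #24 stops training once the validation loss has gone more than `patience` consecutive epochs without improving. A minimal sketch of that patience rule, detached from the trainer class (the function name and example values are illustrative, not part of the original code):

import math


def should_stop(loss_history, patience):
    """Return True once the validation loss has gone more than `patience` epochs without improving."""
    best = math.inf
    epochs_since_improvement = 0
    for loss in loss_history:
        if loss < best:
            best = loss
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
    return epochs_since_improvement > patience


# e.g. should_stop([1.0, 0.8, 0.79, 0.81, 0.82, 0.83], patience=2) -> True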
Code example #25
File: bprH.py  Project: qiaojj/BPRH
    def fit(self,
            X,
            eval_X,
            y=None,
            model_saved_path='bprh_model.pkl',
            iter_to_save=5000,
            coselection_saved_path='data/item-set-coselection.pkl',
            iter_to_log=100,
            correlation=True,
            coselection=False,
            plot_metric=False,
            log_metric=False):
        # Here we do not load model -> train a new model
        if self.existed_model_path is None:
            # To make sure train and test work with inconsistent user and item lists,
            # we transform user and item's string ID to int ID so that their ID is their index in U and V
            print("Registering Model Parameters")
            # rename user and item
            self.user_original_id_list = sorted(
                set(X.UserID).union(set(eval_X.UserID)))
            self.item_original_id_list = sorted(
                set(X.ItemID).union(set(eval_X.ItemID)))

            self.train_data = X.copy()
            self.test_data = eval_X.copy()

            self.train_data.UserID = self.train_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.train_data.ItemID = self.train_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.test_data.UserID = self.test_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.test_data.ItemID = self.test_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.item_list = [
                idx[0] for idx in enumerate(self.item_original_id_list)
            ]
            self.user_list = [
                idx[0] for idx in enumerate(self.user_original_id_list)
            ]

            self.num_u = len(self.user_list)
            self.num_i = len(self.item_list)

            # build I_u_t, I_u_a (pre-computing for acceleration)
            self.build_itemset_for_user()

            # Calculate auxiliary-target correlation C for every user and each types of auxiliary action
            if correlation:
                self.alpha_u = self.auxiliary_target_correlation(
                    X=self.train_data)
            else:
                print(
                    "No auxiliary-target correlation - all alpha_u equal to one"
                )
                alpha_u_all_ones = dict()
                user_set_bar = tqdm(self.user_list)
                for u in user_set_bar:
                    alpha_u_all_ones[u] = dict()
                    alpha_u_all_ones[u]['alpha'] = 1.0
                self.alpha_u = alpha_u_all_ones.copy()

            # Generate item-set based on co-selection
            if coselection:
                self.S, self.U_item = self.itemset_coselection(
                    X=self.train_data)

            # Initialization of User and Item Matrices
            if self.random_state is not None:
                np.random.seed(self.random_state)
            else:
                np.random.seed(0)

            print("Initializing User and Item Matrices")
            # NOTE: Initialization is influenced by mean and std
            self.U = np.random.normal(size=(self.num_u, self.dim + 1),
                                      loc=0.0,
                                      scale=0.1)
            self.V = np.random.normal(size=(self.dim + 1, self.num_i),
                                      loc=0.0,
                                      scale=0.1)
            # self.U = np.zeros(shape=(self.num_u, self.dim + 1))
            # self.V = np.zeros(shape=(self.dim + 1, self.num_i))
            self.U[:, -1] = 1.0
            # estimation is U dot V
            self.estimation = np.dot(self.U, self.V)

        # Configure loss plots layout
        if plot_metric:
            groups = {
                'Precision@K': ['Precision@5', 'Precision@10'],
                'Recall@K': ['Recall@5', 'Recall@10'],
                'AUC': ['AUC']
            }
            plot_losses = PlotLosses(groups=groups)

        # Start Iteration
        all_item = set(self.item_list)
        user_in_train = sorted(set(self.train_data.UserID))
        print("Start Training")
        with trange(self.num_iter) as t:
            for index in t:
                # Description will be displayed on the left
                # t.set_description('ITER %i' % index)

                # Build u, I, J, K
                # uniformly sample a user from U
                u = choice(user_in_train)

                # build I
                # uniformly sample a item i from I_u_t
                I_u_t = self.I_u_t_train[u]
                if len(I_u_t) != 0:
                    i = choice(sorted(I_u_t))
                    # build I = I_u_t cap S_i
                    if coselection:
                        I = I_u_t.intersection(self.S[i])
                    else:
                        # if no coselection, we set I as the set of purchased items by user u
                        # no uniform sampling, like COFISET
                        I = I_u_t
                else:  # if no item in I_u_t, then set I to empty set
                    i = None
                    I = set()

                # build J, since we only have one auxiliary action, we follow the uniform sampling
                I_u_oa = self.I_u_a_train[u] - I_u_t
                if len(I_u_oa) != 0:
                    j = choice(sorted(I_u_oa))
                    if coselection:
                        # NOTE: typo in paper?
                        J = I_u_oa.intersection(self.S[j])
                    else:
                        # if no coselection, we set J as the set of only-auxiliary items by user u
                        # no uniform sampling, like COFISET
                        J = I_u_oa
                else:  # if no item in I_u_oa, then set J to empty set
                    j = None
                    J = set()

                # build K
                I_u_n = all_item - I_u_t - I_u_oa
                if len(I_u_n) != 0:
                    k = choice(sorted(I_u_n))
                    # build K
                    if coselection:
                        # NOTE: typo in paper?
                        K = I_u_n.intersection(self.S[k])
                    else:
                        # if no coselection, we set K as the set of no-action items by user u
                        # no uniform sampling, like COFISET
                        K = I_u_n
                else:  # if no item in I_u_n, then set K to empty set
                    k = None
                    K = set()

                # calculate intermediate variables
                # get specific alpha_u
                spec_alpha_u = self.alpha_u[u]['alpha']

                U_u = self.U[u, :-1].copy()
                sorted_I = sorted(I)
                sorted_J = sorted(J)
                sorted_K = sorted(K)

                # get r_hat_uIJ, r_hat_uJK, r_hat_uIK
                r_hat_uI = np.average(
                    self.estimation[u, sorted_I]) if len(I) != 0 else np.array(
                        [0])
                r_hat_uJ = np.average(
                    self.estimation[u, sorted_J]) if len(J) != 0 else np.array(
                        [0])
                r_hat_uK = np.average(
                    self.estimation[u, sorted_K]) if len(K) != 0 else np.array(
                        [0])

                r_hat_uIJ = r_hat_uI - r_hat_uJ
                r_hat_uJK = r_hat_uJ - r_hat_uK
                r_hat_uIK = r_hat_uI - r_hat_uK
                # get V_bar_I, V_bar_J, V_bar_K
                V_bar_I = np.average(self.V[:-1, sorted_I],
                                     axis=1) if len(I) != 0 else np.zeros(
                                         shape=(self.dim, ))
                V_bar_J = np.average(self.V[:-1, sorted_J],
                                     axis=1) if len(J) != 0 else np.zeros(
                                         shape=(self.dim, ))
                V_bar_K = np.average(self.V[:-1, sorted_K],
                                     axis=1) if len(K) != 0 else np.zeros(
                                         shape=(self.dim, ))
                # get b_I, b_J, b_K
                b_I = np.average(
                    self.V[-1, sorted_I]) if len(I) != 0 else np.array([0])
                b_J = np.average(
                    self.V[-1, sorted_J]) if len(J) != 0 else np.array([0])
                b_K = np.average(
                    self.V[-1, sorted_K]) if len(K) != 0 else np.array([0])

                # here we want to examine the condition of empty sets
                indicator_I = indicator(len(I) == 0)
                indicator_J = indicator(len(J) == 0)
                indicator_K = indicator(len(K) == 0)
                indicator_sum = indicator_I + indicator_J + indicator_K

                if 0 <= indicator_sum <= 1:
                    # these are the cases when only one set is empty or no set is empty
                    # when all three are not empty, or I is empty, or K is empty, it is
                    # easy to rewrite the obj by multiplying the indicator
                    # when J is empty, we have to rewrite the obj
                    if indicator_J == 1:
                        # when J is empty

                        # NABLA U_u
                        df_dUu = sigmoid(-r_hat_uIK) * (V_bar_I - V_bar_K)
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I
                                  ) * sigmoid(-r_hat_uIK) / indicator_len(I)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # No change on J

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uIK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                    else:
                        # when J is not empty
                        # NABLA U_u
                        df_dUu = (1 - indicator_I) * sigmoid(- r_hat_uIJ / spec_alpha_u) / spec_alpha_u * (
                                V_bar_I - V_bar_J) + \
                                 (1 - indicator_K) * sigmoid(- r_hat_uJK) * (V_bar_J - V_bar_K)
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I) * sigmoid(
                            -r_hat_uIJ / spec_alpha_u) / (indicator_len(I) *
                                                          spec_alpha_u)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # NABLA V_j
                        df_dbj = (1 - indicator_I) * (
                            -sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u
                            + (1 - indicator_K) *
                            sigmoid(-r_hat_uJK)) / indicator_len(J)
                        dR_dbj = 2 * self.lambda_b * b_J / indicator_len(J)
                        df_dVj = df_dbj * U_u
                        dR_dVj = 2 * self.lambda_v * V_bar_J / indicator_len(J)

                        # update V_j = V_j + gamma * (df_dVj - dR_dVj)
                        self.V[:-1, sorted_J] += self.gamma * (
                            df_dVj - dR_dVj)[:, None]  # trick: transpose here
                        # update b_j = b_j + gamma * (df_dbj - dR_dbj)
                        self.V[-1, sorted_J] += self.gamma * (df_dbj - dR_dbj)

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uJK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                else:
                    # these are the cases when at least two sets are empty
                    # at these cases, we ignore this user and continue the loop
                    continue

                # calculate loss
                # f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK))
                # regula = self.lambda_u * np.linalg.norm(U_u, ord=2) + self.lambda_v * (
                #        (np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) + (
                #            np.linalg.norm(V_bar_J, ord=2) if len(J) != 0 else 0) + (
                #            np.linalg.norm(V_bar_K, ord=2)) if len(K) != 0 else 0) + self.lambda_b * (
                #                     (b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 + (
                #                 b_K if len(K) != 0 else 0) ** 2)
                # bprh_loss = f_Theta - regula

                # update estimation
                old_estimation = self.estimation.copy()
                # self.estimation = np.dot(self.U, self.V)
                all_sampled_item = sorted(set.union(I, J, K))
                # for sampled_item in all_sampled_item:
                #    self.estimation[:, sampled_item] = np.dot(self.U, self.V[:, sampled_item])
                self.estimation[:, all_sampled_item] = np.dot(
                    self.U, self.V[:, all_sampled_item])
                # estimation changed
                est_changed = np.linalg.norm(self.estimation - old_estimation)

                # we only save model to file when the num of iter % iter_to_save == 0
                if (index + 1) % iter_to_save == 0:
                    self.save(model_path=model_saved_path + "_" + str(index))

                # we only calculate metric when the num of iter % iter_to_log == 0
                if (index + 1) % iter_to_log == 0:
                    if log_metric or plot_metric:
                        # calculate metrics on test data
                        user_to_eval = sorted(set(self.test_data.UserID))
                        scoring_list_5, precision_5, recall_5, avg_auc = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=5,
                            train_data_as_reference_flag=True)
                        scoring_list_10, precision_10, recall_10, _ = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=10,
                            train_data_as_reference_flag=True)
                    if log_metric:
                        self.eval_hist.append([
                            index, precision_5, precision_10, recall_5,
                            recall_10, avg_auc
                        ])

                    if plot_metric:
                        plot_losses.update({
                            'Precision@5': precision_5,
                            'Precision@10': precision_10,
                            'Recall@5': recall_5,
                            'Recall@10': recall_10,
                            'AUC': avg_auc
                        })
                        plot_losses.send()

                # Postfix will be displayed on the right,
                # formatted automatically based on argument's datatype
                t.set_postfix(est_changed=est_changed,
                              len_I=len(I),
                              len_J=len(J),
                              len_K=len(K))
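
Note: Code example #25 lays its evaluation metrics out in separate panels by passing a `groups` mapping to PlotLosses. A minimal sketch of that grouped-metrics usage, with made-up metric values standing in for the real evaluation:

from livelossplot import PlotLosses

groups = {
    'Precision@K': ['Precision@5', 'Precision@10'],
    'Recall@K': ['Recall@5', 'Recall@10'],
    'AUC': ['AUC'],
}
plot_losses = PlotLosses(groups=groups)

# Logged every `iter_to_log` iterations in the original loop.
plot_losses.update({
    'Precision@5': 0.12, 'Precision@10': 0.10,
    'Recall@5': 0.05, 'Recall@10': 0.08,
    'AUC': 0.71,
})
plot_losses.send()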
Code example #26
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by
            user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined
            by item_id and the item feature columns.
        """

        del users_df, items_df

        # Shift item ids and user ids so that they are consecutive

        unique_item_ids = interactions_df['item_id'].unique()
        self.item_id_mapping = dict(
            zip(unique_item_ids, list(range(len(unique_item_ids)))))
        self.item_id_reverse_mapping = dict(
            zip(list(range(len(unique_item_ids))), unique_item_ids))
        unique_user_ids = interactions_df['user_id'].unique()
        self.user_id_mapping = dict(
            zip(unique_user_ids, list(range(len(unique_user_ids)))))
        self.user_id_reverse_mapping = dict(
            zip(list(range(len(unique_user_ids))), unique_user_ids))

        interactions_df = interactions_df.copy()
        interactions_df.replace(
            {
                'item_id': self.item_id_mapping,
                'user_id': self.user_id_mapping
            },
            inplace=True)

        # Get the number of items and users

        self.interactions_df = interactions_df.copy()
        n_users = np.max(interactions_df['user_id']) + 1
        n_items = np.max(interactions_df['item_id']) + 1

        # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works)
        r = np.zeros(shape=(n_users, n_items))
        for idx, interaction in interactions_df.iterrows():
            r[int(interaction['user_id'])][int(interaction['item_id'])] = 1

        self.r = r

        # Indicate positive interactions

        interactions_df.loc[:, 'interacted'] = 1

        # Generate negative interactions
        negative_interactions = []

        i = 0
        while i < self.n_neg_per_pos * len(interactions_df):
            sample_size = 1000
            user_ids = self.rng.choice(np.arange(n_users), size=sample_size)
            item_ids = self.rng.choice(np.arange(n_items), size=sample_size)

            j = 0
            while j < sample_size and i < self.n_neg_per_pos * len(
                    interactions_df):
                if r[user_ids[j]][item_ids[j]] == 0:
                    negative_interactions.append([user_ids[j], item_ids[j], 0])
                    i += 1
                j += 1

        interactions_df = pd.concat([
            interactions_df,
            pd.DataFrame(negative_interactions,
                         columns=['user_id', 'item_id', 'interacted'])
        ])
        interactions_df = interactions_df.reset_index(drop=True)

        # Initialize losses and loss visualization

        if self.print_type is not None and self.print_type == 'live':
            liveloss = PlotLosses()

        training_losses = deque(maxlen=50)
        training_avg_losses = []
        training_epoch_losses = []
        validation_losses = deque(maxlen=50)
        validation_avg_losses = []
        validation_epoch_losses = []
        last_training_total_loss = 0.0
        last_validation_total_loss = 0.0

        # Initialize the network

        self.nn_model = GMFModel(n_items, n_users, self.embedding_dim,
                                 self.seed)
        self.nn_model.train()
        self.nn_model.to(self.device)
        self.optimizer = optim.Adam(self.nn_model.parameters(),
                                    lr=self.lr,
                                    weight_decay=self.weight_decay)

        # Split the data

        if self.train:
            interaction_ids = self.rng.permutation(len(interactions_df))
            train_validation_slice_idx = int(
                len(interactions_df) * (1 - self.validation_set_size))
            training_ids = interaction_ids[:train_validation_slice_idx]
            validation_ids = interaction_ids[train_validation_slice_idx:]
        else:
            interaction_ids = self.rng.permutation(len(interactions_df))
            training_ids = interaction_ids
            validation_ids = []

        # Train the model

        for epoch in range(self.n_epochs):
            if self.print_type is not None and self.print_type == 'live':
                logs = {}

            # Train

            training_losses.clear()
            training_total_loss = 0.0

            self.rng.shuffle(training_ids)

            n_batches = int(np.ceil(len(training_ids) / self.batch_size))

            for batch_idx in range(n_batches):

                batch_ids = training_ids[(batch_idx *
                                          self.batch_size):((batch_idx + 1) *
                                                            self.batch_size)]

                batch = interactions_df.loc[batch_ids]
                batch_input = torch.from_numpy(
                    batch.loc[:, ['user_id', 'item_id']].values).long().to(
                        self.device)
                y_target = torch.from_numpy(
                    batch.loc[:,
                              ['interacted']].values).float().to(self.device)

                # Create responses

                y = self.nn_model(batch_input).clip(0.000001, 0.999999)

                # Define loss and backpropagate

                self.optimizer.zero_grad()
                loss = -(y_target * y.log() + (1 - y_target) *
                         (1 - y).log()).sum()

                loss.backward()
                self.optimizer.step()

                training_total_loss += loss.item()

                if self.print_type is not None and self.print_type == 'text':
                    print(
                        "\rEpoch: {}\tBatch: {}\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}"
                        .format(epoch, batch_idx, last_training_total_loss,
                                last_validation_total_loss, loss),
                        end="")

                training_losses.append(loss.item())
                training_avg_losses.append(np.mean(training_losses))

            # Validate

            validation_total_loss = 0.0

            batch = interactions_df.loc[validation_ids]
            batch_input = torch.from_numpy(
                batch.loc[:, ['user_id', 'item_id']].values).long().to(
                    self.device)
            y_target = torch.from_numpy(
                batch.loc[:, ['interacted']].values).float().to(self.device)

            # Create responses

            y = self.nn_model(batch_input).clip(0.000001, 0.999999)

            # Calculate validation loss

            loss = -(y_target * y.log() + (1 - y_target) * (1 - y).log()).sum()
            validation_total_loss += loss.item()

            # Save and print epoch losses

            training_last_avg_loss = training_total_loss / len(training_ids)
            if self.train:
                validation_last_avg_loss = validation_total_loss / len(
                    validation_ids)

            if self.print_type is not None and self.print_type == 'live' and epoch >= 0:
                # Raising the bound on epoch (here `epoch >= 0`, i.e. effectively
                # disabled) would skip plotting the very high losses of the first epochs
                logs['loss'] = training_last_avg_loss
                if self.train:
                    logs['val_loss'] = validation_last_avg_loss
                liveloss.update(logs)
                liveloss.send()

        # Find the most popular items for the cold start problem

        offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby(
            by='item_id').count()
        offers_count = offers_count.sort_values('user_id', ascending=False)
        self.most_popular_items = offers_count.index
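
Note: Code example #26 builds its negative training pairs by repeatedly drawing random (user, item) pairs and keeping those with no recorded interaction. A minimal sketch of that rejection-sampling step, assuming a reasonably sparse interaction matrix (the helper name and defaults are illustrative, not part of the original class):

import numpy as np


def sample_negatives(r, n_negatives, rng=None, draw_size=1000):
    """Return [user_id, item_id, 0] triples for random pairs with r[user, item] == 0."""
    rng = rng if rng is not None else np.random.default_rng(0)
    n_users, n_items = r.shape
    negatives = []
    while len(negatives) < n_negatives:
        users = rng.choice(n_users, size=draw_size)
        items = rng.choice(n_items, size=draw_size)
        for u, i in zip(users, items):
            if r[u, i] == 0 and len(negatives) < n_negatives:
                negatives.append([int(u), int(i), 0])
    return negatives


# e.g. r = np.zeros((10, 20)); sample_negatives(r, 5) -> five random negative triples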