Code example #1
File: vbcar.py Project: mindis/VBCAR
    def train(self):
        """Multiple training.

        Returns:
            None.
        """
        max_noprogress = 5
        _loss_train_min = 1e-5
        n_noprogress = 0

        process_bar = tqdm(range(self.iteration))
        liveloss = PlotLosses(fig_path=self.output_file_name + ".iter.pdf")
        loss_list = []
        _best_ndcg = 0
        for i in process_bar:
            logs = {}
            all_loss = 0
            kl_loss = 0
            batch_num = 0
            for batch_ndx, sample in enumerate(self.data_loader):
                pos_u = torch.tensor(
                    [triple[0] for triple in sample],
                    dtype=torch.int64,
                    device=self.device,
                )
                pos_i_1 = torch.tensor(
                    [triple[1] for triple in sample],
                    dtype=torch.int64,
                    device=self.device,
                )
                pos_i_2 = torch.tensor(
                    [triple[2] for triple in sample],
                    dtype=torch.int64,
                    device=self.device,
                )

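                # Sample negatives from the user/item samplers;
                # sample(self.n_neg, len(sample)) presumably draws n_neg
                # negatives per triple in the batch.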
                neg_u = torch.tensor(
                    self.data.user_sampler.sample(self.n_neg, len(sample)),
                    dtype=torch.int64,
                    device=self.device,
                )
                neg_i_1 = torch.tensor(
                    self.data.item_sampler.sample(self.n_neg, len(sample)),
                    dtype=torch.int64,
                    device=self.device,
                )
                neg_i_2 = torch.tensor(
                    self.data.item_sampler.sample(self.n_neg, len(sample)),
                    dtype=torch.int64,
                    device=self.device,
                )

                self.optimizer.zero_grad()
                # The original passed neg_i_2 twice and never used the sampled
                # neg_i_1, which looks like a typo; pass each negative batch once.
                loss = self.model(pos_u, pos_i_1, pos_i_2, neg_u,
                                  neg_i_1, neg_i_2)
                loss.backward()
                self.optimizer.step()
                # detach so the per-batch computation graphs are not kept alive
                all_loss = all_loss + loss.detach()
                kl_loss = kl_loss + self.model.kl_loss
                # count batches; the original stored the last index, an
                # off-by-one that divided the epoch loss by (n_batches - 1)
                batch_num = batch_ndx + 1
            if self.device.type == "cuda":
                all_loss = all_loss.cpu()
                if kl_loss != 0:
                    kl_loss = kl_loss.cpu()

            logs["loss"] = all_loss.item() / batch_num

            if self.show_result:
                data_i = np.random.randint(10)
                result = self.data.evaluate_vali(self.data.test[data_i],
                                                 self.model)
                logs["ndcg@10_test"], logs["recall@10_test"] = (
                    result["ndcg@10"],
                    result["recall@10"],
                )
                result = self.data.evaluate_vali(self.data.validate[data_i],
                                                 self.model)
                logs["ndcg@10_val"], logs["recall@10_val"] = (
                    result["ndcg@10"],
                    result["recall@10"],
                )
                if _best_ndcg < result["ndcg@10"]:
                    _best_ndcg = result["ndcg@10"]
                    self.best_model = copy.deepcopy(self.model.state_dict())
                    torch.save(self.best_model, self.output_file_name)
            if kl_loss != 0:
                logs["kl_loss"] = kl_loss.item() / batch_num
                logs["loss"] = logs["loss"] - logs["kl_loss"]

            loss_list.append(logs["loss"])

            if i > 1:
                if abs(loss_list[i] - loss_list[i - 1]) < _loss_train_min:
                    n_noprogress += 1
                else:
                    n_noprogress = 0

            liveloss.update(logs)
            liveloss.draw()
            process_bar.set_description(
                "Loss: %0.8f, lr: %0.6f" %
                (logs["loss"], self.optimizer.param_groups[0]["lr"]))
            print("=== #no progress: ", n_noprogress)

            if n_noprogress >= max_noprogress:
                liveloss.draw()
                break
            """Sets the learning rate to the initial LR decayed by 10 every 10 epochs"""
            lr = self.initial_lr * (0.5**(i // 10))
            for param_group in self.optimizer.param_groups:
                param_group["lr"] = lr
            if i >= self.iteration - 1:
                liveloss.draw()
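
All of these snippets follow the same livelossplot pattern: create a PlotLosses, gather the step's metrics into a dict, then update() and draw() (or send() in newer versions). A minimal self-contained sketch of that pattern, with hypothetical train_step/val_step helpers standing in for real work:

import random

from livelossplot import PlotLosses

def train_step():  # hypothetical: one training epoch, returns mean loss
    return random.random()

def val_step():  # hypothetical: one validation pass, returns mean loss
    return random.random()

liveloss = PlotLosses()
for epoch in range(10):
    logs = {'loss': train_step(), 'val_loss': val_step()}
    liveloss.update(logs)
    liveloss.send()  # use draw() on older livelossplot versions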
Code example #2
def main():
    global best_test_bpd

    last_checkpoints = []
    lipschitz_constants = []
    ords = []

    # if args.resume:
    #     validate(args.begin_epoch - 1, model, ema)

    liveloss = PlotLosses()

    for epoch in range(args.begin_epoch, args.nepochs):
        logs = {}

        logger.info('Current LR {}'.format(optimizer.param_groups[0]['lr']))

        running_loss = train(epoch, model)
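        # train() is assumed to return the accumulated training loss
        # for the epoch (its definition is not shown here)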

        lipschitz_constants.append(get_lipschitz_constants(model))

        ords.append(get_ords(model))

        logger.info('Lipsh: {}'.format(pretty_repr(lipschitz_constants[-1])))
        logger.info('Order: {}'.format(pretty_repr(ords[-1])))

        #epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_loss = running_loss / len(
            datasets.CIFAR10(
                args.dataroot, train=True, transform=transform_train))
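        # Note: building a CIFAR10 dataset here just to take its length
        # re-scans the data every epoch; len(train_loader.dataset) would be
        # cheaper if the training loader is in scope.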

        logs['log loss'] = epoch_loss.item()

        liveloss.update(logs)
        liveloss.draw()

        if args.ema_val:
            test_bpd = validate(epoch, model, ema)
        else:
            test_bpd = validate(epoch, model)

        if args.scheduler and scheduler is not None:
            scheduler.step()

        if test_bpd < best_test_bpd:
            best_test_bpd = test_bpd

            utils.save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'args': args,
                    'ema': ema,
                    'test_bpd': test_bpd,
                },
                os.path.join(args.save, 'moMoModels'),
                epoch,
                last_checkpoints,
                num_checkpoints=5)
            """
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'mMoModels'), epoch, last_checkpoints, num_checkpoints=5)
            
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'mModels'), epoch, last_checkpoints, num_checkpoints=5)
            
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'models'), epoch, last_checkpoints, num_checkpoints=5)
            """

        torch.save(
            {
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'models',
                            '010mmoosttMoosttRecentt.pth'))
        """
Code example #3
File: trainer.py Project: Kotorinyanya/IMEGAT
def train_cross_validation(model_cls,
                           dataset,
                           dropout=0.0,
                           lr=1e-3,
                           weight_decay=1e-2,
                           num_epochs=200,
                           n_splits=10,
                           use_gpu=True,
                           dp=False,
                           ddp=False,
                           comment='',
                           tb_service_loc='192.168.192.57:6007',
                           batch_size=1,
                           num_workers=0,
                           pin_memory=False,
                           cuda_device=None,
                           tb_dir='runs',
                           model_save_dir='saved_models',
                           res_save_dir='res',
                           fold_no=None,
                           saved_model_path=None,
                           device_ids=None,
                           patience=20,
                           seed=None,
                           fold_seed=None,
                           save_model=False,
                           is_reg=True,
                           live_loss=True,
                           domain_cls=True,
                           final_cls=True):
    """
    :type fold_seed: int
    :param live_loss: bool
    :param is_reg: bool
    :param save_model: bool
    :param seed:
    :param patience: for early stopping
    :param device_ids: for ddp
    :param saved_model_path:
    :param fold_no: int
    :param ddp_port: str
    :param ddp: DDP
    :param cuda_device: list of int
    :param pin_memory: bool, DataLoader args
    :param num_workers: int, DataLoader args
    :param model_cls: pytorch Module cls
    :param dataset: instance
    :param dropout: float
    :param lr: float
    :param weight_decay:
    :param num_epochs:
    :param n_splits: number of kFolds
    :param use_gpu: bool
    :param dp: bool
    :param comment: comment in the logs, to filter runs in tensorboard
    :param tb_service_loc: tensorboard service location
    :param batch_size: Dataset args not DataLoader
    :return:
    """
    saved_args = locals()
    seed = int(time.time() % 1e4 * 1e5) if seed is None else seed
    saved_args['random_seed'] = seed

    torch.manual_seed(seed)
    np.random.seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
        # torch.backends.cudnn.deterministic = True
        # torch.backends.cudnn.benchmark = False

    model_name = model_cls.__name__

    if not cuda_device:
        if device_ids and dp:
            device = device_ids[0]
        else:
            device = torch.device(
                'cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
    else:
        device = cuda_device

    device_count = torch.cuda.device_count() if dp else 1
    device_count = len(device_ids) if (device_ids is not None
                                       and dp) else device_count

    batch_size = batch_size * device_count

    # TensorBoard
    log_dir_base = get_model_log_dir(comment, model_name)
    if tb_service_loc is not None:
        print("TensorBoard available at http://{1}/#scalars&regexInput={0}".
              format(log_dir_base, tb_service_loc))
    else:
        print("Please set up TensorBoard")

    # model
    criterion = nn.NLLLoss()

    print("Training {0} {1} models for cross validation...".format(
        n_splits, model_name))
    # 1
    # folds, fold = KFold(n_splits=n_splits, shuffle=False, random_state=seed), 0
    # 2
    # folds = GroupKFold(n_splits=n_splits)
    # iter = folds.split(np.zeros(len(dataset)), groups=dataset.data.site_id)
    # 4
    # folds = StratifiedKFold(n_splits=n_splits, random_state=fold_seed, shuffle=True if fold_seed else False)
    # iter = folds.split(np.zeros(len(dataset)), dataset.data.y.numpy(), groups=dataset.data.subject_id)
    # 5
    fold = 0
    # the name avoids shadowing the builtin `iter`
    cv_iter = multi_site_cv_split(dataset.data.y,
                                  dataset.data.site_id,
                                  dataset.data.subject_id,
                                  n_splits,
                                  random_state=fold_seed,
                                  shuffle=bool(fold_seed))
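    # multi_site_cv_split is project-specific; it is assumed to yield
    # (train_idx, val_idx) folds stratified by label, site and subject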

    for train_idx, val_idx in tqdm_notebook(cv_iter, desc='CV', leave=False):
        fold += 1
        liveloss = PlotLosses() if live_loss else None

        # for a specific fold
        if fold_no is not None:
            if fold != fold_no:
                continue

        writer = SummaryWriter(log_dir=osp.join(tb_dir, log_dir_base +
                                                str(fold)))
        # use the configured directories rather than the hard-coded
        # 'runs'/'saved_models'; a fold-local name avoids clobbering the
        # model_save_dir argument across folds
        fold_save_dir = osp.join(model_save_dir, log_dir_base + str(fold))

        print("creating dataloader for fold {}".format(fold))

        train_dataset, val_dataset = norm_train_val(dataset, train_idx,
                                                    val_idx)

        model = model_cls(writer)

        train_dataloader = DataLoader(train_dataset,
                                      shuffle=True,
                                      batch_size=batch_size,
                                      collate_fn=lambda data_list: data_list,
                                      num_workers=num_workers,
                                      pin_memory=pin_memory)
        val_dataloader = DataLoader(val_dataset,
                                    shuffle=False,
                                    batch_size=batch_size,
                                    collate_fn=lambda data_list: data_list,
                                    num_workers=num_workers,
                                    pin_memory=pin_memory)

        if fold == 1 or fold_no is not None:
            print(model)
            writer.add_text('model_summary', model.__repr__())
            writer.add_text('training_args', str(saved_args))

        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=lr,
                                      betas=(0.9, 0.999),
                                      eps=1e-08,
                                      weight_decay=weight_decay,
                                      amsgrad=False)
        # scheduler_reduce = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
        scheduler = GradualWarmupScheduler(optimizer,
                                           multiplier=10,
                                           total_epoch=5)
        # scheduler = scheduler_reduce
        # optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
        if dp and use_gpu:
            model = model.cuda() if device_ids is None else model.to(
                device_ids[0])
            model = DataParallel(model, device_ids=device_ids)
        elif use_gpu:
            model = model.to(device)

        if saved_model_path is not None:
            model.load_state_dict(torch.load(saved_model_path))

        best_map, patience_counter, best_score = 0.0, 0, np.inf
        for epoch in tqdm_notebook(range(1, num_epochs + 1),
                                   desc='Epoch',
                                   leave=False):
            logs = {}

            # scheduler.step(epoch=epoch, metrics=best_score)

            for phase in ['train', 'validation']:

                if phase == 'train':
                    model.train()
                    dataloader = train_dataloader
                else:
                    model.eval()
                    dataloader = val_dataloader

                # Logging
                running_total_loss = 0.0
                running_corrects = 0
                running_reg_loss = 0.0
                running_nll_loss = 0.0
                epoch_yhat_0, epoch_yhat_1 = torch.tensor([]), torch.tensor([])
                epoch_label, epoch_predicted = torch.tensor([]), torch.tensor(
                    [])

                logging_hist = (phase == 'train')  # once per epoch
                for data_list in tqdm_notebook(dataloader,
                                               desc=phase,
                                               leave=False):

                    # TODO: check devices
                    if dp:
                        data_list = to_cuda(data_list,
                                            (device_ids[0] if device_ids
                                             is not None else 'cuda'))

                    y_hat, domain_yhat, reg = model(data_list)

                    y = torch.tensor([],
                                     dtype=dataset.data.y.dtype,
                                     device=device)
                    domain_y = torch.tensor([],
                                            dtype=dataset.data.site_id.dtype,
                                            device=device)
                    for data in data_list:
                        y = torch.cat([y, data.y.view(-1).to(device)])
                        domain_y = torch.cat(
                            [domain_y,
                             data.site_id.view(-1).to(device)])

                    loss = criterion(y_hat, y)
                    domain_loss = criterion(domain_yhat, domain_y)
                    # domain_loss = -1e-7 * domain_loss
                    # print(domain_loss.item())
                    if domain_cls:
                        total_loss = domain_loss
                        _, predicted = torch.max(domain_yhat, 1)
                        label = domain_y
                    if final_cls:
                        total_loss = loss
                        _, predicted = torch.max(y_hat, 1)
                        label = y
                    if domain_cls and final_cls:
                        total_loss = (loss + domain_loss).sum()
                        _, predicted = torch.max(y_hat, 1)
                        label = y
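                    # if neither domain_cls nor final_cls is set, total_loss,
                    # predicted and label would be undefined below; at least
                    # one flag is assumed to be True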

                    if is_reg:
                        total_loss += reg.sum()

                    if phase == 'train':
                        # print(torch.autograd.grad(y_hat.sum(), model.saved_x, retain_graph=True))
                        optimizer.zero_grad()
                        total_loss.backward()
                        nn.utils.clip_grad_norm_(model.parameters(), 2.0)
                        optimizer.step()

                    running_nll_loss += loss.item()
                    running_total_loss += total_loss.item()
                    running_reg_loss += reg.sum().item()
                    running_corrects += (predicted == label).sum().item()

                    epoch_yhat_0 = torch.cat(
                        [epoch_yhat_0, y_hat[:, 0].detach().view(-1).cpu()])
                    epoch_yhat_1 = torch.cat(
                        [epoch_yhat_1, y_hat[:, 1].detach().view(-1).cpu()])
                    epoch_label = torch.cat(
                        [epoch_label,
                         label.detach().float().view(-1).cpu()])
                    epoch_predicted = torch.cat([
                        epoch_predicted,
                        predicted.detach().float().view(-1).cpu()
                    ])

                # precision = sklearn.metrics.precision_score(epoch_label, epoch_predicted, average='micro')
                # recall = sklearn.metrics.recall_score(epoch_label, epoch_predicted, average='micro')
                # f1_score = sklearn.metrics.f1_score(epoch_label, epoch_predicted, average='micro')
                accuracy = sklearn.metrics.accuracy_score(
                    epoch_label, epoch_predicted)
                epoch_total_loss = running_total_loss / dataloader.__len__()
                epoch_nll_loss = running_nll_loss / dataloader.__len__()
                epoch_reg_loss = running_reg_loss / dataloader.__len__()

                # print('epoch {} {}_nll_loss: {}'.format(epoch, phase, epoch_nll_loss))
                writer.add_scalars(
                    'nll_loss', {'{}_nll_loss'.format(phase): epoch_nll_loss},
                    epoch)
                writer.add_scalars('accuracy',
                                   {'{}_accuracy'.format(phase): accuracy},
                                   epoch)
                # writer.add_scalars('{}_APRF'.format(phase),
                #                    {
                #                        'accuracy': accuracy,
                #                        'precision': precision,
                #                        'recall': recall,
                #                        'f1_score': f1_score
                #                    },
                #                    epoch)
                if epoch_reg_loss != 0:
                    writer.add_scalars(
                        'reg_loss',
                        {'{}_reg_loss'.format(phase): epoch_reg_loss}, epoch)
                # print(epoch_reg_loss)
                # writer.add_histogram('hist/{}_yhat_0'.format(phase),
                #                      epoch_yhat_0,
                #                      epoch)
                # writer.add_histogram('hist/{}_yhat_1'.format(phase),
                #                      epoch_yhat_1,
                #                      epoch)

                # Save Model & Early Stopping
                if phase == 'validation':
                    model_save_path = fold_save_dir + '-{}-{}-{:.3f}-{:.3f}'.format(
                        model_name, epoch, accuracy, epoch_nll_loss)
                    # best score
                    if accuracy > best_map:
                        best_map = accuracy
                        model_save_path = model_save_path + '-best'

                    score = epoch_nll_loss
                    if score < best_score:
                        patience_counter = 0
                        best_score = score
                    else:
                        patience_counter += 1

                    # skip first 10 epoch
                    # best_score = best_score if epoch > 10 else -np.inf

                    if save_model:
                        for th, pfix in zip(
                            [0.8, 0.75, 0.7, 0.5, 0.0],
                            ['-perfect', '-great', '-good', '-bad', '-miss']):
                            if accuracy >= th:
                                model_save_path += pfix
                                break

                        torch.save(model.state_dict(), model_save_path)

                    writer.add_scalars('best_val_accuracy',
                                       {'{}_accuracy'.format(phase): best_map},
                                       epoch)
                    writer.add_scalars(
                        'best_nll_loss',
                        {'{}_nll_loss'.format(phase): best_score}, epoch)

                    writer.add_scalars('learning_rate', {
                        'learning_rate':
                        scheduler.optimizer.param_groups[0]['lr']
                    }, epoch)

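                    # note: the return below ends the whole cross-validation
                    # run, not just the current fold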
                    if patience_counter >= patience:
                        print("Stopped at epoch {}".format(epoch))
                        return

                if live_loss:
                    prefix = ''
                    if phase == 'validation':
                        prefix = 'val_'

                    logs[prefix + 'log loss'] = epoch_nll_loss
                    logs[prefix + 'accuracy'] = accuracy
            if live_loss:
                liveloss.update(logs)
                liveloss.draw()

    print("Done !")
Code example #4
def trainer(classifier,
            optimizer,
            scheduler,
            epochs,
            early_stop,
            train_dataloader,
            validation_dataloader,
            save_file,
            seed_val=0,
            accumulation_steps=1):
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        classifier = nn.DataParallel(classifier)
    classifier.to(device)

    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    embedder = AlbertModel.from_pretrained('albert-base-v2')
    embedder.to(device)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    best = (np.inf, -1, -np.inf, None, None)
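    # best packs (dev_loss, val_step, dev_accuracy, f1_sarcasm, f1_non_sarcasm)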

    liveloss = PlotLosses()
    loss_history = []
    val_step = 0
    for epoch_i in range(0, epochs):
        logs = {}
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print("Global Learning Rate", optimizer.param_groups[0]["lr"])
        print('Training...')
        classifier.train()
        epoch_loss = 0.
        start = time.time()
        classifier.zero_grad()
        for step, batch in enumerate(train_dataloader):
            b_inputs_c = batch[0].to(device)
            b_inputs_r = batch[1].to(device)
            b_mask_c = batch[2].to(device)
            b_mask_r = batch[3].to(device)
            b_labels = batch[4].to(device)

            x_c = embedder(input_ids=b_inputs_c, attention_mask=b_mask_c)[0]
            x_r = embedder(input_ids=b_inputs_r, attention_mask=b_mask_r)[0]
            loss, logits = classifier(x_c.permute(1, 0, 2),
                                      x_r.permute(1, 0, 2), b_labels)

            if torch.cuda.device_count() > 1:
                loss = loss.sum()

            loss.backward()
            if (step + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                classifier.zero_grad()

            batch_loss = loss.cpu().item()
            epoch_loss += batch_loss  # reuse instead of a second .cpu().item()

            if (step % 100) == 0:
                print("Step %i with loss %.3f elapsed time %.3f" %
                      (step, epoch_loss / (step + 1), time.time() - start))
                # writer.add_scalar("Loss/train", epoch_loss/(step+1), global_step)
                # writer.flush()

        print('Evaluating...')
        classifier.eval()
        dev_loss = 0.
        total_eval_accuracy = 0.
        y_preds = None
        y_true = None
        for batch in validation_dataloader:
            b_inputs_c = batch[0].to(device)
            b_inputs_r = batch[1].to(device)
            b_mask_c = batch[2].to(device)
            b_mask_r = batch[3].to(device)
            b_labels = batch[4].to(device)

            with torch.no_grad():
                x_c = embedder(input_ids=b_inputs_c,
                               attention_mask=b_mask_c)[0]
                x_r = embedder(input_ids=b_inputs_r,
                               attention_mask=b_mask_r)[0]
                loss, logits = classifier(x_c.permute(1, 0, 2),
                                          x_r.permute(1, 0, 2), b_labels)
                if torch.cuda.device_count() > 1:
                    loss = loss.sum()

            dev_loss += loss.cpu().item()
            label_ids = b_labels.cpu().numpy()
            logits = logits.detach().cpu().numpy()
            total_eval_accuracy += flat_accuracy(logits, label_ids)
            if y_preds is None:
                y_preds = np.argmax(logits, axis=1)
                y_true = label_ids
            else:
                y_preds = np.concatenate((y_preds, np.argmax(logits, axis=1)))
                y_true = np.concatenate((y_true, label_ids))

        avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
        f1_score_1 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary")
        f1_score_0 = precision_recall_fscore_support(y_true,
                                                     y_preds,
                                                     average="binary",
                                                     pos_label=0)

        print("Epoch %i with dev loss %f and dev accuracy %f" %
              (epoch_i + 1, dev_loss, avg_val_accuracy))

        logs["val_loss"] = dev_loss / len(validation_dataloader)
        logs["loss"] = epoch_loss / len(train_dataloader)
        logs["val_accuracy"] = avg_val_accuracy
        liveloss.update(logs)
        loss_history.append(logs["loss"])
        liveloss.send()

        if (val_step - best[1] >= early_stop and best[0] < dev_loss):
            print("early_stopping, epoch:", epoch_i + 1)
            print(
                "Final dev loss %f Final Train Loss %f Final dev accuracy %f" %
                (dev_loss, epoch_loss, avg_val_accuracy))
            print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2]))
            print("F1_score Sarcasm ", f1_score_1)
            print("F1_score Non-Sarcasm ", f1_score_0)

            return classifier
        elif (best[0] > dev_loss):
            best = (dev_loss, val_step, avg_val_accuracy, f1_score_1,
                    f1_score_0)
            torch.save(classifier.state_dict(), save_file)

        val_step += 1
        classifier.train()

    print("Final dev loss %f Final Train Loss %f Final dev accuracy %f" %
          (dev_loss, epoch_loss, avg_val_accuracy))
    print("Best dev loss %f Best dev accuracy %f" % (best[0], best[2]))
    print("F1_score Sarcasm ", f1_score_1)
    print("F1_score Non-Sarcasm ", f1_score_0)

    return classifier
Code example #5
class LiveLossPlot(Callback):
    """
    Callback to write metrics to `LiveLossPlot <https://github.com/stared/livelossplot>`_, a library for visualisation in notebooks

    Example: ::

        >>> import torch.nn
        >>> from torchbearer import Trial
        >>> from torchbearer.callbacks import LiveLossPlot

        # Example Trial that plots metrics with LiveLossPlot.
        >>> model = torch.nn.Linear(1,1)
        >>> live_loss_plot = LiveLossPlot()
        >>> trial = Trial(model, callbacks=[live_loss_plot], metrics=['acc'])

    Args:
        on_batch (bool): If True, batch metrics will be logged. Else batch metrics will not be logged
        batch_step_size (int): The number of batches between logging metrics
        on_epoch (bool): If True, epoch metrics will be logged every epoch. Else epoch metrics will not be logged
        draw_once (bool): If True, draw the plot only at the end of training. Else draw every time metrics are logged
        kwargs: Keyword arguments for livelossplot.PlotLosses

    State Requirements:
        - :attr:`torchbearer.state.METRICS`: Metrics should be a dict containing the metrics to be plotted
        - :attr:`torchbearer.state.BATCH`: Batch should be the current batch or iteration number in the epoch
    """
    def __init__(self,
                 on_batch=False,
                 batch_step_size=10,
                 on_epoch=True,
                 draw_once=False,
                 **kwargs):
        super(LiveLossPlot, self).__init__()
        self._kwargs = kwargs

        self.on_batch = on_batch
        self.on_epoch = on_epoch
        self.draw_once = draw_once
        self.batch_step_size = batch_step_size

        if on_batch:
            self.on_step_training = self._on_step_training

        if on_epoch:
            self.on_end_epoch = self._on_end_epoch

    def on_start(self, state):
        from livelossplot import PlotLosses
        self.plt = PlotLosses(**self._kwargs)
        self.batch_plt = PlotLosses(**self._kwargs)

    def _on_step_training(self, state):
        self.batch_plt.update({
            k: get_metric('LiveLossPlot', state, k)
            for k in state[torchbearer.METRICS]
        })
        if (state[torchbearer.BATCH] % self.batch_step_size == 0
                and not self.draw_once):
            with no_print():
                self.batch_plt.draw()

    def _on_end_epoch(self, state):
        self.plt.update({
            k: get_metric('LiveLossPlot', state, k)
            for k in state[torchbearer.METRICS]
        })
        if not self.draw_once:
            with no_print():
                self.plt.draw()

    def on_end(self, state):
        if self.draw_once:
            with no_print():
                self.batch_plt.draw()
                self.plt.draw()
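
For reference, a minimal end-to-end use of this callback with a torchbearer Trial might look like the sketch below; the toy data and exact Trial arguments are assumptions modelled on the docstring's example.

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchbearer import Trial
from torchbearer.callbacks import LiveLossPlot

# toy regression data; the callback only needs a running Trial
x = torch.randn(64, 1)
train_loader = DataLoader(TensorDataset(x, 3 * x), batch_size=16)

model = nn.Linear(1, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
trial = Trial(model, optimizer, nn.MSELoss(), metrics=['loss'],
              callbacks=[LiveLossPlot()])
trial.with_train_generator(train_loader)
trial.run(epochs=5)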
Code example #6
# plot
liveloss = PlotLosses()

# train loop
for ep in range(epoch):
    s_time = time.time()
    p_loss_v = 0
    print(f'start ep: {ep}')

    for it, (batch_x, batch_y) in enumerate(train_loader):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        predict = model(batch_x)
        p_loss = loss(predict, batch_y)
        p_loss_v = p_loss.item()
        p_loss.backward()
        optimizer.step()

        # plot
        if it % 50 == 0:
            liveloss.update({'loss': p_loss_v})
            liveloss.send()

    print(f'end ep: {ep} @ {time.time()-s_time:.3f}s')

    if (ep + 1) % 2 == 0:
        torch.save(model.state_dict(), f'save/ep_{ep+1}.pth')
Code example #7
    def fit(self, interactions_df, users_df, items_df):
        """
        Training of the recommender.

        :param pd.DataFrame interactions_df: DataFrame with recorded interactions between users and items
            defined by user_id, item_id and features of the interaction.
        :param pd.DataFrame users_df: DataFrame with users and their features defined by
            user_id and the user feature columns.
        :param pd.DataFrame items_df: DataFrame with items and their features defined
            by item_id and the item feature columns.
        """

        del users_df, items_df

        # Shift item ids and user ids so that they are consecutive

        unique_item_ids = interactions_df['item_id'].unique()
        self.item_id_mapping = dict(
            zip(unique_item_ids, list(range(len(unique_item_ids)))))
        self.item_id_reverse_mapping = dict(
            zip(list(range(len(unique_item_ids))), unique_item_ids))
        unique_user_ids = interactions_df['user_id'].unique()
        self.user_id_mapping = dict(
            zip(unique_user_ids, list(range(len(unique_user_ids)))))
        self.user_id_reverse_mapping = dict(
            zip(list(range(len(unique_user_ids))), unique_user_ids))

        interactions_df = interactions_df.copy()
        interactions_df.replace(
            {
                'item_id': self.item_id_mapping,
                'user_id': self.user_id_mapping
            },
            inplace=True)

        # Get the number of items and users

        self.interactions_df = interactions_df
        n_users = np.max(interactions_df['user_id']) + 1
        n_items = np.max(interactions_df['item_id']) + 1

        # Get the user-item interaction matrix (mapping to int is necessary because of how iterrows works)
        r = np.zeros(shape=(n_users, n_items))
        for idx, interaction in interactions_df.iterrows():
            r[int(interaction['user_id'])][int(interaction['item_id'])] = 1

        self.r = r

        # Generate negative interactions
        negative_interactions = []

        i = 0
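        # rejection-sample (user, item) pairs, keeping only pairs without an
        # observed interaction, until n_neg_per_pos negatives per positive
        # interaction have been collected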
        while i < self.n_neg_per_pos * len(interactions_df):
            sample_size = 1000
            user_ids = self.rng.choice(np.arange(n_users), size=sample_size)
            item_ids = self.rng.choice(np.arange(n_items), size=sample_size)

            j = 0
            while j < sample_size and i < self.n_neg_per_pos * len(
                    interactions_df):
                if r[user_ids[j]][item_ids[j]] == 0:
                    negative_interactions.append([user_ids[j], item_ids[j], 0])
                    i += 1
                j += 1

        interactions_df = pd.concat([
            interactions_df,
            pd.DataFrame(negative_interactions,
                         columns=['user_id', 'item_id', 'interacted'])
        ])

        # Initialize user and item embeddings as random vectors (from Gaussian distribution)

        self.user_repr = self.rng.normal(0,
                                         1,
                                         size=(r.shape[0], self.embedding_dim))
        self.item_repr = self.rng.normal(0,
                                         1,
                                         size=(r.shape[1], self.embedding_dim))

        # Initialize losses and loss visualization

        if self.print_type == 'live':
            liveloss = PlotLosses()

        training_losses = deque(maxlen=50)
        training_avg_losses = []
        training_epoch_losses = []
        validation_losses = deque(maxlen=50)
        validation_avg_losses = []
        validation_epoch_losses = []
        last_training_total_loss = 0.0
        last_validation_total_loss = 0.0

        # Split the data

        interaction_ids = self.rng.permutation(len(interactions_df))
        train_validation_slice_idx = int(
            len(interactions_df) * (1 - self.validation_set_size))
        training_ids = interaction_ids[:train_validation_slice_idx]
        validation_ids = interaction_ids[train_validation_slice_idx:]

        # Train the model

        for epoch in range(self.n_epochs):
            if self.print_type == 'live':
                logs = {}

            # Train

            training_losses.clear()
            training_total_loss = 0.0
            batch_idx = 0
            for idx in training_ids:
                user_id = int(interactions_df.iloc[idx]['user_id'])
                item_id = int(interactions_df.iloc[idx]['item_id'])

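                # plain matrix-factorization SGD: e_ui is the prediction error
                # and both embeddings move along its gradient, with L2
                # regularisation scaled by reg_l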
                e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id],
                                                    self.item_repr[item_id])
                self.user_repr[user_id] = self.user_repr[user_id] \
                    + self.lr * (e_ui * self.item_repr[item_id] - self.reg_l * self.user_repr[user_id])
                self.item_repr[item_id] = self.item_repr[item_id] \
                    + self.lr * (e_ui * self.user_repr[user_id] - self.reg_l * self.item_repr[item_id])

                loss = e_ui**2
                training_total_loss += loss

                if self.print_type == 'text':
                    print(
                        "\rEpoch: {}\tBatch: {}\tLast epoch - avg training loss: {:.2f} avg validation loss: {:.2f} loss: {}"
                        .format(epoch, batch_idx, last_training_total_loss,
                                last_validation_total_loss, loss),
                        end="")

                batch_idx += 1

                training_losses.append(loss)
                training_avg_losses.append(np.mean(training_losses))

            # Validate

            validation_losses.clear()
            validation_total_loss = 0.0
            for idx in validation_ids:
                user_id = int(interactions_df.iloc[idx]['user_id'])
                item_id = int(interactions_df.iloc[idx]['item_id'])

                e_ui = r[user_id, item_id] - np.dot(self.user_repr[user_id],
                                                    self.item_repr[item_id])

                loss = e_ui**2
                validation_total_loss += loss

                validation_losses.append(loss)
                validation_avg_losses.append(np.mean(validation_losses))

            # Save and print epoch losses

            training_last_avg_loss = training_total_loss / len(training_ids)
            training_epoch_losses.append(training_last_avg_loss)
            validation_last_avg_loss = validation_total_loss / len(
                validation_ids)
            validation_epoch_losses.append(validation_last_avg_loss)

            if self.print_type == 'live' and epoch >= 3:
                # A bound on epoch prevents showing extremely high losses in the first epochs
                # noinspection PyUnboundLocalVariable
                logs['loss'] = training_last_avg_loss
                logs['val_loss'] = validation_last_avg_loss
                # noinspection PyUnboundLocalVariable
                liveloss.update(logs)
                liveloss.send()

        # Find the most popular items for the cold start problem

        offers_count = interactions_df.loc[:, ['item_id', 'user_id']].groupby(
            by='item_id').count()
        offers_count = offers_count.sort_values('user_id', ascending=False)
        self.most_popular_items = offers_count.index
Code example #8
    def train(self, train_ds, valid_ds, plot_loss=True):
        # Initialize plotting
        if plot_loss:
            liveloss = PlotLosses()

        # Initialize DataLoaders
        tdl = DataLoader(train_ds, batch_size=self.batch_size, pin_memory=True)
        vdl = DataLoader(valid_ds,
                         batch_size=self.batch_size,
                         shuffle=False,
                         pin_memory=True)

        # Lists for losses
        train_losses, valid_losses = [], []
        # Lists for accuracies
        train_accs, valid_accs = [], []

        # Iterate over epochs
        for epoch in range(self.max_epochs):
            # Logs for livelossplot
            logs = {}

            batch_losses = []
            batch_count_goods = []
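            # note: the model is never switched between train() and eval();
            # if it contains dropout or batch-norm layers, toggling modes per
            # phase would be advisable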
            # Iterate over batches
            for idx_batch, batch in enumerate(tdl):
                x = batch[0].to(DEVICE)
                y = batch[1].to(device=DEVICE, dtype=torch.long)
                pred = self.model(x)
                loss = self.loss_fn(pred, y)
                batch_losses.append(loss.item())
                # Accuracy
                with torch.no_grad():
                    batch_count_goods.append(self.count_goods(pred, y))
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

            # Save train loss and accuracy for the epoch
            train_losses.append(sum(batch_losses) / len(train_ds))
            train_accs.append(sum(batch_count_goods) / len(train_ds))
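            # note: batch_losses holds per-batch *mean* losses, so dividing by
            # len(train_ds) understates the loss unless batch_size == 1;
            # dividing by len(tdl) would give the mean over batches (the same
            # applies to the validation losses below)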

            # Compute and save validation loss and accuracy for the epoch
            with torch.no_grad():
                v_batch_losses, v_batch_count_goods = [], []
                for idx_batch, batch in enumerate(vdl):
                    x = batch[0].to(DEVICE)
                    y = batch[1].to(device=DEVICE, dtype=torch.long)
                    pred = self.model(x)
                    loss = self.loss_fn(pred, y)
                    v_batch_losses.append(loss.item())
                    v_batch_count_goods.append(self.count_goods(pred, y))
                valid_losses.append(sum(v_batch_losses) / len(valid_ds))
                valid_accs.append(sum(v_batch_count_goods) / len(valid_ds))

            if plot_loss:
                logs['log loss'] = train_losses[epoch]
                logs['val_log loss'] = valid_losses[epoch]
                logs['accuracy'] = train_accs[epoch]
                logs['val_accuracy'] = valid_accs[epoch]
                liveloss.update(logs)
                liveloss.draw()
Code example #9
    def train(self):
        """ Train the model
        """

        # initial setup
        epoch = 1
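        # a finite starting value (rather than inf) keeps the percentage
        # change d_loss_val well-defined; assumes the first validation loss
        # is below 100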
        loss_val_best = 100
        num_epochs_increased = 0
        epoch_best = 1
        liveloss = PlotLosses()
        logs = {}

        # Perform training
        while True:

            # Run one iteration of SGD
            t0 = time.time()
            loss_train = self.train_epoch()
            loss_train_eval = self.compute_loss(self.loader_train_eval)
            loss_val = self.compute_loss(self.loader_val)
            time_epoch = time.time() - t0
            self.logger.add_entry({
                'loss_train': loss_train,
                'loss_train_eval': loss_train_eval,
                'loss_val': loss_val
            })

            # run learning rate scheduler
            if self.scheduler:
                self.scheduler.step(loss_val)

            # save logger info
            if self.save_dir:
                self.logger.append(os.path.join(self.save_dir, 'log.txt'))

            # change in loss_val
            d_loss_val = (loss_val - loss_val_best) / loss_val_best * 100

            # display results
            logs['loss'] = loss_train_eval
            logs['val_loss'] = loss_val
            logs['percent improvement'] = (
                loss_val - loss_train_eval) / loss_train_eval * 100
            logs['val_percent improvement'] = d_loss_val
            # set every key before update(); the original inserted
            # val_percent improvement between update() and send()
            liveloss.update(logs)
            liveloss.send()
            print(
                'E: {:} / Train: {:.3e} / Valid: {:.3e} / Diff Valid: {:.2f}% / Diff Valid-Train: {:.1f}% / Time: {:.2f}'
                .format(epoch, loss_train_eval, loss_val, d_loss_val,
                        (loss_val - loss_train_eval) / loss_train_eval * 100,
                        time_epoch))

            # if validation loss improves
            if d_loss_val < 0:
                num_epochs_increased = 0

                # record epoch and loss
                epoch_best = epoch
                loss_val_best = loss_val

                # save model weights
                if self.save_dir:
                    print('Validation loss improved. Saving model.')
                    torch.save(self.model.state_dict(),
                               os.path.join(self.save_dir, 'model.dat'))

            else:
                num_epochs_increased = num_epochs_increased + 1

            # stop training if we lose patience:
            if num_epochs_increased > self.patience:
                break

            # advance epoch counter
            epoch = epoch + 1
Code example #10
class train_wrapper():
    """
    Class that keeps a model, its optimiser and dataloaders together.
    Stores the train, validate and evaluate functions for training as well
    as some other useful methods to carry out the training with a love plot
    and save the model.
    """
    
    def __init__(self, model, optimizer, train_loader, validate_loader,
                 criterion=nn.CrossEntropyLoss(), device="cpu", keep_best=0):
        "Stores the parameters on the class instance for later methods"

        # explicit assignments instead of the original exec() string trick
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.validate_loader = validate_loader
        self.criterion = criterion
        self.device = device
        self.keep_best = keep_best

        try:
            self.transform = validate_loader.dataset.transform
        except AttributeError:
            self.transform = None
            print("No transform found, test data must be normalised manually")

        # store the liveloss as it holds all our logs, useful for later
        self.liveloss = PlotLosses()
        # store the best model params
        self.best_params_dict = {}
        # store the current epoch between training batches
        self.epoch = 0
        # for keeping the best model params
        self.max_acc = 0.
    
    
    def train(self):
        "Train a single epoch"
        
        # set the model to expect a backward pass
        self.model.train()
        
        train_loss, train_accuracy = 0, 0
        
        # for every training batch
        for X, y in self.train_loader:
            
            # put the samples on the device
            X, y = X.to(self.device), y.to(self.device)
            
            # zero the gradient
            self.optimizer.zero_grad()
            
            # find the model output with current parameters
            output = self.model(X)
            
            # calculate the loss against the expected output
            loss = self.criterion(output, y)
            
            # propagate the gradients though the network
            loss.backward()
            
            # store the loss, detached so per-batch graphs are freed
            # (scaled by batch size for averaging)
            train_loss += loss.detach() * X.size(0)
            
            # find the predictions from this output
            y_pred = F.log_softmax(output, dim=1).max(1)[1]
            
            # compare to expected output to find the accuracy
            train_accuracy += accuracy_score(y.cpu().numpy(), y_pred.detach().cpu().numpy())*X.size(0)
            
            # improve the parameters
            self.optimizer.step()

        # return the mean loss and accuracy of this epoch
        N_samp = len(self.train_loader.dataset)
        return train_loss/N_samp, train_accuracy/N_samp
    
    
    def validate(self):
        """
        Find the loss and accuracy of the current model parameters to the
        validation data set
        """
        
        # if no validation set present return zeros
        if self.validate_loader is None:
            return torch.tensor(0.), torch.tensor(0.)
        
        # set the model to not expect a backward pass
        self.model.eval()
        
        validation_loss, validation_accuracy = 0., 0.
        
        # for every validate batch
        for X, y in self.validate_loader:
            
            # tell the optimizer not to store gradients
            with torch.no_grad():
                
                # put the samples on the device
                X, y = X.to(self.device), y.to(self.device)
                
                # find the model output with current parameters
                output = self.model(X)
                
                # calculate the loss against the expected output
                loss = self.criterion(output, y)
                
                # store the loss (scaled by batch size for averaging)
                validation_loss += loss * X.size(0)
                
                # find the predictions from this output
                y_pred = F.log_softmax(output, dim=1).max(1)[1]
                
                # compare to expected output to find the accuracy
                validation_accuracy += accuracy_score(y.cpu().numpy(), y_pred.cpu().numpy())*X.size(0)
        
        # return the mean loss and accuracy of this epoch
        N_samp = len(self.validate_loader.dataset)
        return validation_loss/N_samp, validation_accuracy/N_samp
    
    
    def evaluate(self, test_data, prob_output=True):
        """
        Find the prediction of the current model parameters with the test
        data set and return both the predicted and actual labels
        """
        

        # set the model to not expect a backward pass
        self.model.eval()
        
        y_preds = []
        
        # for every test batch
        for X in test_data:
            
            # normalise the test data with validates transformation
            if self.transform:
                X = self.transform(X)

        
            # tell the optimizer not to store gradients
            with torch.no_grad():
                
                # put the samples on the device
                X = X.to(self.device)
                
                # find the model output with current parameters
                output = self.model(X.view(-1, 1, 28, 28))
                
                # find the predictions from this output
                y_pred = F.log_softmax(output, dim=1)
                if not prob_output:
                    y_pred = y_pred.max(1)[1]
                
                # store the predicted and actual outcomes
                y_preds.append(y_pred.cpu().numpy())

        # return the concatenated predictions
        return np.concatenate(y_preds, 0)
    
    
    def train_model(self, epochs):
        """
        Do a live plot of the training accuracy and loss as the model is trained
        """
        
        for _ in range(epochs):
            logs = {}
            train_loss, train_accuracy = self.train()

            logs['' + 'log loss'] = train_loss.item()
            logs['' + 'accuracy'] = train_accuracy.item()

            validation_loss, validation_accuracy = self.validate()
            logs['val_' + 'log loss'] = validation_loss.item()
            logs['val_' + 'accuracy'] = validation_accuracy.item()

            # once past the warm-up epoch given by keep_best, track the
            # parameters with the best training accuracy
            if self.keep_best:
                if train_accuracy.item() > self.max_acc and self.epoch > self.keep_best:
                    self.max_acc = train_accuracy.item()
                    # deepcopy so further training does not mutate the
                    # snapshot (requires `import copy`)
                    self.best_params_dict = copy.deepcopy(self.model.state_dict())
            
            self.liveloss.update(logs)
            self.liveloss.draw()
            self.epoch += 1
            
        print("Training Finished")
        return
    
    
    def save_model(self, name, path="/content/gdrive/My Drive/models/"):
        """
        Pickle either the whole model or its parameter dictionary
        via torch's save methods
        """

        save_dict = {"model": self.model, "transform": self.transform,
                     "Liveloss": self.liveloss}
        torch.save(save_dict, path + name)
        print("saved to " + path + name)
    

    def num_model_params(self):
        n_params = sum([t.cpu().detach().numpy().size 
                        for t in self.model.parameters()])
        print("Number of model Parameters: ", n_params)
        return n_params


    def max_acc_epoch(self):
        max_acc = self.liveloss.metrics_extrema['val_accuracy']['max']
        for log in self.liveloss.logs:
            if log["val_accuracy"] == max_acc:
                return log["_i"]#

            
    def confusion_matrix(self):
        
        y_preds, ys = [], []
        
        # same code as validate
        self.model.eval()
        
        for X, y in self.validate_loader:
            with torch.no_grad():
                X, y = X.to(self.device), y.to(self.device)
                output = self.model(X)
                y_pred = F.log_softmax(output, dim=1)
                y_pred = y_pred.max(1)[1]
                
                y_preds.append(y_pred.cpu().numpy())
                ys.append(y.cpu().numpy())
        
        # concatenate handles a ragged final batch, unlike np.array(...).flatten()
        y_preds = np.concatenate(y_preds)
        ys = np.concatenate(ys)
        
        return ConfusionMatrix(actual_vector=ys, predict_vector=y_preds)
Code example #11
def trainer(cfg, train_id=None, num_workers=15, device=None):
    
    device = device or 'cuda:0'
    train_id = train_id or cfg['train_id']
    use_pretrained_vgg = cfg["use_pretrained_vgg"]
    batch_size = cfg["batch_size"]
    lr = cfg["lr"]
    num_epochs = cfg["num_epochs"]
   
    model = ternausnet.models.UNet11(pretrained=use_pretrained_vgg)
    
    if cfg.get('first_freeze_layers', None) is not None:
        for i in range(cfg['first_freeze_layers']):
            for param in model.encoder[i].parameters():
                param.requires_grad = False
    
    if cfg['pretrained_model'] is not None:
        model.load_state_dict(torch.load(cfg['pretrained_model']))
    model = model.to(device)

    loss = nn.BCEWithLogitsLoss()
   
    # the original `lambda x: return x.requires_grad` is a SyntaxError,
    # since a lambda body must be an expression
    optimizer = Adam(filter(lambda x: x.requires_grad, model.parameters()), lr)

    d_train = WaterDataset(cfg['train_img_list'], train_transform)
    d_val = WaterDataset(cfg['test_img_list'], test_transform)
    
    print(d_val[0][0].shape)
    

    dl_train = DataLoader(d_train, batch_size, shuffle=True, num_workers=num_workers)
    dl_val = DataLoader(d_val, batch_size, shuffle=False, num_workers=num_workers)

        
    metrics = {
        'val_acc': AccuracyMetric(0.5),
        'train_acc': AccuracyMetric(0.5),
        'val_loss': LossMetric(),
        'train_loss': LossMetric(),
        'train_lake_acc': LakeAccuracyMetric(0.5),
        'val_lake_acc': LakeAccuracyMetric(0.5),
        'train_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_nolake_acc': NoLakeAccuracyMetric(0.5),
        'val_miou': MIOUMetric(0.5),
        'train_miou': MIOUMetric(0.5),
        'val_f1': F1Metric(0.5),
        'train_f1': F1Metric(0.5)
    }
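    # AccuracyMetric, MIOUMetric, F1Metric, etc. are assumed to be
    # project-specific metric helpers; each takes a 0.5 binarisation threshold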
    
    groups = {
        'accuracy': ['train_acc', 'val_acc'], 
        'bce-loss': ['train_loss', 'val_loss'], 
        'lake-acc': ['train_lake_acc', 'val_lake_acc'],
        'nolake_acc': ['train_nolake_acc', 'val_nolake_acc'],
        'miou': ['train_miou', 'val_miou'],
        'f1': ['train_f1', 'val_f1']
    }
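    # each key of `groups` becomes one livelossplot panel showing the
    # listed train/val metric pair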
    plotlosses = PlotLosses(groups=groups)

    topk_val_losses = {}

    for epoch in range(num_epochs):
        print('train step')
        for name, metric in metrics.items():
            metric.reset()

        model.train()
        for idx, (im, gt) in enumerate(dl_train):
            im = im.to(device)
            gt = gt.to(device)
            optimizer.zero_grad()

            pred = model(im)
            L = loss(pred, gt)
            L.backward()
            assert pred.shape == gt.shape
            metrics['train_acc'].append(pred, gt)
            metrics['train_lake_acc'].append(pred, gt)
            metrics['train_nolake_acc'].append(pred, gt)
            metrics['train_miou'].append(pred, gt)
            metrics['train_f1'].append(pred, gt)
            metrics['train_loss'].append(L)
            optimizer.step()
        
        torch.cuda.empty_cache()
        
        model.eval()
        print('eval step')
        with torch.no_grad():
            for idx, (im, gt) in enumerate(dl_val):
                im = im.to(device)
                gt = gt.to(device)
                pred = model(im)
                L = loss(pred, gt)
                metrics['val_acc'].append(pred, gt)
                metrics['val_lake_acc'].append(pred, gt)
                metrics['val_nolake_acc'].append(pred, gt)
                metrics['val_miou'].append(pred, gt)
                metrics['val_f1'].append(pred, gt)
                metrics['val_loss'].append(L)
        torch.cuda.empty_cache()
        
        results = {key: metrics[key].result() for key in metrics}
        plotlosses.update(results)
        plotlosses.send()

        for name, metric in metrics.items():
            metric.history()

            
        history = {key: metrics[key].hist for key in metrics}
        
        
        save_models(model, topk_val_losses, metrics['val_loss'].result(), epoch, train_id, save_num_models=3)
    torch.save(model.state_dict(), 'model-latest.pth')
    
    with open(f'history-{train_id}.json', "w") as write_file:
        json.dump(history, write_file, indent=4)
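
For reference, here is a minimal sketch of the cfg dictionary that trainer() reads. The key names come from the function body above; every value (and the list-file names) is an illustrative assumption.

# Hypothetical cfg for trainer(); keys mirror the lookups in the function body.
cfg = {
    'train_id': 'unet11-water-v1',
    'use_pretrained_vgg': True,
    'batch_size': 8,
    'lr': 1e-4,
    'num_epochs': 30,
    'first_freeze_layers': 3,      # freeze the first 3 encoder blocks
    'pretrained_model': None,      # or a path to a saved state_dict
    'train_img_list': 'train_images.txt',
    'test_img_list': 'val_images.txt',
}
trainer(cfg, device='cuda:0')
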
Code example #12
class Trainer(object):
    def __init__(
        self,
        model=None,
        data_loader=None,
        train_times=1000,
        lr=1e-3,
        alpha=0.5,
        use_gpu=True,
        opt_method="sgd",
        save_steps=None,
        checkpoint_dir=None,
    ):

        self.work_threads = 8
        self.train_times = train_times

        self.opt_method = opt_method
        self.optimizer = None
        self.lr_decay = 0
        self.weight_decay = 0
        self.alpha = alpha
        self.lr = lr

        self.model = model
        self.data_loader = data_loader
        self.use_gpu = use_gpu
        self.save_steps = save_steps
        self.checkpoint_dir = checkpoint_dir

        self.liveplot = PlotLosses()

    def train_one_step(self, data, stage=1):
        self.optimizer.zero_grad()
        self.model.zero_grad()
        loss = self.model({
            'batch_h': self.to_var(data['batch_h'], self.use_gpu),
            'batch_t': self.to_var(data['batch_t'], self.use_gpu),
            'batch_r': self.to_var(data['batch_r'], self.use_gpu),
            'batch_y': self.to_var(data['batch_y'], self.use_gpu),
            'mode': data['mode'],
            'stage': stage
        })

        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), 2)
        self.optimizer.step()
        return loss.item()

    def run(self,
            lr=None,
            alpha=None,
            weight_decay=None,
            train_times=None,
            stage=1,
            multiplier=1):
        if lr:
            self.lr = lr
        if alpha:
            self.alpha = alpha
        if weight_decay:
            self.weight_decay = weight_decay
        if train_times:
            self.train_times = train_times
        if self.use_gpu:
            self.model.cuda()

        if self.optimizer is not None:
            pass
        elif self.opt_method == "Adagrad" or self.opt_method == "adagrad":
            self.optimizer = optim.Adagrad(
                self.model.parameters(),
                lr=self.lr,
                lr_decay=self.lr_decay,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "Adadelta" or self.opt_method == "adadelta":
            self.optimizer = optim.Adadelta(
                self.model.parameters(),
                lr=self.lr,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "Adam" or self.opt_method == "adam":
            self.optimizer = optim.Adam(
                self.model.parameters(),
                lr=self.lr,
                weight_decay=self.weight_decay,
            )
        elif self.opt_method == "ranger":
            if not lr:
                self.optimizer = Ranger(self.model.parameters(),
                                        lr=self.lr,
                                        alpha=self.alpha)
            else:
                self.optimizer = Ranger(self.model.parameters(),
                                        lr=lr,
                                        alpha=self.alpha)
        elif self.opt_method == "rangerva":
            self.optimizer = RangerVA(self.model.parameters(), lr=lr)
        else:
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=self.alpha,
                weight_decay=self.weight_decay,
            )
        print("Finish initializing...")

        # training_range = tqdm.tqdm(range(self.train_times))
        training_range = tqdm.trange(self.train_times)
        # training_range = range(self.train_times)
        for epoch in training_range:
            res = 0.0
            for data in self.data_loader:
                loss = multiplier * self.train_one_step(data, stage)
                res += loss
            self.liveplot.update({'loss': res})
            self.liveplot.send()
            if self.save_steps and self.checkpoint_dir and (
                    epoch + 1) % self.save_steps == 0:
                print("Epoch %d has finished, saving..." % (epoch))
                self.model.save_checkpoint(self.checkpoint_dir + "-" +
                                           str(epoch) + ".ckpt")

    def set_model(self, model):
        self.model = model

    def to_var(self, x, use_gpu):
        if use_gpu:
            return Variable(torch.from_numpy(x).cuda())
        else:
            return Variable(torch.from_numpy(x))

    def set_use_gpu(self, use_gpu):
        self.use_gpu = use_gpu

    def set_alpha(self, alpha):
        self.alpha = alpha

    def set_lr_decay(self, lr_decay):
        self.lr_decay = lr_decay

    def set_weight_decay(self, weight_decay):
        self.weight_decay = weight_decay

    def set_opt_method(self, opt_method):
        self.opt_method = opt_method

    def set_train_times(self, train_times):
        self.train_times = train_times

    def set_save_steps(self, save_steps, checkpoint_dir=None):
        self.save_steps = save_steps
        if not self.checkpoint_dir:
            self.set_checkpoint_dir(checkpoint_dir)

    def set_checkpoint_dir(self, checkpoint_dir):
        self.checkpoint_dir = checkpoint_dir
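
A hedged usage sketch for this Trainer: my_kge_model and my_data_loader stand for a model and a data loader defined elsewhere, not names from the original source.

trainer = Trainer(model=my_kge_model,          # placeholder model
                  data_loader=my_data_loader,  # placeholder data loader
                  train_times=500,
                  lr=1e-3,
                  opt_method="adam",
                  save_steps=100,
                  checkpoint_dir="./checkpoints/transe")
trainer.run(stage=1)
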
Code example #13
def train_eval_loop(
    model: Module,
    train_dataset: Dataset,
    val_dataset: Dataset,
    lr: float = 1e-4,
    epoch_n: int = 10,
    batch_size: int = 32,
    device=None,
    early_stopping_patience: int = 10,
    l2_reg_alpha: float = 0,
    max_batches_per_epoch_train: int = 10000,
    max_batches_per_epoch_val: int = 1000,
    optimizer_ctor: Optimizer = None,
    lr_scheduler_ctor=None,
    shuffle_train=True,
    dataloader_workers_n: int = 0,
    verbose_batch: bool = False,
    verbose_liveloss=True,
    prev_loss: Dict[str, List[float]] = {}
) -> Tuple[float, Module, Dict[str, List[float]]]:
    """
    Цикл для обучения модели. После каждой эпохи качество модели оценивается по отложенной выборке.
    :param prev_loss: лоссы от предыдущего цикла обучения
    :param verbose_batch:
    :param model: torch.nn.Module - обучаемая модель
    :param train_dataset: torch.utils.data.Dataset - данные для обучения
    :param val_dataset: torch.utils.data.Dataset - данные для оценки качества
    :param criterion: функция потерь для настройки модели
    :param lr: скорость обучения
    :param epoch_n: максимальное количество эпох
    :param batch_size: количество примеров, обрабатываемых моделью за одну итерацию
    :param device: cuda/cpu - устройство, на котором выполнять вычисления
    :param early_stopping_patience: наибольшее количество эпох, в течение которых допускается
        отсутствие улучшения модели, чтобы обучение продолжалось.
    :param l2_reg_alpha: коэффициент L2-регуляризации
    :param max_batches_per_epoch_train: максимальное количество итераций на одну эпоху обучения
    :param max_batches_per_epoch_val: максимальное количество итераций на одну эпоху валидации
    :param optimizer_ctor
    :param optimizer_params
    :param lr_scheduler_ctor
    :param shuffle_train
    :param dataloader_workers_n
    :return: кортеж из двух элементов:
        - среднее значение функции потерь на валидации на лучшей эпохе
        - лучшая модель
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)
    model.to(device)

    if optimizer_ctor is None:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=l2_reg_alpha)
    else:
        optimizer = optimizer_ctor(model.parameters())

    if lr_scheduler_ctor is not None:
        lr_scheduler = lr_scheduler_ctor(optimizer)
    else:
        lr_scheduler = None

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=shuffle_train,
                                  num_workers=dataloader_workers_n)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=dataloader_workers_n)

    best_val_loss = float('inf')
    best_epoch_i = 0
    best_model = copy.deepcopy(model)

    losses = {
        'train_loss': prev_loss.get('train_loss', []),
        # key matches the 'valid_loss' entry this function returns
        'valid_loss': prev_loss.get('valid_loss', [])
    }
    if verbose_liveloss:
        liveloss = PlotLosses()
    for epoch_i in range(epoch_n):
        try:
            epoch_start = datetime.datetime.now()
            print('Epoch {}'.format(epoch_i))

            model.train()
            mean_train_loss = 0
            train_batches_n = 0
            for batch_i, (batch_x, batch_y) in enumerate(train_dataloader):
                start_batch = time.time()
                if batch_i > max_batches_per_epoch_train:
                    break

                mask = (batch_x[:, :, 1] != 0)

                batch_x = copy_data_to_device(batch_x, device)
                batch_y = copy_data_to_device(batch_y, device)
                mask = copy_data_to_device(mask, device)
                # set_trace()
                pred = model(batch_x)

                loss = -model.crf(pred.permute(0, 2, 1), batch_y,
                                  mask) / batch_size
                # loss = criterion(pred, batch_y)

                model.zero_grad()
                loss.backward()

                optimizer.step()

                mean_train_loss += float(loss)
                train_batches_n += 1
                if verbose_batch:
                    print(
                        f"Batch {batch_i} completed in {time.time() - start_batch:.2f} seconds"
                    )

            mean_train_loss /= train_batches_n
            print('Epoch: {} iterations, {:0.2f} s'.format(
                train_batches_n,
                (datetime.datetime.now() - epoch_start).total_seconds()))
            print('Mean training loss:', mean_train_loss)
            losses['train_loss'].append(mean_train_loss)

            model.eval()
            mean_val_loss = 0
            val_batches_n = 0

            with torch.no_grad():
                for batch_i, (batch_x, batch_y) in enumerate(val_dataloader):
                    if batch_i > max_batches_per_epoch_val:
                        break

                    mask = (batch_x[:, :, 1] != 0)

                    batch_x = copy_data_to_device(batch_x, device)
                    batch_y = copy_data_to_device(batch_y, device)
                    mask = copy_data_to_device(mask, device)

                    pred = model(batch_x)
                    loss = -model.crf(pred.permute(0, 2, 1), batch_y,
                                      mask) / batch_size

                    mean_val_loss += float(loss)
                    val_batches_n += 1

            mean_val_loss /= val_batches_n
            print('Mean validation loss:', mean_val_loss)
            losses['valid_loss'].append(mean_val_loss)

            logs = {'log loss': mean_train_loss, 'val_log loss': mean_val_loss}

            if mean_val_loss < best_val_loss:
                best_epoch_i = epoch_i
                best_val_loss = mean_val_loss
                best_model = copy.deepcopy(model)
                print('New best model!')
            elif epoch_i - best_epoch_i > early_stopping_patience:
                print(
                    'Model has not improved for the last {} epochs, stopping training'
                    .format(early_stopping_patience))
                break

            if lr_scheduler is not None:
                lr_scheduler.step(mean_val_loss)

            print()
        except KeyboardInterrupt:
            print('Stopped early by the user')
            break
        except Exception as ex:
            print('Error during training: {}\n{}'.format(ex,
                                                          traceback.format_exc()))
            break
        if verbose_liveloss:
            liveloss.update(logs)
            liveloss.send()

    return best_val_loss, best_model, losses
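
An illustrative call, assuming a crf_tagger model exposing the .crf layer used above and train_ds/val_ds datasets defined elsewhere. Since lr_scheduler.step() is fed the validation loss, a ReduceLROnPlateau-style scheduler fits naturally here.

best_loss, best_model, losses = train_eval_loop(
    crf_tagger, train_ds, val_ds,          # assumed model and datasets
    lr=1e-4,
    epoch_n=30,
    batch_size=32,
    early_stopping_patience=5,
    lr_scheduler_ctor=lambda opt: torch.optim.lr_scheduler.ReduceLROnPlateau(
        opt, patience=2),
)
# losses can be passed back via prev_loss to continue the history on a rerun
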
Code example #14
File: utils.py Project: liuziyuan827/test1
class Logger():
    def __init__(self, n_epochs, batches_epoch, out_dir, start_epoch=1):
        # self.viz = Visdom()
        self.n_epochs = n_epochs
        self.batches_epoch = batches_epoch
        self.epoch = start_epoch
        self.batch = 1
        self.prev_time = time.time()
        self.mean_period = 0
        self.losses = {}
        self.loss_windows = {}
        self.image_windows = {}
        self.out_dir = out_dir
        self.to_image = transforms.ToPILImage()
        self.liveloss = PlotLosses()

    def log(self, losses=None, images=None):
        self.mean_period += (time.time() - self.prev_time)
        self.prev_time = time.time()

        sys.stdout.write(
            '\rEpoch %03d/%03d [%04d/%04d] -- ' %
            (self.epoch, self.n_epochs, self.batch, self.batches_epoch))

        plots = {}

        for i, loss_name in enumerate(losses.keys()):
            if loss_name not in self.losses:
                self.losses[loss_name] = losses[loss_name].data
            else:
                self.losses[loss_name] += losses[loss_name].data

            if (i + 1) == len(losses.keys()):
                sys.stdout.write(
                    '%s: %.4f -- ' %
                    (loss_name, self.losses[loss_name] / self.batch))
            else:
                sys.stdout.write(
                    '%s: %.4f | ' %
                    (loss_name, self.losses[loss_name] / self.batch))

        batches_done = self.batches_epoch * (self.epoch - 1) + self.batch
        batches_left = self.batches_epoch * (
            self.n_epochs - self.epoch) + self.batches_epoch - self.batch
        sys.stdout.write('ETA: %s' % (datetime.timedelta(
            seconds=batches_left * self.mean_period / batches_done)))

        if self.batch % 10 == 0:
            # Save images
            plt.ioff()
            fig = plt.figure(figsize=(100, 50))
            for i, (image_name, tensor) in enumerate(images.items()):
                ax = plt.subplot(1, len(images), i + 1)
                ax.imshow(self.to_image(tensor.cpu().data[0]))
            fig.savefig(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch))
            plt.close(fig)
            # self.to_image(images["composed"].cpu().data[0]).save(self.out_dir + '/%d_%d.png' % (self.epoch, self.batch))
            # plt.close(fig)

        # End of epoch
        if (self.batch % self.batches_epoch) == 0:
            # Plot losses
            for i, (loss_name, loss) in enumerate(self.losses.items()):
                #         if loss_name not in self.loss_windows:
                #             self.loss_windows[loss_name] = self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]),
                #                                                             opts={'xlabel': 'epochs', 'ylabel': loss_name, 'title': loss_name})
                #         else:
                #             self.viz.line(X=np.array([self.epoch]), Y=np.array([loss/self.batch]), win=self.loss_windows[loss_name], update='append')

                plots[loss_name] = self.losses[loss_name] / self.batch

                # Reset losses for next epoch
                self.losses[loss_name] = 0.0

            self.liveloss.update(plots)
            self.liveloss.send()

            self.epoch += 1
            self.batch = 1
            sys.stdout.write('\n')
        else:
            self.batch += 1
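
A sketch of how this Logger might be driven from a two-network training loop; the loss/image names, the dataloader, and the loop body are assumptions, not part of the original source.

logger = Logger(n_epochs=200, batches_epoch=len(dataloader), out_dir='out')
for epoch in range(200):
    for real_batch in dataloader:
        # ... forward/backward passes producing loss tensors and image tensors ...
        logger.log(losses={'loss_G': loss_G, 'loss_D': loss_D},
                   images={'real': real_batch, 'fake': fake_batch})
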
Code example #15
def train(model, criterion, optimizer, train_dl, test_dl, num_epochs=40):
    liveloss = PlotLosses()
    for epoch in range(num_epochs):
        train_loss, valid_loss = [], []
        logs = {}
        prefix = ''
  
        # Training Part
        model.train()
        for i, data in enumerate(train_dl, 0):
            # Get the inputs
            inputs = labels = data
            inputs = inputs.cuda()
            labels = labels.cuda()
            
            inputs = inputs.float()
            labels = labels.float()
            
            # zero the parameter gradients
            optimizer.zero_grad()
            
            # forward + backward + optimize
            outputs = model(inputs)
            outputs = outputs.cuda()
            loss = criterion(outputs,labels)
            loss.backward()
            optimizer.step()
            
            ## -> Dense Output Re-feeding <- ##
            
            # Zero the gradients
            optimizer.zero_grad()

            # Important: detach() the output to avoid extending the
            # computation graph through the first pass
            outputs = model(outputs.detach())
            outputs = outputs.cuda()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
            logs[prefix + 'MMSE loss'] = loss.item()
        
        # evaluate once per epoch, without tracking gradients
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(test_dl, 0):
                inputs = labels = data
                inputs = inputs.cuda()
                labels = labels.cuda()

                inputs = inputs.float()
                labels = labels.float()

                outputs = model(inputs)
                outputs = outputs.cuda()
                loss = criterion(outputs, labels)

                valid_loss.append(loss.item())
                prefix = 'val_'
                logs[prefix + 'MMSE loss'] = loss.item()
        
        # indented into the epoch loop so the plot updates every epoch
        print()
        liveloss.update(logs)
        liveloss.draw()
        print("Epoch:", epoch + 1, " Training Loss: ", np.mean(train_loss), " Valid Loss: ", np.mean(valid_loss))
Code example #16
def train_model(model,
                dataloaders,
                dataset_sizes,
                criterion,
                optimizer,
                scheduler,
                num_epochs=25):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    since = time.time()
    liveloss = PlotLosses()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                print("\rIteration: {}/{}, Loss: {}.".format(
                    i + 1, len(dataloaders[phase]),
                    loss.item() * inputs.size(0)),
                      end="")

                #                 print( (i+1)*100. / len(dataloaders[phase]), "% Complete" )
                sys.stdout.flush()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            if phase == 'train':
                avg_loss = epoch_loss
                t_acc = epoch_acc
            else:
                val_loss = epoch_loss
                val_acc = epoch_acc

            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            #     phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        liveloss.update({
            'log loss': avg_loss,
            'val_log loss': val_loss,
            'accuracy': t_acc,
            'val_accuracy': val_acc
        })

        liveloss.draw()
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
        print('Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
        print('Best Val Accuracy: {}'.format(best_acc))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
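
A typical call for this loop, assuming dataloaders is a {'train': ..., 'val': ...} dict with matching dataset_sizes; the SGD/StepLR choice is an illustrative assumption, not from the original source.

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
model = train_model(model, dataloaders, dataset_sizes,
                    criterion, optimizer, scheduler, num_epochs=25)
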
Code example #17
def vs_net_train(args):
    train_path = args.train_h5
    val_path = args.val_h5
    NEPOCHS = args.epoch
    CASCADE = args.cascade
    LR = args.lr
    NBATCH = args.nb
    Res_name = args.Result_name
    device_num = args.device
    chpoint = args.checkpoint
    aug = args.aug
    zpad = args.zpad

    device = 'cuda:' + str(device_num)
    if zpad is False:
        print("input is from LORAKS")
        trainset = D.MAGIC_Dataset_LORAKS(train_path,
                                          augmentation=aug,
                                          verbosity=False)
        testset = D.MAGIC_Dataset_LORAKS(val_path,
                                         augmentation=False,
                                         verbosity=False)
    else:
        print("input is from Zero-Padding")
        trainset = D.MAGIC_Dataset_zpad(train_path,
                                        augmentation=aug,
                                        verbosity=False)
        testset = D.MAGIC_Dataset_zpad(val_path,
                                       augmentation=False,
                                       verbosity=False)

    trainloader = DataLoader(trainset,
                             batch_size=NBATCH,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=0)
    valloader = DataLoader(testset,
                           batch_size=NBATCH,
                           shuffle=False,
                           pin_memory=True,
                           num_workers=0)

    dataloaders = {'train': trainloader, 'validation': valloader}

    net = network(alfa=None, beta=0.5, cascades=CASCADE)
    net = net.to(device)
    if chpoint is not None:
        print('Loading network from:', chpoint)
        net.load_state_dict(torch.load(chpoint))

    ########## Training ####################
    _im0, _true, _Sens, _X_kJVC, _mask = testset[13]

    _im0, _true, _Sens, _X_kJVC, _mask = _im0.unsqueeze(0).to(device), _true.unsqueeze(0).to(device), _Sens.unsqueeze(0).to(device),\
    _X_kJVC.unsqueeze(0).to(device), _mask.unsqueeze(0).to(device)

    criterion = torch.nn.L1Loss()

    liveloss = PlotLosses()
    optimizer = torch.optim.Adam(net.parameters(), lr=LR)
    #    print('Now Training the Network')
    #    pdb.set_trace()
    for epoch in range(NEPOCHS):
        print('Epoch', epoch + 1)
        logs = {}
        for phase in ['train', 'validation']:  # a list keeps train first; set order is arbitrary
            if phase == 'train':
                kbar = pkbar.Kbar(target=len(trainloader), width=2)
                net.train()
            else:
                net.eval()

            running_loss = 0.0
            running_mse = 0.0

            iii = 0
            for im0, true, tSens, tX_kJVC, tmask in dataloaders[phase]:

                im0, true, tX_kJVC, tSens, tmask = im0.to(device,non_blocking=True), true.to(device,non_blocking=True), tX_kJVC.to(device,non_blocking=True),\
                                                   tSens.to(device,non_blocking=True), tmask.to(device,non_blocking=True)

                if phase == 'train':
                    out = net(im0, tX_kJVC, tmask, tSens)
                    loss = criterion(out, true)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    running_loss = running_loss + loss.item() * im0.size(0)
                    prefix = ''
                    kbar.update(iii,
                                values=[('L', 100 * running_loss / (iii + 1))])
                    iii = iii + 1
                else:
                    with torch.no_grad():
                        prefix = 'val_'
                        out = net(im0, tX_kJVC, tmask, tSens)
                        loss = criterion(out, true)
                        running_loss = running_loss + loss.item() * im0.size(0)
#                  print('hello')

                epoch_loss = running_loss / len(dataloaders[phase].dataset)

                logs[prefix + 'Loss'] = epoch_loss * 100

        if epoch % 10 == 0:
            save_name = 'Result_' + Res_name + '/Val_Epoch_' + str(
                epoch) + '.jpg'
            show_output(net, _im0, _true, _X_kJVC, _Sens, _mask, save_name)
            file_name = 'models/' + Res_name + '/Weights_Epoch_' + str(epoch)

            print(' SAVING WEIGHTS : ' + file_name)
            torch.save(net.state_dict(), file_name)

            f = open("models/" + Res_name + "/Losses_graph.obj",
                     "wb")  # Saving Lossplot objects to pickle
            pickle.dump(liveloss, f)
            f.close()

        liveloss.update(logs)
        f = open("Loss_Logging.txt", "a")
        kbar.add(1,
                 values=[('Train', logs['Loss']), ('Val', logs['val_Loss'])])
        f.write("Epoch{} : Training Loss : {:.5f} & Validation Loss: {:.5f}\n".
                format(epoch, logs['Loss'], logs['val_Loss']))
        f.close()
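
vs_net_train() only consumes an args namespace, so a matching argparse setup might look like the sketch below; the flag names mirror the attributes read in the function body, while the types and defaults are assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--train_h5', type=str, required=True)
parser.add_argument('--val_h5', type=str, required=True)
parser.add_argument('--epoch', type=int, default=100)
parser.add_argument('--cascade', type=int, default=5)
parser.add_argument('--lr', type=float, default=1e-4)
parser.add_argument('--nb', type=int, default=4)            # batch size
parser.add_argument('--Result_name', type=str, default='run0')
parser.add_argument('--device', type=int, default=0)
parser.add_argument('--checkpoint', type=str, default=None)
parser.add_argument('--aug', action='store_true')
parser.add_argument('--zpad', action='store_true')

vs_net_train(parser.parse_args())
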
Code example #18
liveloss = PlotLosses()
score_history = []

for i in range(100000):
    done = False
    score = 0
    obs = env.reset()
    agent.noise.reset()
    while not done:
        # env.render()
        act = agent.choose_action(obs)
        new_state, reward, done, info = env.step(act)
        agent.remember(obs, act, reward, new_state, int(done))
        agent.learn()
        score += reward
        obs = new_state

    score_history.append(score)

    metrics = {"score": score_history}

    print(
        "episode",
        i,
        "score %.2f" % score,
        "100 game average %.2f" % np.mean(score_history[-100:]),
    )

    liveloss.update(metrics)
    liveloss.send()
env.close()
Code example #19
    for fit, ind in zip(fits, offspring):
        avg_h += fit[0]
        ind.fitness.values = fit
        
    # new population
    population[:] = offspring
        
    # take the best and worst individuals for the plot
    top = tools.selBest(population, k=1)
    worst = tools.selWorst(population, k=1)
    
    avg_h = avg_h/len(population)
    top_h = nqueen_fitness(top[0])[0]
    worst_h = nqueen_fitness(worst[0])[0]
    plotlosses.update({'top': top_h,
                       'average': avg_h,
                       'worst': worst_h})
    plotlosses.send()
    
    # evaluate the stopping criterion
    if(nqueen_fitness(top[0])[0] == 0): 
        print(top[0])
        resultado = binToDec(top[0],log_N)
        # dataframe
        eixos = [i for i in range(N)]
        estado_inicial  = pd.DataFrame(index=(eixos),columns=(eixos))
        estadoInicial = list(random.randrange(N) for i in range(N))
        for i in range(len(estadoInicial)):
            estado_inicial[eixos[i]][resultado[i]] = 'rainha'
            
Code example #20
File: 2-convnet.py Project: Fatema/ssa-ml
    # iterate entire test dataset
    for x,t in test_loader:
        x,t = x.to(device), t.to(device)

        p = N(x).view(x.size(0), len(class_names))
        loss = torch.nn.functional.cross_entropy(p, t)
        pred = p.argmax(dim=1, keepdim=True)

        test_loss_arr = np.append(test_loss_arr, loss.data)
        test_acc_arr = np.append(test_acc_arr, pred.data.eq(t.view_as(pred)).float().mean().item())

    # NOTE: live plot library has dumb naming forcing our 'test' to be called 'validation'
    liveplot.update({
        'accuracy': train_acc_arr.mean(),
        'val_accuracy': test_acc_arr.mean(),
        'loss': train_loss_arr.mean(),
        'val_loss': test_loss_arr.mean()
    })
    liveplot.draw()

    epoch = epoch+1

# plot predictions
def plot_image(i, predictions_array, true_label, img):
    predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    plt.imshow(img, cmap=plt.cm.binary)
Code example #21
# TO START:
# pip install livelossplot
# pip install neptune-cli
# neptune account login
# neptune run minimal-neptune.py
# enjoy results

from time import sleep
import numpy as np

from livelossplot import PlotLosses

liveplot = PlotLosses(target='neptune')
for i in range(20):
    liveplot.update({
        'accuracy': 1 - np.random.rand() / (i + 2.),
        'val_accuracy': 1 - np.random.rand() / (i + 0.5),
        'mse': 1. / (i + 2.),
        'val_mse': 1. / (i + 0.5)
    })
    liveplot.draw()
    sleep(.5)
Code example #22
def fit_model(train_loader,
              val_loader,
              model,
              optimizer,
              scheduler,
              n_epochs,
              log_interval,
              plot=True,
              burnin=-1,
              patience=3,
              early_stop_score='MAP',
              eval_metric='cosine'):
    early_stop = {}
    early_stop['best'] = -float('inf')
    early_stop['best_params'] = to_cpu(model.state_dict())
    early_stop['fails'] = 0

    if plot:
        liveloss = PlotLosses()
    for epoch in range(n_epochs):
        logs = {}
        start_time = time.time()

        # Training
        train_loss = train_epoch(train_loader, model, optimizer)
        train_scores = {}
        # Turned off for optimize
        # if epoch > 0 and epoch % log_interval == 0:
        # train_scores = evaluate_ranking(model, train_loader, metric=eval_metric)

        elapsed = time.time() - start_time
        message = '\n' + '=' * 80
        message += '\nTrain:     '
        message += f' epoch: {epoch:2d}, time: {int(elapsed):d}s., loss: {train_loss:5.3f}'
        if 'silhouette' in train_scores:
            message += f', silhouette: {train_scores["silhouette"]:.2f}'
        message += '\n'

        # Validation
        start_time = time.time()
        val_loss = test_epoch(val_loader, model)
        val_scores = {}

        if epoch > 0 and epoch % log_interval == 0:
            train_label_set = list(set(train_loader.dataset.labels))
            val_scores = evaluate_ranking(model,
                                          val_loader,
                                          train_label_set,
                                          metric=eval_metric)

            # early stopping
            if val_scores[early_stop_score] > early_stop['best']:
                early_stop['best'] = val_scores[early_stop_score]
                early_stop['best_params'] = to_cpu(model.state_dict())
                early_stop['fails'] = 0
                early_stop['val_scores'] = val_scores
            else:
                early_stop['fails'] += 1
            if early_stop['fails'] >= patience:
                raise EarlyStopException(early_stop['best'],
                                         early_stop['best_params'],
                                         early_stop['fails'],
                                         early_stop['val_scores'])

        elapsed = time.time() - start_time

        message += 'Validation:'
        message += f' epoch: {epoch:2d}, time: {int(elapsed):d}s., loss: {val_loss:5.3f}'
        if 'silhouette' in val_scores:
            message += f', silhouette: {val_scores["silhouette"]:.2f}'
            message += f'\n            MAP: {val_scores["MAP"]:.2f}'
            message += f', MAP (seen): {val_scores["MAP seen labels"]:.2f}'
            message += f', MAP (unseen): {val_scores["MAP unseen labels"]:.2f}'
        message += '\n'
        message += '=' * 80 + '\n'
        print(message)

        logs['loss'] = train_loss
        logs['val_loss'] = val_loss
        for score, value in train_scores.items():
            logs[score] = value
        for score, value in val_scores.items():
            logs[f'val_{score}'] = value

        if epoch > burnin:
            scheduler.step(val_loss)

        if plot:
            liveloss.update(logs)
            liveloss.draw()

    # return data in case it never early stopped
    return early_stop
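
Because fit_model() signals early stopping by raising EarlyStopException, a caller usually wraps it in try/except. How the best weights are recovered depends on that exception class, assumed here to keep its constructor arguments in .args.

try:
    early_stop = fit_model(train_loader, val_loader, model, optimizer,
                           scheduler, n_epochs=50, log_interval=5)
except EarlyStopException as stop:
    # constructor was called as (best, best_params, fails, val_scores)
    early_stop = {'best_params': stop.args[1]}
model.load_state_dict(early_stop['best_params'])
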
Code example #23
train_loader, val_loader, test_loader = dataloader_make(ICGT_tips_train, ICGT_tips_test)

liveloss = PlotLosses()
for epoch in range(n_epochs):
    logs = {}
    train_loss, train_acc = train(model, optimiser, criterion, train_loader)

    logs['log loss'] = train_loss.item()
    logs['accuracy'] = train_acc.item()

    val_loss, val_acc = validate(model, criterion, val_loader)
    logs['val_log loss'] = val_loss.item()
    logs['val_accuracy'] = val_acc.item()
    
    
    liveloss.update(logs)
    liveloss.draw()

model.eval()
output = model(ICGT_tips_test.data.float())
truth = ICGT_tips_test.labels
avg_error = [0, 0, 0]
max_error = [0, 0, 0]
bad_index = [0, 0, 0]
for i in range(len(truth)):
    for n in [0, 1, 2]:
        error = abs(1 - (truth[i,n] / output[i,n]))
        error = error.item()
        avg_error[n] += error
        if error > max_error[n]:
            max_error[n] = error
Code example #24
def train_model_it(model,
                   dataloaders,
                   dataset_sizes,
                   criterion,
                   optimizer,
                   batch_size,
                   num_epochs=10,
                   scheduler=None):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    since = time.time()
    liveloss = PlotLosses()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        running_loss = 0.0
        running_corrects = 0
        #Iteration
        for i, (inputs, labels) in enumerate(dataloaders['train']):
            if scheduler is not None:
                scheduler.step()
            model.train()
            running_loss = 0.0
            running_corrects = 0
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            print("\rTraining Iteration: {}/{}, Loss: {}.".format(
                i + 1, len(dataloaders['train']),
                loss.item() * inputs.size(0) / batch_size),
                  end="")
            sys.stdout.flush()

            if (i + 1) % 100 == 0:
                it_loss = running_loss / batch_size
                it_acc = running_corrects.double() / batch_size
                model.eval()
                val_loss = 0
                val_corr = 0
                for j, (inputs, labels) in enumerate(dataloaders['val']):
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    optimizer.zero_grad()
                    with torch.set_grad_enabled(False):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                    val_loss += loss.item() * inputs.size(0)
                    val_corr += torch.sum(preds == labels.data)
                    print("\rValidation Iteration: {}/{}, Loss: {}.".format(
                        j + 1, len(dataloaders['val']),
                        loss.item() * inputs.size(0) / batch_size),
                          end="")
                    sys.stdout.flush()
                valid_loss = val_loss / dataset_sizes['val']
                valid_acc = val_corr.double() / dataset_sizes['val']

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    # statistics

                liveloss.update({
                    'log loss': it_loss,
                    'val_log loss': valid_loss,
                    'accuracy': it_acc,
                    'val_accuracy': valid_acc
                })

                liveloss.draw()
                print('validation loss: {}, validation accuracy: {}'.format(
                    valid_loss, valid_acc))
                print('Best Accuracy: {}'.format(best_acc))

                torch.save(
                    model.state_dict(),
                    "./models/acc_{}_loss_{}.pt".format(best_acc, valid_loss))

        # print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
        # print('Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
        # print('Best Val Accuracy: {}'.format(best_acc))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
Code example #25
def train_model(output_path, model, dataloaders, dataset_sizes, criterion, optimizer, num_epochs=5, scheduler=None):
	if not os.path.exists('models/'+str(output_path)):
		os.makedirs('models/'+str(output_path))
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	since = time.time()
	liveloss = PlotLosses()
	best_model_wts = copy.deepcopy(model.state_dict())
	best_acc = 0.0
	best = 0
	for epoch in range(num_epochs):
		print('Epoch {}/{}'.format(epoch+1, num_epochs))
		print('-' * 10)

		# Each epoch has a training and validation phase
		for phase in ['train', 'val']:
			if phase == 'train':
				if scheduler is not None:
					scheduler.step()
				model.train()  # Set model to training mode
			else:
				model.eval()   # Set model to evaluate mode

			running_loss = 0.0
			running_corrects = 0

			# Iterate over data.
			pbar = tqdm(dataloaders[phase])
			for i,(inputs, labels) in enumerate(pbar):
				inputs = inputs.to(device)
				labels = labels.to(device)

				# zero the parameter gradients
				optimizer.zero_grad()

				# forward
				# track history if only in train
				with torch.set_grad_enabled(phase == 'train'):
					outputs = model(inputs)
					_, preds = torch.max(outputs, 1)
					loss = criterion(outputs, labels)

					# backward + optimize only if in training phase
					if phase == 'train':
						loss.backward()
						optimizer.step()

				# statistics
				running_loss += loss.item() * inputs.size(0)
				running_corrects += torch.sum(preds == labels.data)
				#print("\rIteration: {}/{}, Loss: {}.".format(i+1, len(dataloaders[phase]), loss.item() * inputs.size(0)), end="")

#				 print( (i+1)*100. / len(dataloaders[phase]), "% Complete" )
				pbar.set_description(desc= f'Loss={loss.item()} Batch_id={i} ')
				
				
			epoch_loss = running_loss / dataset_sizes[phase]
			epoch_acc = running_corrects.double() / dataset_sizes[phase]
			if phase == 'train':
				avg_loss = epoch_loss
				t_acc = epoch_acc
			else:
				val_loss = epoch_loss
				val_acc = epoch_acc
			
#			 print('{} Loss: {:.4f} Acc: {:.4f}'.format(
#				 phase, epoch_loss, epoch_acc))

			# deep copy the model
			if phase == 'val' and epoch_acc > best_acc:
				best_acc = epoch_acc
				best = epoch + 1
				best_model_wts = copy.deepcopy(model.state_dict())
				
		liveloss.update({
			'log loss': avg_loss,
			'val_log loss': val_loss,
			'accuracy': t_acc,
			'val_accuracy': val_acc
		})
				
		#liveloss.draw()
		print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
		print(  'Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
		print()
		torch.save(model.state_dict(), './models/' + str(output_path) + '/model_{}_epoch.pt'.format(epoch+1))
	time_elapsed = time.time() - since
	print('Training complete in {:.0f}m {:.0f}s'.format(
		time_elapsed // 60, time_elapsed % 60))
	print('Best Validation Accuracy: {}, Epoch: {}'.format(best_acc, best))
Code example #26
                         'Deep Learning', f'G_{epoch}.pth'))
        torch.save(
            D.state_dict(),
            os.path.join('/', 'content', 'drive', 'My Drive',
                         'University Work', 'Year 3',
                         'Software, Systems, & Applications III',
                         'Deep Learning', f'D_{epoch}.pth'))

    # plot some examples
    plt.grid(False)
    plt.imshow(torchvision.utils.make_grid(g).cpu().data.permute(
        0, 2, 1).contiguous().permute(2, 1, 0),
               cmap=plt.cm.binary)

    liveplot.update({
        'generator loss': gen_loss_arr.mean(),
        'discriminator loss': dis_loss_arr.mean()
    })
    liveplot.draw()
    sleep(1.)

    epoch = epoch + 1
"""**Sample a batch from the generative model to show the output diversity**"""

G.eval()

horse_seed = random.randint(0, 1000000000)
print(horse_seed)

torch.manual_seed(460150825)  # change to horse_seed for random batches
horses = G.generate(torch.randn(100, 100, 1, 1).to(device))
Code example #27
        best_acc = test_correct / test_total


#        checkpoint = torch.load('./checkpoint/Sqnet_1x_v1.0/Sqnet_1x_v1.0_Cifar10.ckpt')
#        net.load_state_dict(checkpoint['net_state_dict'])
#        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

liveloss = PlotLosses()
best_cost = float('inf')  # track the fastest epoch across the whole run
for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    print()
    end_time = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    if end_time - start_time < best_cost:
        best_cost = end_time - start_time

    liveloss.update({
        'log loss': train_loss,
        'val_log loss': test_loss,
        'accuracy': train_correct,
        'val_accuracy': test_correct
    })
    liveloss.draw()
print('Best Cost: %ds' % (best_cost))
print('Best Acc: %.4f percent' % (best_acc * 100))
Code example #28
    def train_classifier(
        self,
        train_loader,
        test_loader,
        params: dict = None,
        livelossplot=False,
        save_checkpoint_each=None,
    ):
        """
        Method to train the model.

        Arguments:
        ----------
            - train_loader : DatasetLoader for the training set
            - test_loader : DatasetLoader for the test set
            - params (dict) : parameters (such as epochs) to update without
            rebuilding the entire class
            - livelossplot (bool=False): use livelossplot to plot the running loss and error_rate
            - save_checkpoint_each (list): list of epochs at which to save the model
        """
        # Update parameters if given

        if save_checkpoint_each is None:
            save_checkpoint_each = [self.params_classifier["epochs"]]
        if params:
            for param, value in params.items():
                self.params_classifier[param] = value

        # Define liveloss and time of training start
        if livelossplot:
            liveloss = PlotLosses()
        since = time.time()

        # Show which device is used
        print("Using device {}".format(self.device))
        self.model.to(self.device)

        loader_dict = {"train": train_loader, "validation": test_loader}
        for e in range(self.params_classifier["epochs"]):
            self.logs = {}
            if not livelossplot:
                print("Epoch {}/{} :".format(e,
                                             self.params_classifier["epochs"]))
                print("--------------")
            # Alternate between train and validation phase
            for phase in ["train", "validation"]:
                if phase == "train":
                    self.model.train()
                else:
                    self.model.eval()

                # Define loss and uncorrects predictions
                running_loss = 0.0
                running_uncorrects = 0

                # Loop over loader
                for images, labels in iter(loader_dict[phase]):
                    images = images.to(self.device)
                    # move the label tensor to the device (torch.tensor() on a
                    # tensor triggers a copy warning)
                    labels = labels.to(self.device, dtype=torch.long)

                    # Compute forward
                    output = self.model.forward(images)
                    loss = self.loss(output, labels)

                    # Backpropagate if in the train phase
                    if phase == "train":
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                    # Compute prediction
                    _, predicted = torch.max(output, 1)
                    running_loss += loss.detach() * images.size(0)
                    running_uncorrects += torch.sum(
                        predicted != labels.data.detach())

                # Compute loss and error_rate
                size_loader = len(loader_dict[phase].dataset)
                epoch_loss = running_loss / size_loader
                epoch_error_rate = running_uncorrects.float() / size_loader

                # Set the prefix for logs
                prefix = ""
                if phase == "validation":
                    prefix = "val_"

                # Update logs
                self.logs[prefix + "log loss"] = epoch_loss.item()
                self.logs[prefix + "error_rate"] = epoch_error_rate.item()

            # Use liveloss to plot loss and accuracy
            if livelossplot:
                liveloss.update(self.logs)
                liveloss.draw()
            else:
                string_print = """
                Training:               |   Validation:
                    log loss = {}       |       val_log loss = {}
                    error_rate = {}     |       val_error_rate = {}
                """.format(
                    self.logs["log loss"],
                    self.logs["val_log loss"],
                    self.logs["error_rate"],
                    self.logs["val_error_rate"],
                )
                print(string_print)

            # Save checkpoint
            if (e + 1) in save_checkpoint_each:
                save_checkpoint(
                    self.model,
                    model_name="AlexNet_checkpoint_e{}.pth".format(e))

        # Print training time
        time_elapsed = time.time() - since
        print("Training complete in {:.0f}m {:.0f}s".format(
            time_elapsed // 60, time_elapsed % 60))
Code example #29
File: bprH.py Project: qiaojj/BPRH
    def fit(self,
            X,
            eval_X,
            y=None,
            model_saved_path='bprh_model.pkl',
            iter_to_save=5000,
            coselection_saved_path='data/item-set-coselection.pkl',
            iter_to_log=100,
            correlation=True,
            coselection=False,
            plot_metric=False,
            log_metric=False):
        # Here we do not load model -> train a new model
        if self.existed_model_path is None:
            # To make sure train and test work even with inconsistent user and item lists,
            # we map each user/item string ID to an int ID equal to its index in U and V
            print("Registering Model Parameters")
            # rename user and item
            self.user_original_id_list = sorted(
                set(X.UserID).union(set(eval_X.UserID)))
            self.item_original_id_list = sorted(
                set(X.ItemID).union(set(eval_X.ItemID)))

            self.train_data = X.copy()
            self.test_data = eval_X.copy()

            self.train_data.UserID = self.train_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.train_data.ItemID = self.train_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.test_data.UserID = self.test_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.test_data.ItemID = self.test_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.item_list = [
                idx[0] for idx in enumerate(self.item_original_id_list)
            ]
            self.user_list = [
                idx[0] for idx in enumerate(self.user_original_id_list)
            ]

            self.num_u = len(self.user_list)
            self.num_i = len(self.item_list)

            # build I_u_t, I_u_a (pre-computing for acceleration)
            self.build_itemset_for_user()

            # Calculate auxiliary-target correlation C for every user and each types of auxiliary action
            if correlation:
                self.alpha_u = self.auxiliary_target_correlation(
                    X=self.train_data)
            else:
                print(
                    "No auxiliary-target correlation - all alpha_u equal to one"
                )
                alpha_u_all_ones = dict()
                user_set_bar = tqdm(self.user_list)
                for u in user_set_bar:
                    alpha_u_all_ones[u] = dict()
                    alpha_u_all_ones[u]['alpha'] = 1.0
                self.alpha_u = alpha_u_all_ones.copy()

            # Generate item-set based on co-selection
            if coselection:
                self.S, self.U_item = self.itemset_coselection(
                    X=self.train_data)

            # Initialization of User and Item Matrices
            if self.random_state is not None:
                np.random.seed(self.random_state)
            else:
                np.random.seed(0)

            print("Initializing User and Item Matrices")
            # NOTE: Initialization is influenced by mean and std
            self.U = np.random.normal(size=(self.num_u, self.dim + 1),
                                      loc=0.0,
                                      scale=0.1)
            self.V = np.random.normal(size=(self.dim + 1, self.num_i),
                                      loc=0.0,
                                      scale=0.1)
            # self.U = np.zeros(shape=(self.num_u, self.dim + 1))
            # self.V = np.zeros(shape=(self.dim + 1, self.num_i))
            self.U[:, -1] = 1.0
            # estimation is U dot V
            self.estimation = np.dot(self.U, self.V)

        # Configure loss plots layout
        if plot_metric:
            groups = {
                'Precision@K': ['Precision@5', 'Precision@10'],
                'Recall@K': ['Recall@5', 'Recall@10'],
                'AUC': ['AUC']
            }
            plot_losses = PlotLosses(groups=groups)

        # Start Iteration
        all_item = set(self.item_list)
        user_in_train = sorted(set(self.train_data.UserID))
        print("Start Training")
        with trange(self.num_iter) as t:
            for index in t:
                # Description will be displayed on the left
                # t.set_description('ITER %i' % index)

                # Build u, I, J, K
                # uniformly sample a user from U
                u = choice(user_in_train)

                # build I
                # uniformly sample a item i from I_u_t
                I_u_t = self.I_u_t_train[u]
                if len(I_u_t) != 0:
                    i = choice(sorted(I_u_t))
                    # build I = I_u_t cap S_i
                    if coselection:
                        I = I_u_t.intersection(self.S[i])
                    else:
                        # if no coselection, we set I as the set of purchased items by user u
                        # no uniform sampling, like COFISET
                        I = I_u_t
                else:  # if no item in I_u_t, then set I to empty set
                    i = None
                    I = set()

                # build J, since we only have one auxiliary action, we follow the uniform sampling
                I_u_oa = self.I_u_a_train[u] - I_u_t
                if len(I_u_oa) != 0:
                    j = choice(sorted(I_u_oa))
                    if coselection:
                        # NOTE: typo in paper?
                        J = I_u_oa.intersection(self.S[j])
                    else:
                        # if no coselection, we set J as the set of only-auxiliary items by user u
                        # no uniform sampling, like COFISET
                        J = I_u_oa
                else:  # if no item in I_u_oa, then set J to empty set
                    j = None
                    J = set()

                # build K
                I_u_n = all_item - I_u_t - I_u_oa
                if len(I_u_n) != 0:
                    k = choice(sorted(I_u_n))
                    # build K
                    if coselection:
                        # NOTE: typo in paper?
                        K = I_u_n.intersection(self.S[k])
                    else:
                        # if no coselection, we set K as the set of no-action items by user u
                        # no uniform sampling, like COFISET
                        K = I_u_n
                else:  # if no item in I_u_n, then set K to empty set
                    k = None
                    K = set()

                # calculate intermediate variables
                # get specific alpha_u
                spec_alpha_u = self.alpha_u[u]['alpha']

                U_u = self.U[u, :-1].copy()
                sorted_I = sorted(I)
                sorted_J = sorted(J)
                sorted_K = sorted(K)

                # get r_hat_uIJ, r_hat_uJK, r_hat_uIK
                r_hat_uI = np.average(
                    self.estimation[u, sorted_I]) if len(I) != 0 else np.array(
                        [0])
                r_hat_uJ = np.average(
                    self.estimation[u, sorted_J]) if len(J) != 0 else np.array(
                        [0])
                r_hat_uK = np.average(
                    self.estimation[u, sorted_K]) if len(K) != 0 else np.array(
                        [0])

                r_hat_uIJ = r_hat_uI - r_hat_uJ
                r_hat_uJK = r_hat_uJ - r_hat_uK
                r_hat_uIK = r_hat_uI - r_hat_uK
                # get V_bar_I, V_bar_J, V_bar_K
                V_bar_I = (np.average(self.V[:-1, sorted_I], axis=1)
                           if len(I) != 0 else np.zeros(shape=(self.dim, )))
                V_bar_J = (np.average(self.V[:-1, sorted_J], axis=1)
                           if len(J) != 0 else np.zeros(shape=(self.dim, )))
                V_bar_K = (np.average(self.V[:-1, sorted_K], axis=1)
                           if len(K) != 0 else np.zeros(shape=(self.dim, )))
                # get b_I, b_J, b_K
                b_I = np.average(self.V[-1, sorted_I]) if len(I) != 0 else np.array([0])
                b_J = np.average(self.V[-1, sorted_J]) if len(J) != 0 else np.array([0])
                b_K = np.average(self.V[-1, sorted_K]) if len(K) != 0 else np.array([0])

                # check which of the three sets are empty
                indicator_I = indicator(len(I) == 0)
                indicator_J = indicator(len(J) == 0)
                indicator_K = indicator(len(K) == 0)
                indicator_sum = indicator_I + indicator_J + indicator_K
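                # `indicator` and `indicator_len` are defined outside this
                # excerpt; a minimal sketch consistent with their use here:
                #     def indicator(cond):
                #         return 1 if cond else 0   # 1 marks an empty set
                #     def indicator_len(s):
                #         return max(len(s), 1)     # avoids division by zero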

                if 0 <= indicator_sum <= 1:
                    # these are the cases when at most one of the sets is empty
                    # when no set is empty, or only I or only K is empty, the
                    # objective can be rewritten simply by multiplying in the
                    # indicators; when J is empty, both pairwise terms involve
                    # J, so the objective has to be rewritten explicitly
                    # note: sigmoid(-x) appears in the gradients below because
                    # d/dx log(sigmoid(x)) = sigmoid(-x)
                    if indicator_J == 1:
                        # when J is empty

                        # NABLA U_u
                        df_dUu = sigmoid(-r_hat_uIK) * (V_bar_I - V_bar_K)
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I
                                  ) * sigmoid(-r_hat_uIK) / indicator_len(I)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # No change on J

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uIK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                    else:
                        # when J is not empty
                        # NABLA U_u
                        df_dUu = (
                            (1 - indicator_I) * sigmoid(-r_hat_uIJ / spec_alpha_u)
                            / spec_alpha_u * (V_bar_I - V_bar_J)
                            + (1 - indicator_K) * sigmoid(-r_hat_uJK)
                            * (V_bar_J - V_bar_K))
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I) * sigmoid(
                            -r_hat_uIJ / spec_alpha_u) / (indicator_len(I) *
                                                          spec_alpha_u)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # NABLA V_j
                        df_dbj = (1 - indicator_I) * (
                            -sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u
                            + (1 - indicator_K) *
                            sigmoid(-r_hat_uJK)) / indicator_len(J)
                        dR_dbj = 2 * self.lambda_b * b_J / indicator_len(J)
                        df_dVj = df_dbj * U_u
                        dR_dVj = 2 * self.lambda_v * V_bar_J / indicator_len(J)

                        # update V_j = V_j + gamma * (df_dVj - dR_dVj)
                        self.V[:-1, sorted_J] += self.gamma * (
                            df_dVj - dR_dVj)[:, None]  # trick: transpose here
                        # update b_j = b_j + gamma * (df_dbj - dR_dbj)
                        self.V[-1, sorted_J] += self.gamma * (df_dbj - dR_dbj)

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uJK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                else:
                    # these are the cases when at least two sets are empty;
                    # in these cases, we skip this user and continue the loop
                    continue

                # calculate loss (disabled; kept for reference)
                # f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK))
                # regula = self.lambda_u * np.linalg.norm(U_u, ord=2) + self.lambda_v * (
                #     (np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) +
                #     (np.linalg.norm(V_bar_J, ord=2) if len(J) != 0 else 0) +
                #     (np.linalg.norm(V_bar_K, ord=2) if len(K) != 0 else 0)) + self.lambda_b * (
                #     (b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 +
                #     (b_K if len(K) != 0 else 0) ** 2)
                # bprh_loss = f_Theta - regula

                # update estimation
                old_estimation = self.estimation.copy()
                # self.estimation = np.dot(self.U, self.V)
                all_sampled_item = sorted(set.union(I, J, K))
                # for sampled_item in all_sampled_item:
                #    self.estimation[:, sampled_item] = np.dot(self.U, self.V[:, sampled_item])
                self.estimation[:, all_sampled_item] = np.dot(
                    self.U, self.V[:, all_sampled_item])
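                # recomputing only the sampled columns avoids the cost of a
                # full np.dot(self.U, self.V) on every iteration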
                # magnitude of the change in the estimation this iteration
                est_changed = np.linalg.norm(self.estimation - old_estimation)

                # we only save model to file when the num of iter % iter_to_save == 0
                if (index + 1) % iter_to_save == 0:
                    self.save(model_path=model_saved_path + "_" + str(index))

                # we only calculate metric when the num of iter % iter_to_log == 0
                if (index + 1) % iter_to_log == 0:
                    if log_metric or plot_metric:
                        # calculate metrics on test data
                        user_to_eval = sorted(set(self.test_data.UserID))
                        scoring_list_5, precision_5, recall_5, avg_auc = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=5,
                            train_data_as_reference_flag=True)
                        scoring_list_10, precision_10, recall_10, _ = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=10,
                            train_data_as_reference_flag=True)
                    if log_metric:
                        self.eval_hist.append([
                            index, precision_5, precision_10, recall_5,
                            recall_10, avg_auc
                        ])

                    if plot_metric:
                        plot_losses.update({
                            'Precision@5': precision_5,
                            'Precision@10': precision_10,
                            'Recall@5': recall_5,
                            'Recall@10': recall_10,
                            'AUC': avg_auc
                        })
                        plot_losses.send()

                # Postfix will be displayed on the right,
                # formatted automatically based on argument's datatype
                t.set_postfix(est_changed=est_changed,
                              len_I=len(I),
                              len_J=len(J),
                              len_K=len(K))
Code Example #30
    def train_advanced(self, data_loaders, show_plot=True):
        liveloss = PlotLosses()

        how_near = 0.2  # tolerance for the disabled element-wise accuracy check below

        for epoch in range(self.num_epochs):
            logs = {}

            for phase in ['train', 'validation']:
                if phase == 'train':
                    self.train()
                else:
                    self.eval()

                running_loss = 0.0

                for inputs, labels in data_loaders[phase]:

                    # labels from the loader are unused: for an autoencoder,
                    # the reconstruction target is the input itself
                    inputs = T.DoubleTensor(inputs).to(self.device)
                    targets = inputs.clone()

                    # Variable is a no-op since PyTorch 0.4, so the tensors
                    # are used directly
                    outputs = self.encoder(inputs)
                    outputs = self.decoder(outputs)
                    loss = self.criterion(outputs, inputs)

                    if phase == 'train':
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                    preds = outputs.view(len(inputs), self.original_dim)
                    targets = targets.view(len(inputs), self.original_dim)

                    running_loss += loss.detach() * inputs.size(0)
                    '''
                    if T.sum((T.abs(preds - targets) < T.abs(how_near*preds))):
                        n_corrects += 1
                    else:
                        n_wrongs += 1
                    '''
                epoch_loss = running_loss / len(data_loaders[phase].dataset)
                #epoch_acc = (n_corrects*100) / len(data_loaders[phase].dataset)
                # NOTE: accuracy is computed on the last batch of the phase only
                epoch_acc = self.accuracy(targets, preds)

                prefix = ''
                if phase == 'validation':
                    prefix = 'val_'

                    print('[Model] epoch=%s, batch_loss=%s, epoch_loss=%s, acc=%s' %
                          (epoch, loss.item(), epoch_loss.item(), epoch_acc))

                logs[prefix + 'log loss'] = epoch_loss.item()
                logs[prefix + 'accuracy'] = epoch_acc

            if show_plot:
                liveloss.update(logs)
                liveloss.send()
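
A minimal usage sketch for train_advanced, assuming the surrounding class is a
torch.nn.Module that exposes the encoder, decoder, criterion, optimizer, device,
original_dim, num_epochs and accuracy members used above; the AutoEncoder name
and its constructor arguments are hypothetical:

import torch as T
from torch.utils.data import DataLoader, TensorDataset

# toy data: learn to reconstruct 8-dimensional vectors
X_train = T.randn(256, 8).double()
X_val = T.randn(64, 8).double()

data_loaders = {
    # the loader yields (inputs, labels); labels are ignored, since the
    # reconstruction target is the input itself
    'train': DataLoader(TensorDataset(X_train, X_train),
                        batch_size=32, shuffle=True),
    'validation': DataLoader(TensorDataset(X_val, X_val), batch_size=32),
}

model = AutoEncoder(original_dim=8, num_epochs=10)  # hypothetical constructor
model = model.double()  # match the DoubleTensor inputs used in the loop
model.train_advanced(data_loaders, show_plot=False)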