Example #1
    def __init__(
        self,
        model=None,
        data_loader=None,
        train_times=1000,
        lr=1e-3,
        alpha=0.5,
        use_gpu=True,
        opt_method="sgd",
        save_steps=None,
        checkpoint_dir=None,
    ):

        self.work_threads = 8
        self.train_times = train_times

        self.opt_method = opt_method
        self.optimizer = None
        self.lr_decay = 0
        self.weight_decay = 0
        self.alpha = alpha
        self.lr = lr

        self.model = model
        self.data_loader = data_loader
        self.use_gpu = use_gpu
        self.save_steps = save_steps
        self.checkpoint_dir = checkpoint_dir

        self.liveplot = PlotLosses()
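A minimal sketch (not from the original project) of the livelossplot cycle a training loop built on this constructor would follow; the run_sketch helper and its per-step losses are assumptions for illustration:

def run_sketch(trainer, step_losses):
    # feed one metrics dict per step, then render the updated plot
    for loss in step_losses:
        trainer.liveplot.update({'loss': float(loss)})
        trainer.liveplot.send()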
Example #2
class LivelossCallback(AvgStatsCallback):
    def __init__(self, metrics):
        super().__init__(metrics)
        self.liveloss = PlotLosses(skip_first=0)
        self.metricnames = [m.__name__ for m in metrics]
        self.logs = {}

    def begin_epoch(self):
        super().begin_epoch()
        self.logs = {}
        self.iteration = 0

    def after_loss(self):
        super().after_loss()
        if self.in_train:
            self.iteration += 1
            print(
                "\r[%d, %5d] Train_loss: %.3f" %
                (self.epoch + 1, self.iteration, self.loss),
                end="",
            )

    def after_epoch(self):
        super().after_epoch()
        self.logs["loss"] = self.train_stats.avg_stats[0]
        self.logs["val_loss"] = self.valid_stats.avg_stats[0]
        for i, metric in enumerate(self.metricnames):
            self.logs[metric] = self.train_stats.avg_stats[i + 1].item()
            self.logs["val_" + metric] = self.valid_stats.avg_stats[i +
                                                                    1].item()
        self.liveloss.update(self.logs)
        self.liveloss.draw()
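By default livelossplot pairs a metric with its 'val_'-prefixed counterpart on the same panel, which is why the callback logs 'loss'/'val_loss' and metric/'val_' + metric pairs. A standalone sketch of the same logging pattern with made-up values:

from livelossplot import PlotLosses

liveloss = PlotLosses(skip_first=0)
for epoch in range(3):
    liveloss.update({'loss': 1.0 / (epoch + 1), 'val_loss': 1.2 / (epoch + 1)})
    liveloss.send()  # renders both series on a single panel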
Example #3
File: utils.py  Project: liuziyuan827/test1
 def __init__(self, n_epochs, batches_epoch, out_dir, start_epoch=1):
     # self.viz = Visdom()
     self.n_epochs = n_epochs
     self.batches_epoch = batches_epoch
     self.epoch = start_epoch
     self.batch = 1
     self.prev_time = time.time()
     self.mean_period = 0
     self.losses = {}
     self.loss_windows = {}
     self.image_windows = {}
     self.out_dir = out_dir
     self.to_image = transforms.ToPILImage()
     self.liveloss = PlotLosses()
Example #4
def train(D, G, D_optimizer, G_optimizer, D_loss, G_loss, data_loader,
          options):
    """
  Inputs:
    - `options`: A dictionary of options to configure the GAN. Required values:
                    `batch_size` - (int) The size of each batch.
                    `epoch_count` - (int) The number of epochs to run.
                    `data_type` -
                    `glyph_size` - (tuple or triple, [int, int, (int)]) The size of the image (H, W, C)
                    `glyphs_per_image` - (int) The number of glyphs found on each image

  Returns: Dictionary of losses.

  """
    epoch_count = options['epoch_count']
    visualize = options['visualize']
    losses = collections.defaultdict(list)
    loss_plot = PlotLosses()

    if visualize:
        real_test, static_test = prepare_static_test(data_loader, options)
        visualize_progress(G, real_test, static_test)

    for _ in range(epoch_count):
        train_epoch(D, G, D_optimizer, G_optimizer, D_loss, G_loss,
                    data_loader, losses, options)

        if visualize:
            record_losses(loss_plot, losses)
            visualize_progress(G, real_test, static_test)

    return losses
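record_losses is referenced above but not shown; a plausible sketch, assuming losses maps metric names to lists of per-iteration values (as the defaultdict suggests):

def record_losses(loss_plot, losses):
    # hypothetical helper: push the most recent value of each tracked loss
    # into the PlotLosses object and redraw
    loss_plot.update({name: values[-1] for name, values in losses.items()})
    loss_plot.send()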
Example #5
def fit_model(cfg, net, loader, verbose=False):
    optimizer = torch.optim.Adam(net.parameters(), lr=cfg["learning_rate"], weight_decay=cfg["weight_decay"])
    loss_func = torch.nn.MSELoss() # this is for regression mean squared loss
    # Setup Pytorch in training mode
    net.train()
    # start training
    loss_hist = {}
    liveloss = PlotLosses()
    logs = {}
    lowest = 999999

    best_params = None
    for epoch in range(cfg["num_epochs"]):
        epoch_loss = 0
        for step, (batch_x, batch_y) in enumerate(loader): # for each training step
            prediction = net(batch_x).reshape(-1)     # input x and predict based on x
            if verbose: print("batch_x.size:{}".format(batch_x.size()))
            if verbose: print("batch_y.size:{}".format(batch_y.size()))
            if verbose: print("prediction.size:{}".format(prediction.size()))

            loss = loss_func(prediction, batch_y)     # must be (1. nn output, 2. target)
            epoch_loss += loss.detach().cpu().numpy()
            optimizer.zero_grad()   # clear gradients for next train
            loss.backward()         # backpropagation, compute gradients
            optimizer.step()        # apply gradients

        epoch_loss = epoch_loss / 900  # average over a hard-coded sample count
        lowest = min(lowest, epoch_loss)  # track the lowest epoch loss seen so far

        # Draw loss curves, gradients, and current inference results...
        visualize_results(cfg, epoch, liveloss, epoch_loss, loss_hist, net)
        print("epoch_loss {}".format(epoch_loss))
        pstr = '\repoch: {},  lr: {}, lowest_loss: {:7.5e}, latest_loss: {:7.5e}\n'.format(
            epoch, cfg["learning_rate"], lowest, epoch_loss)
        print(pstr, end="")
    return epoch_loss
Example #6
class LiveLossPlotListener(DojoListener):
    """
    DojoListener implementation which renders a livelossplot after finishing a dan.
    """
    def __init__(self):
        self.liveloss = None

    def training_started(self, aikidoka: Aikidoka, kata: Kata, kun: DojoKun):
        self.liveloss = PlotLosses()

    def dan_finished(self, aikidoka: Aikidoka, run: (int, int),
                     metrics: (float, float)):
        (loss, acc) = metrics

        self.liveloss.update({"loss": loss, "train_acc": acc})
        self.liveloss.draw()
Example #7
    def train_vae(self,
                  epochs=10,
                  hidden_size=2,
                  lr=0.0005,
                  recon_loss_method='mse'):
        """
        Handles the training of the vae model.

        Parameters
        ----------
        epochs : int
            Number of complete passes over the whole training set.
        hidden_size : int
            Size of the latent space of the vae.
        lr : float.
            Learning rate for the vae model training.
        recon_loss_method : str
            Method for reconstruction loss calculation

        Returns
        -------
        None

        """
        set_seed(42)  # Set the random seed
        self.model = VAE(hidden_size, self.input.shape)  # Initialise model

        # Create optimizer
        optimizer = optim.Adam(self.model.parameters(),
                               lr=lr,
                               betas=(0.9, 0.999))

        if self.plot_loss:
            liveloss = PlotLosses()
            liveloss.skip_first = 0
            liveloss.figsize = (16, 10)

        # Start training loop
        for epoch in range(1, epochs + 1):
            tl = train(epoch,
                       self.model,
                       optimizer,
                       self.train_loader,
                       recon_loss_method=recon_loss_method
                       )  # Train model on train dataset
            testl = test(epoch,
                         self.model,
                         self.test_loader,
                         recon_loss_method=recon_loss_method)

            if self.plot_loss:  # log train and test losses for dynamic plot
                logs = {}
                logs['' + 'ELBO'] = tl
                logs['val_' + 'ELBO'] = testl
                liveloss.update(logs)
                liveloss.draw()
Example #8
    def fit(self, optimizer, patience, num_epochs=200):

        liveloss = PlotLosses()
        # initialize the early_stopping object
        early_stopping = EarlyStopping(patience=patience,
                                       verbose=True,
                                       metric='auc')

        for epoch in tqdm(range(num_epochs)):
            logs = {}
            self.train(optimizer)
            val_auc, val_ap = self.evaluate(validation=True, test=False)

            logs['val_auc'] = val_auc
            logs['val_ap'] = val_ap

            liveloss.update(logs)
            liveloss.send()

            self.writer.add_scalar('val_auc', val_auc, epoch)
            self.writer.add_scalar('val_ap', val_ap, epoch)

            ### Add early-stopping implementation
            # early_stopping checks whether the validation metric (AUC here) has
            # stopped improving and, if it has improved, checkpoints the current model
            early_stopping(val_auc, self.model)

            if early_stopping.early_stop:
                print("Early stopping")
                break
        # load the last checkpoint with the best model
        self.model.load_state_dict(torch.load('checkpoint.pt'))
        return self.model
Example #9
def test_neptune():
    neptune_logger = NeptuneLogger(
        api_token="ANONYMOUS", project_qualified_name="shared/colab-test-run", tags=['livelossplot', 'github-actions']
    )

    plotlosses = PlotLosses(outputs=[neptune_logger])

    assert neptune_logger.experiment.state == 'running'

    for i in range(3):
        plotlosses.update(
            {
                'acc': 1 - np.random.rand() / (i + 2.),
                'val_acc': 1 - np.random.rand() / (i + 0.5),
                'loss': 1. / (i + 2.),
                'val_loss': 1. / (i + 0.5)
            }
        )
        plotlosses.send()

    assert neptune_logger.experiment.state == 'running'

    neptune_logger.close()

    assert neptune_logger.experiment.state == 'succeeded'

    url = neptune.project._get_experiment_link(neptune_logger.experiment)

    assert len(url) > 0
Example #10
 def __init__(self, model, optimizer, train_loader, validate_loader,
     criterion=nn.CrossEntropyLoss(), device="cpu", keep_best=0):
     "Stores the parameters on the class instance for later methods"
     
     for arg in ["model", "optimizer", "train_loader", "validate_loader",
     "criterion", "device", "keep_best"]:
         exec("self." + arg + "=" + arg)
         
     try:
         self.transform = validate_loader.dataset.transform
     except AttributeError:
         print("No transform found, test data must be normalised manually")
     
     # store the liveloss as it holds all our logs, useful for later
     self.liveloss = PlotLosses()
     # store the best model params
     self.best_params_dict = {}
     # store the current epoch between training batches
     self.epoch = 0
     # for keeping the best model params
     self.max_acc=0.
         
     return
Example #11
def train(model, patch_train_loader, patch_val_loader, EPOCHS, learning_rate):
  loss_func = nn.MSELoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)#, weight_decay=0.99)
  liveloss = PlotLosses()
  lr2_tr_loss = []
  lr2_val_loss = []
  model_losses, valid_losses = [], []
      
  for epoch in range(EPOCHS):
    print("epoch{}".format(epoch))
    model_losses, valid_losses = [], []
    logs = {}
    prefix = ''
      
    # with train data
    model.train()
    for idx, (data,target) in enumerate(patch_train_loader):
        data = torch.autograd.Variable(data).to(device = device, dtype = torch.float)
        print(data.shape)
        optimizer.zero_grad()
        pred = model(data)
        print(pred.shape)
        loss = loss_func(pred, data)
        # Backpropagation
        loss.backward()
        # update
        optimizer.step()
        # loss save
        model_losses.append(loss.cpu().data.item())
        logs[prefix + 'MSE loss'] = loss.item()
        print(idx,"complete")
          
    ## with validation data (defect-free samples only)
    model.eval()
    for idx, (data,target) in enumerate(patch_val_loader):
        data = torch.autograd.Variable(data).to(device = device, dtype = torch.float)
        pred = model(data)
        loss = loss_func(pred, data)
        valid_losses.append(loss.item())
        prefix = 'val_'
        logs[prefix + 'MSE loss'] = loss.item()
             
    lr2_tr_loss.append(np.mean(model_losses))
    lr2_val_loss.append(np.mean(valid_losses))
    liveloss.update(logs)
    liveloss.draw()
    print ("Epoch:", epoch+1, " Training Loss: ", np.mean(model_losses), " Valid Loss: ", np.mean(valid_losses))
    ## Save the model at every epoch so that, if overfitting occurs, an earlier epoch's weights can be used for the AE model.
    path = os.path.join("/content/drive/Shared drives/data/nocrop/model/hs/model{}".format(str(model)[11:12]),str(model)[:12] + '_epoch{}.pth'.format(epoch))
    torch.save(model.state_dict(), path)
    
    ## Save the model from epoch 19 (i.e. the final epoch) as the AE model
    if epoch == EPOCHS -1:
        path = os.path.join("/content/drive/Shared drives/data/nocrop/model/hs",str(model)[:12] + '.pth')
        torch.save(model.state_dict(), path)
        return lr2_tr_loss, lr2_val_loss
Example #12
def test_default_from_step():
    """Test without from_step"""
    out = CheckOutput(target_log_history_length=10)
    loss_plotter = PlotLosses(outputs=[out])
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
    loss_plotter.send()
Example #13
def test_minus_from_step():
    """Test from_step < 0"""
    out = CheckOutput(target_log_history_length=6)
    loss_plotter = PlotLosses(outputs=[out], from_step=-5)
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
    loss_plotter.send()
Example #14
def execute(model, n_epochs, trn_ldr, val_ldr, opti, crit, plot):
    '''
    This routine is responsible for the entire training process, and handles in-training plotting

    Arguments:
    model       : the model to be trained                                   // nn.Module
    n_epochs    : the number of epochs the model should be trained for      // integer
    trn_ldr     : the training dataloader                                   // dataloader
    val_ldr     : the validation dataloader                                 // dataloader
    opti        : the optimiser object                                      // optim
    crit        : the criterion (loss) function                             // nn loss function
    plot        : a flag denoting whether in-training plotting should occur // boolean

    Parameters:
    liveloss    : responsible for in-training plotting, activated by plot   // PlotLosses() object
    epoch       : the current epoch number                                  // integer
    logs        : holds the log data for the current epoch                  // dict
    trn_los     : the training loss for the current epoch                   // float
    trn_acc     : the training accuracy for the current epoch               // float
    val_los     : the validation loss for the current epoch                 // float
    val_acc     : the validation accuracy for the current epoch             // float

    Returns:
    model       : the final, trained model                                  // nn.Module
    '''

    if plot:
        liveloss = PlotLosses()  # initialise liveloss if plotting flag true

    for epoch in range(n_epochs):
        logs = {}

        trn_los, trn_acc = trn(model, opti, crit,
                               trn_ldr)  # run the training cycle
        logs['' + 'log loss'] = trn_los.item()
        logs['' + 'accuracy'] = trn_acc.item()  # update the logs

        val_los, val_acc = val(model, crit,
                               val_ldr)  # run the validation cycle
        logs['val_' + 'log loss'] = val_los.item()
        logs['val_' + 'accuracy'] = val_acc.item()  # update the logs

        if plot:
            liveloss.update(logs)
            liveloss.draw()  # print the plots if flag is true
        if not plot:
            print(
                "Epoch: " +
                str(epoch))  # if not plotting, print epoch number for tracking

    return model  # return finished trained model
Example #15
def main(args):
    liveloss = PlotLosses(groups=kfold_groups, group_patterns=group_patterns)
    
    global device
    device = args.device
    epochs = args.epochs
    bs = args.batch_size
    lr = args.lr
    dataset = args.dataset
    savef = args.savef
    loadf = args.loadf
    args.takeout = [1, 3]  # Take out node representation layers for infomax.

    path = osp.join(osp.abspath(''), 'data', dataset)
    dataset = TUDataset(path, name=dataset).shuffle()
    dataloader = DataLoader(dataset, batch_size=bs)
    
    args.num_classes = dataset.num_classes
    args.num_features = max(dataset.num_features, 1)
    
    model = HGI(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    if loadf:
        model.load_state_dict(torch.load(loadf))
        kfoldacc = test(model, dataloader, args)
        print('Kfold accuracy: {:.4f}'.format(kfoldacc))
        return
    
    for epoch in range(1, epochs+1):
        loss = train(model, optimizer, dataloader)
        kfoldacc = test(model, dataloader, args)
        log(liveloss, loss, None, kfoldacc, None)
        
        best_val_i = max(liveloss.logger.log_history['kfold_acc'], key=lambda i: i.value)
        step, best_val = best_val_i.step, best_val_i.value
        if savef and kfoldacc >= best_val:
            torch.save(model.state_dict(), savef)
        
    best_val = final_log(liveloss)
    
    return best_val
Example #16
def main():
    api_token = os.environ.get('NEPTUNE_API_TOKEN')
    project_qualified_name = os.environ.get('NEPTUNE_PROJECT_NAME')
    logger = NeptuneLogger(api_token=api_token,
                           project_qualified_name=project_qualified_name)
    liveplot = PlotLosses(outputs=[logger])
    for i in range(20):
        liveplot.update({
            'accuracy': 1 - np.random.rand() / (i + 2.),
            'val_accuracy': 1 - np.random.rand() / (i + 0.5),
            'mse': 1. / (i + 2.),
            'val_mse': 1. / (i + 0.5)
        })
        liveplot.send()
        sleep(.5)
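This script expects the Neptune credentials in environment variables; a minimal setup sketch, reusing the anonymous token and shared project from Example #9 as placeholders:

import os

os.environ.setdefault('NEPTUNE_API_TOKEN', 'ANONYMOUS')                 # placeholder token
os.environ.setdefault('NEPTUNE_PROJECT_NAME', 'shared/colab-test-run')  # placeholder project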
Example #17
def test_bokeh_plot():
    logger = BokehPlot()

    liveplot = PlotLosses(outputs=[logger], mode='script')

    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()

    assert os.path.isfile(logger.output_file)
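Several outputs can also be combined in one PlotLosses instance; a minimal sketch, assuming livelossplot 0.5+ where MatplotlibPlot and BokehPlot both live in livelossplot.outputs:

from livelossplot import PlotLosses
from livelossplot.outputs import MatplotlibPlot, BokehPlot

liveplot = PlotLosses(outputs=[MatplotlibPlot(), BokehPlot()])
for i in range(3):
    liveplot.update({'loss': 1. / (i + 2.), 'val_loss': 1. / (i + 0.5)})
    liveplot.send()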
Example #18
def train_model_gener(model, criterion, optimizer, dataloaders, num_epochs=10):
    liveloss = PlotLosses()
    model = model.to(device)

    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs_full, labels_class in dataloaders[phase]:

                # here are changes!
                inputs = inputs_full[:, :-1].to(device)
                labels = inputs_full[:, 1:].to(device)

                outputs = model(inputs)

                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.detach() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(
                dataloaders[phase].dataset)

            prefix = ''
            if phase == 'validation':
                prefix = 'val_'

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()

        liveloss.update(logs)
        liveloss.draw()
Example #19
def test_tensorboard():
    groups = {
        'accuracy': ['acc', 'val_acc'],
        'log-loss': ['loss', 'val_loss']
    }
    logger = TensorboardTFLogger()

    liveplot = PlotLosses(groups=groups, outputs=(logger, ))

    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()

    assert all([
        f.startswith('events.out.tfevents.') for f in os.listdir(logger._path)
    ])
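The same groups layout also works with the default matplotlib output; a small companion sketch (not part of the original tests), with made-up values:

liveplot = PlotLosses(groups={'accuracy': ['acc', 'val_acc'],
                              'log-loss': ['loss', 'val_loss']})
liveplot.update({'acc': 0.8, 'val_acc': 0.7, 'loss': 0.4, 'val_loss': 0.6})
liveplot.send()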
Example #20
    def fit(self, train_loader):
        liveloss = PlotLosses()
        logs = {}

        for epoch in range(self.epoch_num):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data.float()).to(
                    self.device), Variable(target.float()).to(self.device)
                data = data.view(-1, self.input_layer_size)
                target = target.view(-1, self.input_layer_size)
                self.optimizer.zero_grad()
                net_out = self.model(data)
                loss = self.criterion(net_out, target)
                loss.backward()
                self.optimizer.step()
            epoch_loss = loss.detach()
            logs['MSE loss'] = epoch_loss.item()
            liveloss.update(logs)
            liveloss.send()

        print("Number of weight coefficients:",
              self.model.number_of_weight_coefficients)
Example #21
File: monitor.py  Project: mindis/VBCAR
 def init_live_plot(self, file):
     self.liveloss = PlotLosses(fig_path=file)
Example #22
    def train(self,
              train_ds,
              valid_ds,
              plot_loss=True,
              verbose=True,
              save_path=None,
              need_y: str = 'no'):
        """Method for training, takes train and validation Datasets, as well
        as parameters specifying training monitoring and trains a network for
        a given set of hyperparameters.

        :param train_ds: training Dataset
        :param valid_ds: validation Dataset
        :param plot_loss: whether to plot loss during training
        :param verbose: whether to print loss after each epoch
        :param save_path: if given, serialises the model and saves there
        :param need_y: command to extract y's in order to train Attention based models with
        'state' or 'switch cells' layer
        """
        # Create DataLoaders
        assert need_y in ['no', 'yes'], 'Should be no/yes'
        train_dl = DataLoader(train_ds,
                              batch_size=self.batch_size,
                              shuffle=True)
        test_dl = DataLoader(valid_ds, batch_size=self.batch_size)

        # Dictionary for losses
        losses = {'train_loss': [], 'valid_loss': []}

        # Plot losses if the user chooses so
        if plot_loss:
            liveloss = PlotLosses()

        # Iterate over epochs
        for epoch in range(self.max_epochs):

            # Switch to training mode
            self.model.train()

            if verbose:
                print('Starting epoch {}'.format(epoch + 1))

            # A list for batch-wise training losses in a given epoch
            epoch_loss = []

            # Iterate over batches
            for idx_batch, batch in enumerate(train_dl):
                self.optimizer.zero_grad()
                if need_y == 'yes':
                    out = self.model(batch[0]['train_obs'].permute(1, 0, 2),
                                     y=batch[1].permute(1, 0))
                    tr_loss = self.loss(out, batch[0]['train_y'].to(DEVICE))
                elif need_y == 'no':
                    out = self.model(batch['train_obs'].permute(1, 0, 2))
                    tr_loss = self.loss(out, batch['train_y'].to(DEVICE))
                epoch_loss.append(tr_loss.item())
                tr_loss.backward()
                self.optimizer.step()

            # Switch to evaluation mode
            self.model.eval()

            # Compute training loss for the epoch
            losses['train_loss'].append(sum(epoch_loss) / len(train_dl))

            # Compute validation loss by iterating through valid dl batches
            with torch.no_grad():

                # A list for batch-wise validation losses
                val_loss = []

                # Iterate over batches in the validation DataLoader
                for idx_v_batch, v_batch in enumerate(test_dl):
                    if need_y == 'yes':
                        val_loss.append(
                            self.loss(
                                self.model(v_batch[0]['test_obs'].permute(
                                    1, 0, 2),
                                           y=v_batch[1].permute(1, 0)),
                                v_batch[0]['test_y']).item())
                    elif need_y == 'no':
                        val_loss.append(
                            self.loss(
                                self.model(v_batch['test_obs'].permute(
                                    1, 0, 2)), v_batch['test_y']).item())
                losses['valid_loss'].append(sum(val_loss) / len(test_dl))

            # Printing loss for a given epoch
            if verbose:
                print('Loss: {}'.format(losses['valid_loss'][epoch]))
            # Plot loss after each epoch if the user chose to
            if plot_loss:
                logs = {
                    'log_loss': losses['train_loss'][epoch],
                    'val_log_loss': losses['valid_loss'][epoch]
                }

                liveloss.update(logs)
                liveloss.draw()

            # Early stopping
            if self.early_stopping_patience:
                lag_1 = losses['valid_loss'][(
                    epoch - self.early_stopping_patience):epoch]
                lag_2 = losses['valid_loss'][(epoch -
                                              self.early_stopping_patience -
                                              1):(epoch - 1)]
                no_drops = sum(True if l1 < l2 else False
                               for l1, l2 in zip(lag_1, lag_2))
                if epoch > self.early_stopping_patience and no_drops == 0:
                    break

        # Save last loss
        self.final_loss = np.mean(losses['valid_loss'][-1])
        self.last_epoch = epoch

        # Save model
        if save_path:
            torch.save(self.model.state_dict(), save_path)
Example #23
File: bprH.py  Project: qiaojj/BPRH
    def fit(self,
            X,
            eval_X,
            y=None,
            model_saved_path='bprh_model.pkl',
            iter_to_save=5000,
            coselection_saved_path='data/item-set-coselection.pkl',
            iter_to_log=100,
            correlation=True,
            coselection=False,
            plot_metric=False,
            log_metric=False):
        # Here we do not load model -> train a new model
        if self.existed_model_path is None:
            # To make sure train and test works with inconsistent user and item list,
            # we transform user and item's string ID to int ID so that their ID is their index in U and V
            print("Registering Model Parameters")
            # rename user and item
            self.user_original_id_list = sorted(
                set(X.UserID).union(set(eval_X.UserID)))
            self.item_original_id_list = sorted(
                set(X.ItemID).union(set(eval_X.ItemID)))

            self.train_data = X.copy()
            self.test_data = eval_X.copy()

            self.train_data.UserID = self.train_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.train_data.ItemID = self.train_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.test_data.UserID = self.test_data.UserID.apply(
                lambda x: self.user_original_id_list.index(x))
            self.test_data.ItemID = self.test_data.ItemID.apply(
                lambda x: self.item_original_id_list.index(x))

            self.item_list = [
                idx[0] for idx in enumerate(self.item_original_id_list)
            ]
            self.user_list = [
                idx[0] for idx in enumerate(self.user_original_id_list)
            ]

            self.num_u = len(self.user_list)
            self.num_i = len(self.item_list)

            # build I_u_t, I_u_a (pre-computing for acceleration)
            self.build_itemset_for_user()

            # Calculate auxiliary-target correlation C for every user and each types of auxiliary action
            if correlation:
                self.alpha_u = self.auxiliary_target_correlation(
                    X=self.train_data)
            else:
                print(
                    "No auxiliary-target correlation - all alpha_u equal to one"
                )
                alpha_u_all_ones = dict()
                user_set_bar = tqdm(self.user_list)
                for u in user_set_bar:
                    alpha_u_all_ones[u] = dict()
                    alpha_u_all_ones[u]['alpha'] = 1.0
                self.alpha_u = alpha_u_all_ones.copy()

            # Generate item-set based on co-selection
            if coselection:
                self.S, self.U_item = self.itemset_coselection(
                    X=self.train_data)

            # Initialization of User and Item Matrices
            if self.random_state is not None:
                np.random.seed(self.random_state)
            else:
                np.random.seed(0)

            print("Initializing User and Item Matrices")
            # NOTE: Initialization is influenced by mean and std
            self.U = np.random.normal(size=(self.num_u, self.dim + 1),
                                      loc=0.0,
                                      scale=0.1)
            self.V = np.random.normal(size=(self.dim + 1, self.num_i),
                                      loc=0.0,
                                      scale=0.1)
            # self.U = np.zeros(shape=(self.num_u, self.dim + 1))
            # self.V = np.zeros(shape=(self.dim + 1, self.num_i))
            self.U[:, -1] = 1.0
            # estimation is U dot V
            self.estimation = np.dot(self.U, self.V)

        # Configure loss plots layout
        if plot_metric:
            groups = {
                'Precision@K': ['Precision@5', 'Precision@10'],
                'Recall@K': ['Recall@5', 'Recall@10'],
                'AUC': ['AUC']
            }
            plot_losses = PlotLosses(groups=groups)

        # Start Iteration
        all_item = set(self.item_list)
        user_in_train = sorted(set(self.train_data.UserID))
        print("Start Training")
        with trange(self.num_iter) as t:
            for index in t:
                # Description will be displayed on the left
                # t.set_description('ITER %i' % index)

                # Build u, I, J, K
                # uniformly sample a user from U
                u = choice(user_in_train)

                # build I
                # uniformly sample a item i from I_u_t
                I_u_t = self.I_u_t_train[u]
                if len(I_u_t) != 0:
                    i = choice(sorted(I_u_t))
                    # build I = I_u_t cap S_i
                    if coselection:
                        I = I_u_t.intersection(self.S[i])
                    else:
                        # if no coselection, we set I as the set of purchased items by user u
                        # no uniform sampling, like COFISET
                        I = I_u_t
                else:  # if no item in I_u_t, then set I to empty set
                    i = None
                    I = set()

                # build J, since we only have one auxiliary action, we follow the uniform sampling
                I_u_oa = self.I_u_a_train[u] - I_u_t
                if len(I_u_oa) != 0:
                    j = choice(sorted(I_u_oa))
                    if coselection:
                        # NOTE: typo in paper?
                        J = I_u_oa.intersection(self.S[j])
                    else:
                        # if no coselection, we set J as the set of only-auxiliary items by user u
                        # no uniform sampling, like COFISET
                        J = I_u_oa
                else:  # if no item in I_u_oa, then set J to empty set
                    j = None
                    J = set()

                # build K
                I_u_n = all_item - I_u_t - I_u_oa
                if len(I_u_n) != 0:
                    k = choice(sorted(I_u_n))
                    # build K
                    if coselection:
                        # NOTE: typo in paper?
                        K = I_u_n.intersection(self.S[k])
                    else:
                        # if no coselection, we set K as the set of no-action items by user u
                        # no uniform sampling, like COFISET
                        K = I_u_n
                else:  # if no item in I_u_n, then set K to empty set
                    k = None
                    K = set()

                # calculate intermediate variables
                # get specific alpha_u
                spec_alpha_u = self.alpha_u[u]['alpha']

                U_u = self.U[u, :-1].copy()
                sorted_I = sorted(I)
                sorted_J = sorted(J)
                sorted_K = sorted(K)

                # get r_hat_uIJ, r_hat_uJK, r_hat_uIK
                r_hat_uI = np.average(
                    self.estimation[u, sorted_I]) if len(I) != 0 else np.array(
                        [0])
                r_hat_uJ = np.average(
                    self.estimation[u, sorted_J]) if len(J) != 0 else np.array(
                        [0])
                r_hat_uK = np.average(
                    self.estimation[u, sorted_K]) if len(K) != 0 else np.array(
                        [0])

                r_hat_uIJ = r_hat_uI - r_hat_uJ
                r_hat_uJK = r_hat_uJ - r_hat_uK
                r_hat_uIK = r_hat_uI - r_hat_uK
                # get V_bar_I, V_bar_J, V_bar_K
                V_bar_I = np.average(self.V[:-1, sorted_I],
                                     axis=1) if len(I) != 0 else np.zeros(
                                         shape=(self.dim, ))
                V_bar_J = np.average(self.V[:-1, sorted_J],
                                     axis=1) if len(J) != 0 else np.zeros(
                                         shape=(self.dim, ))
                V_bar_K = np.average(self.V[:-1, sorted_K],
                                     axis=1) if len(K) != 0 else np.zeros(
                                         shape=(self.dim, ))
                # get b_I, b_J, b_K
                b_I = np.average(
                    self.V[-1, sorted_I]) if len(I) != 0 else np.array([0])
                b_J = np.average(
                    self.V[-1, sorted_J]) if len(J) != 0 else np.array([0])
                b_K = np.average(
                    self.V[-1, sorted_K]) if len(K) != 0 else np.array([0])

                # here we want to examine the condition of empty sets
                indicator_I = indicator(len(I) == 0)
                indicator_J = indicator(len(J) == 0)
                indicator_K = indicator(len(K) == 0)
                indicator_sum = indicator_I + indicator_J + indicator_K

                if 0 <= indicator_sum <= 1:
                    # these are the cases when only one set are empty or no set is empty
                    # when all three are not empty, or I is empty, or K is empty, it is
                    # easy to rewrite the obj by multiplying the indicator
                    # when J is empty, we have to rewrite the obj
                    if indicator_J == 1:
                        # when J is empty

                        # NABLA U_u
                        df_dUu = sigmoid(-r_hat_uIK) * (V_bar_I - V_bar_K)
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I
                                  ) * sigmoid(-r_hat_uIK) / indicator_len(I)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # No change on J

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uIK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                    else:
                        # when J is not empty
                        # NABLA U_u
                        df_dUu = (1 - indicator_I) * sigmoid(- r_hat_uIJ / spec_alpha_u) / spec_alpha_u * (
                                V_bar_I - V_bar_J) + \
                                 (1 - indicator_K) * sigmoid(- r_hat_uJK) * (V_bar_J - V_bar_K)
                        dR_dUu = 2 * self.lambda_u * U_u
                        # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                        self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                        # NABLA V_i
                        df_dbi = (1 - indicator_I) * sigmoid(
                            -r_hat_uIJ / spec_alpha_u) / (indicator_len(I) *
                                                          spec_alpha_u)
                        dR_dbi = (
                            1 - indicator_I
                        ) * 2 * self.lambda_b * b_I / indicator_len(I)
                        df_dVi = df_dbi * U_u
                        dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                        # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                        self.V[:-1, sorted_I] += self.gamma * (
                            df_dVi - dR_dVi)[:, None]  # trick: transpose here
                        # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                        self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                        # NABLA V_j
                        df_dbj = (1 - indicator_I) * (
                            -sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u
                            + (1 - indicator_K) *
                            sigmoid(-r_hat_uJK)) / indicator_len(J)
                        dR_dbj = 2 * self.lambda_b * b_J / indicator_len(J)
                        df_dVj = df_dbj * U_u
                        dR_dVj = 2 * self.lambda_v * V_bar_J / indicator_len(J)

                        # update V_j = V_j + gamma * (df_dVj - dR_dVj)
                        self.V[:-1, sorted_J] += self.gamma * (
                            df_dVj - dR_dVj)[:, None]  # trick: transpose here
                        # update b_j = b_j + gamma * (df_dbj - dR_dbj)
                        self.V[-1, sorted_J] += self.gamma * (df_dbj - dR_dbj)

                        # NABLA V_k
                        df_dbk = (1 - indicator_K
                                  ) * -sigmoid(-r_hat_uJK) / indicator_len(K)
                        dR_dbk = (
                            1 - indicator_K
                        ) * 2 * self.lambda_b * b_K / indicator_len(K)
                        df_dVk = df_dbk * U_u
                        dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)

                        # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                        self.V[:-1, sorted_K] += self.gamma * (
                            df_dVk - dR_dVk)[:, None]  # trick: transpose here
                        # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                        self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)

                else:
                    # these are the cases when at least two sets are empty
                    # at these cases, we ignore this user and continue the loop
                    continue

                # calculate loss
                # f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK))
                # regula = self.lambda_u * np.linalg.norm(U_u, ord=2) + self.lambda_v * (
                #        (np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) + (
                #            np.linalg.norm(V_bar_J, ord=2) if len(J) != 0 else 0) + (
                #            np.linalg.norm(V_bar_K, ord=2)) if len(K) != 0 else 0) + self.lambda_b * (
                #                     (b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 + (
                #                 b_K if len(K) != 0 else 0) ** 2)
                # bprh_loss = f_Theta - regula

                # update estimation
                old_estimation = self.estimation.copy()
                # self.estimation = np.dot(self.U, self.V)
                all_sampled_item = sorted(set.union(I, J, K))
                # for sampled_item in all_sampled_item:
                #    self.estimation[:, sampled_item] = np.dot(self.U, self.V[:, sampled_item])
                self.estimation[:, all_sampled_item] = np.dot(
                    self.U, self.V[:, all_sampled_item])
                # estimation changed
                est_changed = np.linalg.norm(self.estimation - old_estimation)

                # we only save model to file when the num of iter % iter_to_save == 0
                if (index + 1) % iter_to_save == 0:
                    self.save(model_path=model_saved_path + "_" + str(index))

                # we only calculate metric when the num of iter % iter_to_log == 0
                if (index + 1) % iter_to_log == 0:
                    if log_metric or plot_metric:
                        # calculate metrics on test data
                        user_to_eval = sorted(set(self.test_data.UserID))
                        scoring_list_5, precision_5, recall_5, avg_auc = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=5,
                            train_data_as_reference_flag=True)
                        scoring_list_10, precision_10, recall_10, _ = self.scoring(
                            user_to_eval=user_to_eval,
                            ground_truth=self.test_data,
                            K=10,
                            train_data_as_reference_flag=True)
                    if log_metric:
                        self.eval_hist.append([
                            index, precision_5, precision_10, recall_5,
                            recall_10, avg_auc
                        ])

                    if plot_metric:
                        plot_losses.update({
                            'Precision@5': precision_5,
                            'Precision@10': precision_10,
                            'Recall@5': recall_5,
                            'Recall@10': recall_10,
                            'AUC': avg_auc
                        })
                        plot_losses.send()

                # Postfix will be displayed on the right,
                # formatted automatically based on argument's datatype
                t.set_postfix(est_changed=est_changed,
                              len_I=len(I),
                              len_J=len(J),
                              len_K=len(K))
Example #24
def train_cross_validation(model_cls,
                           dataset,
                           dropout=0.0,
                           lr=1e-3,
                           weight_decay=1e-2,
                           num_epochs=200,
                           n_splits=10,
                           use_gpu=True,
                           dp=False,
                           ddp=False,
                           comment='',
                           tb_service_loc='192.168.192.57:6007',
                           batch_size=1,
                           num_workers=0,
                           pin_memory=False,
                           cuda_device=None,
                           tb_dir='runs',
                           model_save_dir='saved_models',
                           res_save_dir='res',
                           fold_no=None,
                           saved_model_path=None,
                           device_ids=None,
                           patience=20,
                           seed=None,
                           fold_seed=None,
                           save_model=False,
                           is_reg=True,
                           live_loss=True,
                           domain_cls=True,
                           final_cls=True):
    """
    :type fold_seed: int
    :param live_loss: bool
    :param is_reg: bool
    :param save_model: bool
    :param seed:
    :param patience: for early stopping
    :param device_ids: for ddp
    :param saved_model_path:
    :param fold_no: int
    :param ddp_port: str
    :param ddp: DDP
    :param cuda_device: list of int
    :param pin_memory: bool, DataLoader args
    :param num_workers: int, DataLoader args
    :param model_cls: pytorch Module cls
    :param dataset: instance
    :param dropout: float
    :param lr: float
    :param weight_decay:
    :param num_epochs:
    :param n_splits: number of kFolds
    :param use_gpu: bool
    :param dp: bool
    :param comment: comment in the logs, to filter runs in tensorboard
    :param tb_service_loc: tensorboard service location
    :param batch_size: Dataset args not DataLoader
    :return:
    """
    saved_args = locals()
    seed = int(time.time() % 1e4 * 1e5) if seed is None else seed
    saved_args['random_seed'] = seed

    torch.manual_seed(seed)
    np.random.seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
        # torch.backends.cudnn.deterministic = True
        # torch.backends.cudnn.benchmark = False

    model_name = model_cls.__name__

    if not cuda_device:
        if device_ids and dp:
            device = device_ids[0]
        else:
            device = torch.device(
                'cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
    else:
        device = cuda_device

    device_count = torch.cuda.device_count() if dp else 1
    device_count = len(device_ids) if (device_ids is not None
                                       and dp) else device_count

    batch_size = batch_size * device_count

    # TensorBoard
    log_dir_base = get_model_log_dir(comment, model_name)
    if tb_service_loc is not None:
        print("TensorBoard available at http://{1}/#scalars&regexInput={0}".
              format(log_dir_base, tb_service_loc))
    else:
        print("Please set up TensorBoard")

    # model
    criterion = nn.NLLLoss()

    print("Training {0} {1} models for cross validation...".format(
        n_splits, model_name))
    # 1
    # folds, fold = KFold(n_splits=n_splits, shuffle=False, random_state=seed), 0
    # 2
    # folds = GroupKFold(n_splits=n_splits)
    # iter = folds.split(np.zeros(len(dataset)), groups=dataset.data.site_id)
    # 4
    # folds = StratifiedKFold(n_splits=n_splits, random_state=fold_seed, shuffle=True if fold_seed else False)
    # iter = folds.split(np.zeros(len(dataset)), dataset.data.y.numpy(), groups=dataset.data.subject_id)
    # 5
    fold = 0
    iter = multi_site_cv_split(dataset.data.y,
                               dataset.data.site_id,
                               dataset.data.subject_id,
                               n_splits,
                               random_state=fold_seed,
                               shuffle=True if fold_seed else False)

    for train_idx, val_idx in tqdm_notebook(iter, desc='CV', leave=False):
        fold += 1
        liveloss = PlotLosses() if live_loss else None

        # for a specific fold
        if fold_no is not None:
            if fold != fold_no:
                continue

        writer = SummaryWriter(log_dir=osp.join('runs', log_dir_base +
                                                str(fold)))
        model_save_dir = osp.join('saved_models', log_dir_base + str(fold))

        print("creating dataloader tor fold {}".format(fold))

        train_dataset, val_dataset = norm_train_val(dataset, train_idx,
                                                    val_idx)

        model = model_cls(writer)

        train_dataloader = DataLoader(train_dataset,
                                      shuffle=True,
                                      batch_size=batch_size,
                                      collate_fn=lambda data_list: data_list,
                                      num_workers=num_workers,
                                      pin_memory=pin_memory)
        val_dataloader = DataLoader(val_dataset,
                                    shuffle=False,
                                    batch_size=batch_size,
                                    collate_fn=lambda data_list: data_list,
                                    num_workers=num_workers,
                                    pin_memory=pin_memory)

        if fold == 1 or fold_no is not None:
            print(model)
            writer.add_text('model_summary', model.__repr__())
            writer.add_text('training_args', str(saved_args))

        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=lr,
                                      betas=(0.9, 0.999),
                                      eps=1e-08,
                                      weight_decay=weight_decay,
                                      amsgrad=False)
        # scheduler_reduce = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
        scheduler = GradualWarmupScheduler(optimizer,
                                           multiplier=10,
                                           total_epoch=5)
        # scheduler = scheduler_reduce
        # optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
        if dp and use_gpu:
            model = model.cuda() if device_ids is None else model.to(
                device_ids[0])
            model = DataParallel(model, device_ids=device_ids)
        elif use_gpu:
            model = model.to(device)

        if saved_model_path is not None:
            model.load_state_dict(torch.load(saved_model_path))

        best_map, patience_counter, best_score = 0.0, 0, np.inf
        for epoch in tqdm_notebook(range(1, num_epochs + 1),
                                   desc='Epoch',
                                   leave=False):
            logs = {}

            # scheduler.step(epoch=epoch, metrics=best_score)

            for phase in ['train', 'validation']:

                if phase == 'train':
                    model.train()
                    dataloader = train_dataloader
                else:
                    model.eval()
                    dataloader = val_dataloader

                # Logging
                running_total_loss = 0.0
                running_corrects = 0
                running_reg_loss = 0.0
                running_nll_loss = 0.0
                epoch_yhat_0, epoch_yhat_1 = torch.tensor([]), torch.tensor([])
                epoch_label, epoch_predicted = torch.tensor([]), torch.tensor(
                    [])

                logging_hist = True if phase == 'train' else False  # once per epoch
                for data_list in tqdm_notebook(dataloader,
                                               desc=phase,
                                               leave=False):

                    # TODO: check devices
                    if dp:
                        data_list = to_cuda(data_list,
                                            (device_ids[0] if device_ids
                                             is not None else 'cuda'))

                    y_hat, domain_yhat, reg = model(data_list)

                    y = torch.tensor([],
                                     dtype=dataset.data.y.dtype,
                                     device=device)
                    domain_y = torch.tensor([],
                                            dtype=dataset.data.site_id.dtype,
                                            device=device)
                    for data in data_list:
                        y = torch.cat([y, data.y.view(-1).to(device)])
                        domain_y = torch.cat(
                            [domain_y,
                             data.site_id.view(-1).to(device)])

                    loss = criterion(y_hat, y)
                    domain_loss = criterion(domain_yhat, domain_y)
                    # domain_loss = -1e-7 * domain_loss
                    # print(domain_loss.item())
                    if domain_cls:
                        total_loss = domain_loss
                        _, predicted = torch.max(domain_yhat, 1)
                        label = domain_y
                    if final_cls:
                        total_loss = loss
                        _, predicted = torch.max(y_hat, 1)
                        label = y
                    if domain_cls and final_cls:
                        total_loss = (loss + domain_loss).sum()
                        _, predicted = torch.max(y_hat, 1)
                        label = y

                    if is_reg:
                        total_loss += reg.sum()

                    if phase == 'train':
                        # print(torch.autograd.grad(y_hat.sum(), model.saved_x, retain_graph=True))
                        optimizer.zero_grad()
                        total_loss.backward()
                        nn.utils.clip_grad_norm_(model.parameters(), 2.0)
                        optimizer.step()

                    running_nll_loss += loss.item()
                    running_total_loss += total_loss.item()
                    running_reg_loss += reg.sum().item()
                    running_corrects += (predicted == label).sum().item()

                    epoch_yhat_0 = torch.cat(
                        [epoch_yhat_0, y_hat[:, 0].detach().view(-1).cpu()])
                    epoch_yhat_1 = torch.cat(
                        [epoch_yhat_1, y_hat[:, 1].detach().view(-1).cpu()])
                    epoch_label = torch.cat(
                        [epoch_label,
                         label.detach().float().view(-1).cpu()])
                    epoch_predicted = torch.cat([
                        epoch_predicted,
                        predicted.detach().float().view(-1).cpu()
                    ])

                # precision = sklearn.metrics.precision_score(epoch_label, epoch_predicted, average='micro')
                # recall = sklearn.metrics.recall_score(epoch_label, epoch_predicted, average='micro')
                # f1_score = sklearn.metrics.f1_score(epoch_label, epoch_predicted, average='micro')
                accuracy = sklearn.metrics.accuracy_score(
                    epoch_label, epoch_predicted)
                epoch_total_loss = running_total_loss / dataloader.__len__()
                epoch_nll_loss = running_nll_loss / dataloader.__len__()
                epoch_reg_loss = running_reg_loss / dataloader.__len__()

                # print('epoch {} {}_nll_loss: {}'.format(epoch, phase, epoch_nll_loss))
                writer.add_scalars(
                    'nll_loss', {'{}_nll_loss'.format(phase): epoch_nll_loss},
                    epoch)
                writer.add_scalars('accuracy',
                                   {'{}_accuracy'.format(phase): accuracy},
                                   epoch)
                # writer.add_scalars('{}_APRF'.format(phase),
                #                    {
                #                        'accuracy': accuracy,
                #                        'precision': precision,
                #                        'recall': recall,
                #                        'f1_score': f1_score
                #                    },
                #                    epoch)
                if epoch_reg_loss != 0:
                    writer.add_scalars(
                        'reg_loss',
                        {'{}_reg_loss'.format(phase): epoch_reg_loss}, epoch)
                # print(epoch_reg_loss)
                # writer.add_histogram('hist/{}_yhat_0'.format(phase),
                #                      epoch_yhat_0,
                #                      epoch)
                # writer.add_histogram('hist/{}_yhat_1'.format(phase),
                #                      epoch_yhat_1,
                #                      epoch)

                # Save Model & Early Stopping
                if phase == 'validation':
                    model_save_path = model_save_dir + '-{}-{}-{:.3f}-{:.3f}'.format(
                        model_name, epoch, accuracy, epoch_nll_loss)
                    # best score
                    if accuracy > best_map:
                        best_map = accuracy
                        model_save_path = model_save_path + '-best'

                    score = epoch_nll_loss
                    if score < best_score:
                        patience_counter = 0
                        best_score = score
                    else:
                        patience_counter += 1

                    # skip first 10 epoch
                    # best_score = best_score if epoch > 10 else -np.inf

                    if save_model:
                        for th, pfix in zip(
                            [0.8, 0.75, 0.7, 0.5, 0.0],
                            ['-perfect', '-great', '-good', '-bad', '-miss']):
                            if accuracy >= th:
                                model_save_path += pfix
                                break

                        torch.save(model.state_dict(), model_save_path)

                    writer.add_scalars('best_val_accuracy',
                                       {'{}_accuracy'.format(phase): best_map},
                                       epoch)
                    writer.add_scalars(
                        'best_nll_loss',
                        {'{}_nll_loss'.format(phase): best_score}, epoch)

                    writer.add_scalars('learning_rate', {
                        'learning_rate':
                        scheduler.optimizer.param_groups[0]['lr']
                    }, epoch)

                    if patience_counter >= patience:
                        print("Stopped at epoch {}".format(epoch))
                        return

                if live_loss:
                    prefix = ''
                    if phase == 'validation':
                        prefix = 'val_'

                    logs[prefix + 'log loss'] = epoch_nll_loss
                    logs[prefix + 'accuracy'] = accuracy
            if live_loss:
                liveloss.update(logs)
                liveloss.draw()

    print("Done !")
        state = {
            'net_state_dict': net.state_dict(),
            #             'acc': test_correct / test_total,
            #             'optimizer_state_dict': optimizer.state_dict()
        }
        if not os.path.isdir('./checkpoint/Sqnet_1x_v1.0'):
            os.makedirs('./checkpoint/Sqnet_1x_v1.0')
        torch.save(state,
                   './checkpoint/Sqnet_1x_v1.0/Sqnet_1x_v1.0_Cifar10.ckpt')
        best_acc = test_correct / test_total


#        checkpoint = torch.load('./checkpoint/Sqnet_1x_v1.0/Sqnet_1x_v1.0_Cifar10.ckpt')
#        net.load_state_dict(checkpoint['net_state_dict'])
#        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

liveloss = PlotLosses()
best_cost = float('inf')  # fastest epoch wall-clock time seen so far
for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    print()
    end_time = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    if end_time - start_time < best_cost:
        best_cost = end_time - start_time

    liveloss.update({
        'log loss': train_loss,
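        # The source example is cut off at this point. A plausible completion is
        # sketched below; train_acc, test_loss and test_acc are hypothetical names
        # for metrics the original train()/test() would have exposed.
        'accuracy': train_acc,
        'val_log loss': test_loss,
        'val_accuracy': test_acc,
    })
    liveloss.draw()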
예제 #26
0
def main():
    global best_test_bpd

    last_checkpoints = []
    lipschitz_constants = []
    ords = []

    # if args.resume:
    #     validate(args.begin_epoch - 1, model, ema)

    liveloss = PlotLosses()

    for epoch in range(args.begin_epoch, args.nepochs):
        logs = {}

        logger.info('Current LR {}'.format(optimizer.param_groups[0]['lr']))

        running_loss = train(epoch, model)
        lipschitz_constants.append(get_lipschitz_constants(model))

        ords.append(get_ords(model))

        logger.info('Lipsh: {}'.format(pretty_repr(lipschitz_constants[-1])))
        logger.info('Order: {}'.format(pretty_repr(ords[-1])))

        #epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_loss = running_loss / len(
            datasets.CIFAR10(
                args.dataroot, train=True, transform=transform_train))

        logs['log loss'] = epoch_loss.item()

        liveloss.update(logs)
        liveloss.draw()

        if args.ema_val:
            test_bpd = validate(epoch, model, ema)
        else:
            test_bpd = validate(epoch, model)

        if args.scheduler and scheduler is not None:
            scheduler.step()

        if test_bpd < best_test_bpd:
            best_test_bpd = test_bpd

            utils.save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'args': args,
                    'ema': ema,
                    'test_bpd': test_bpd,
                },
                os.path.join(args.save, 'moMoModels'),
                epoch,
                last_checkpoints,
                num_checkpoints=5)
            """
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'mMoModels'), epoch, last_checkpoints, num_checkpoints=5)
            
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'mModels'), epoch, last_checkpoints, num_checkpoints=5)
            
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'models'), epoch, last_checkpoints, num_checkpoints=5)
            """

        torch.save(
            {
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'models',
                            '010mmoosttMoosttRecentt.pth'))
        """
예제 #27
0
def train_model(output_path, model, dataloaders, dataset_sizes, criterion, optimizer, num_epochs=5, scheduler=None):
	if not os.path.exists('models/'+str(output_path)):
		os.makedirs('models/'+str(output_path))
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	since = time.time()
	liveloss = PlotLosses()
	best_model_wts = copy.deepcopy(model.state_dict())
	best_acc = 0.0
	best = 0
	for epoch in range(num_epochs):
		print('Epoch {}/{}'.format(epoch+1, num_epochs))
		print('-' * 10)

		# Each epoch has a training and validation phase
		for phase in ['train', 'val']:
			if phase == 'train':
				
				if scheduler != None:
					scheduler.step()
				model.train()  # Set model to training mode
			else:
				pbar = dataloaders[phase]
				model.eval()   # Set model to evaluate mode

			running_loss = 0.0
			running_corrects = 0

			# Iterate over data.
			pbar = tqdm(dataloaders[phase])
			for i,(inputs, labels) in enumerate(pbar):
				inputs = inputs.to(device)
				labels = labels.to(device)

				# zero the parameter gradients
				optimizer.zero_grad()

				# forward
				# track history if only in train
				with torch.set_grad_enabled(phase == 'train'):
					outputs = model(inputs)
					_, preds = torch.max(outputs, 1)
					loss = criterion(outputs, labels)

					# backward + optimize only if in training phase
					if phase == 'train':
						loss.backward()
						optimizer.step()

				# statistics
				running_loss += loss.item() * inputs.size(0)
				running_corrects += torch.sum(preds == labels.data)
				#print("\rIteration: {}/{}, Loss: {}.".format(i+1, len(dataloaders[phase]), loss.item() * inputs.size(0)), end="")

#				 print( (i+1)*100. / len(dataloaders[phase]), "% Complete" )
				pbar.set_description(desc= f'Loss={loss.item()} Batch_id={i} ')
				
				
			epoch_loss = running_loss / dataset_sizes[phase]
			epoch_acc = running_corrects.double() / dataset_sizes[phase]
			if phase == 'train':
				avg_loss = epoch_loss
				t_acc = epoch_acc
			else:
				val_loss = epoch_loss
				val_acc = epoch_acc
			
#			 print('{} Loss: {:.4f} Acc: {:.4f}'.format(
#				 phase, epoch_loss, epoch_acc))

			# deep copy the model
			if phase == 'val' and epoch_acc > best_acc:
				best_acc = epoch_acc
				best = epoch + 1
				best_model_wts = copy.deepcopy(model.state_dict())
				
		liveloss.update({
			'log loss': avg_loss,
			'val_log loss': val_loss,
			'accuracy': t_acc,
			'val_accuracy': val_acc
		})
				
		#liveloss.draw()
		print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
		print(  'Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
		print()
		torch.save(model.state_dict(), './models/' + str(output_path) + '/model_{}_epoch.pt'.format(epoch+1))
	time_elapsed = time.time() - since
	print('Training complete in {:.0f}m {:.0f}s'.format(
		time_elapsed // 60, time_elapsed % 60))
	print('Best Validation Accuracy: {}, Epoch: {}'.format(best_acc, best))
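For context, a minimal sketch of how `train_model` above might be wired up; `train_ds`, `val_ds` and `model` are placeholders for objects the original script would define elsewhere:

import torch
from torch.utils.data import DataLoader

# Hypothetical wiring; train_model only requires the 'train'/'val' keys used above.
dataloaders = {
    'train': DataLoader(train_ds, batch_size=32, shuffle=True),
    'val': DataLoader(val_ds, batch_size=32, shuffle=False),
}
dataset_sizes = {phase: len(loader.dataset) for phase, loader in dataloaders.items()}

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
train_model('demo_run', model, dataloaders, dataset_sizes, criterion, optimizer, num_epochs=5)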
예제 #28
0
 def on_start(self, state):
     from livelossplot import PlotLosses
     self.plt = PlotLosses(**self._kwargs)
     self.batch_plt = PlotLosses(**self._kwargs)
예제 #29
0
class LiveLossPlot(Callback):
    """
    Callback to write metrics to `LiveLossPlot <https://github.com/stared/livelossplot>`_, a library for visualisation in notebooks

    Example: ::

        >>> import torch.nn
        >>> from torchbearer import Trial
        >>> from torchbearer.callbacks import LiveLossPlot

        # Example Trial which plots metrics live with the LiveLossPlot callback.
        >>> model = torch.nn.Linear(1,1)
        >>> live_loss_plot = LiveLossPlot()
        >>> trial = Trial(model, callbacks=[live_loss_plot], metrics=['acc'])

    Args:
        on_batch (bool): If True, batch metrics will be logged. Else batch metrics will not be logged
        batch_step_size (int): The number of batches between logging metrics
        on_epoch (bool): If True, epoch metrics will be logged every epoch. Else epoch metrics will not be logged
        draw_once (bool): If True, draw the plot only at the end of training. Else draw every time metrics are logged
        kwargs: Keyword arguments for livelossplot.PlotLosses

    State Requirements:
        - :attr:`torchbearer.state.METRICS`: Metrics should be a dict containing the metrics to be plotted
        - :attr:`torchbearer.state.BATCH`: Batch should be the current batch or iteration number in the epoch
    """
    def __init__(self,
                 on_batch=False,
                 batch_step_size=10,
                 on_epoch=True,
                 draw_once=False,
                 **kwargs):
        super(LiveLossPlot, self).__init__()
        self._kwargs = kwargs

        self.on_batch = on_batch
        self.on_epoch = on_epoch
        self.draw_once = draw_once
        self.batch_step_size = batch_step_size

        if on_batch:
            self.on_step_training = self._on_step_training

        if on_epoch:
            self.on_end_epoch = self._on_end_epoch

    def on_start(self, state):
        from livelossplot import PlotLosses
        self.plt = PlotLosses(**self._kwargs)
        self.batch_plt = PlotLosses(**self._kwargs)

    def _on_step_training(self, state):
        self.batch_plt.update({
            k: get_metric('LiveLossPlot', state, k)
            for k in state[torchbearer.METRICS]
        })
        if state[torchbearer.BATCH] % self.batch_step_size == 0 and not self.draw_once:
            with no_print():
                self.batch_plt.draw()

    def _on_end_epoch(self, state):
        self.plt.update({
            k: get_metric('LiveLossPlot', state, k)
            for k in state[torchbearer.METRICS]
        })
        if not self.draw_once:
            with no_print():
                self.plt.draw()

    def on_end(self, state):
        if self.draw_once:
            with no_print():
                self.batch_plt.draw()
                self.plt.draw()
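Building on the docstring above, the constructor arguments can be combined to log batch-level metrics every 50 iterations while deferring drawing to the end of training; a small sketch in the docstring's own doctest style (the argument values are illustrative):

    >>> from torchbearer import Trial
    >>> live_loss_plot = LiveLossPlot(on_batch=True, batch_step_size=50, on_epoch=True, draw_once=True)
    >>> trial = Trial(model, callbacks=[live_loss_plot], metrics=['acc'])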
예제 #30
0
# In[5]:


names = ['airplane', 'onion', 'apple', 'pineapple', 'ant', 'banana', 'ambulance', 'angel', 'cat', 'cow', 'broccoli', 'bus']
amount = 1000
device = torch.device("cpu")
model = Net(len(names))
X_train, Y_train, X_test, Y_test = data_load(names, amount)        
train_loader  = DataLoader(TensorDataset(X_train,Y_train),
                        batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, Y_test),
                        batch_size=32, shuffle=False)  
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

liveloss = PlotLosses()


# In[6]:


def conv_train_model(epochs):  # uses the previously defined functions; arguments:
                               # epochs - number of passes over the training data,
                               # names  - list of class names,
                               # amount - amount of data,
    for epoch in range(epochs):

        avg_loss, avg_accuracy = conv_train_step(model, device, train_loader, optimizer, epoch)  
        avg_loss_val, avg_accuracy_val = conv_test_step(model, device, test_loader)
    
        liveloss.update({
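            # The collection is truncated here; a plausible completion, using the
            # metrics computed just above and the key names seen in the other examples:
            'log loss': avg_loss,
            'accuracy': avg_accuracy,
            'val_log loss': avg_loss_val,
            'val_accuracy': avg_accuracy_val,
        })
        liveloss.draw()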