Example #1
    def __init__(self,
                 params,
                 lr=1e-3,
                 eps=1e-8,
                 alpha=1e-7,
                 beta=1e-5,
                 gamma=0.9,
                 momentum=1,
                 sgd_steps=5,
                 K=10):

        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 1 >= momentum:
            raise ValueError("Invalid momentum value: {}".format(eps))

        self.iter = 0
        self.sgd = SGD(params, lr=lr, momentum=0.9)

        param_count = np.sum([np.prod(p.size())
                              for p in params])  # got from MNIST-GAN

        defaults = dict(lr=lr,
                        eps=eps,
                        alpha=alpha,
                        beta=beta * param_count,
                        gamma=gamma,
                        sgd_steps=sgd_steps,
                        momentum=momentum,
                        K=K)

        super(Neumann, self).__init__(params, defaults)
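The constructor above follows the standard torch.optim.Optimizer pattern: validate the hyperparameters, collect them in a defaults dict, and hand both to the base class, which copies them into every param group. Neumann's step() is not shown here; the following is only a minimal, self-contained sketch of that constructor pattern, with a plain gradient-descent update standing in for the actual Neumann rule:

import torch
from torch.optim import Optimizer

class PlainSGD(Optimizer):
    # Minimal illustration of the constructor pattern used by Neumann above:
    # validate hyperparameters, build `defaults`, delegate to Optimizer.
    def __init__(self, params, lr=1e-3):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        defaults = dict(lr=lr)
        super(PlainSGD, self).__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()
        for group in self.param_groups:  # entries from `defaults` end up in each group
            for p in group['params']:
                if p.grad is not None:
                    p.add_(p.grad, alpha=-group['lr'])  # plain SGD update, not Neumann's
        return loss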
Example #2
def split_optimizer(model: nn.Module, cfg: dict):
    param_weight_decay, param_bias, param_other = split_params(model)
    if len(param_other) != 0:
        if cfg['optimizer'] == 'Adam':
            optimizer = Adam(param_other, lr=cfg['lr'])
        elif cfg['optimizer'] == 'SGD':
            optimizer = SGD(param_other,
                            lr=cfg['lr'],
                            momentum=cfg['momentum'])
        else:
            raise NotImplementedError("optimizer {:s} is not support!".format(
                cfg['optimizer']))
        optimizer.add_param_group({
            'params': param_weight_decay,
            'weight_decay': cfg['weight_decay']
        })  # add pg1 with weight_decay
        optimizer.add_param_group({'params': param_bias})
    else:
        if cfg['optimizer'] == 'Adam':
            optimizer = Adam(param_weight_decay,
                             lr=cfg['lr'],
                             weight_decay=cfg['weight_decay'])
        elif cfg['optimizer'] == 'SGD':
            optimizer = SGD(param_weight_decay,
                            lr=cfg['lr'],
                            momentum=cfg['momentum'],
                            weight_decay=cfg['weight_decay'])
        else:
            raise NotImplementedError("optimizer {:s} is not support!".format(
                cfg['optimizer']))
        optimizer.add_param_group({'params': param_bias})
    return optimizer
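split_params is not defined in this snippet. A minimal sketch of how such a helper is often written, assuming the usual convention that only conv/linear weights receive weight decay while biases and normalization weights do not (this three-way split is an inference from how the groups are used above, not the original implementation):

import torch.nn as nn

def split_params(model: nn.Module):
    # Hypothetical helper assumed by split_optimizer above: separate the
    # parameters that should receive weight decay from biases and from
    # everything else (e.g. BatchNorm weights).
    param_weight_decay, param_bias, param_other = [], [], []
    for module in model.modules():
        if hasattr(module, 'bias') and isinstance(module.bias, nn.Parameter):
            param_bias.append(module.bias)
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            param_weight_decay.append(module.weight)
        elif hasattr(module, 'weight') and isinstance(module.weight, nn.Parameter):
            param_other.append(module.weight)
    return param_weight_decay, param_bias, param_other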
Example #3
def optimizer_choose(model, args, writer, block):
    params = []
    for key, value in model.named_parameters():
        if value.requires_grad:
            params += [{
                'params': [value],
                'lr': args.lr,
                'key': key,
                'weight_decay': args.wd
            }]

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params)
        block.log('Using Adam optimizer')
    elif args.optimizer == 'sgd':
        momentum = 0.9
        optimizer = SGD(params, momentum=momentum)
        block.log('Using SGD with momentum ' + str(momentum))
    elif args.optimizer == 'sgd_nev':
        momentum = 0.9
        optimizer = SGD(params, momentum=momentum, nesterov=True)
        block.log('Using SGD with momentum ' + str(momentum) + ' and nesterov')
    else:
        momentum = 0.9
        optimizer = SGD(params, momentum=momentum)
        block.log('Using SGD with momentum ' + str(momentum))

    # shutil.copy2(inspect.getfile(optimizer), args.model_saved_name)
    shutil.copy2(__file__, args.model_saved_name)
    return optimizer
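Because every parameter gets its own group here, the extra 'key' entry (which PyTorch simply stores alongside the group) can be used later to retune individual layers. A small sketch of that idea; scale_lr_for is a hypothetical helper, not part of the code above:

def scale_lr_for(optimizer, key_fragment, factor):
    # PyTorch keeps unknown entries such as 'key' inside each param group,
    # so they can be used to target specific parameters afterwards.
    for group in optimizer.param_groups:
        if key_fragment in group.get('key', ''):
            group['lr'] *= factor

# e.g. halve the learning rate of every bias parameter:
# scale_lr_for(optimizer, 'bias', 0.5)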
Example #4
    def step(self, optimizer: SGD, *args, **kwargs) -> Optional[float]:  # type: ignore
        """
        :meth:`step` carries out the following two operations:

        1.  Internally invokes ``unscale_(optimizer)`` (unless :meth:`unscale_` was explicitly called for ``optimizer``
            earlier in the iteration).  As part of the :meth:`unscale_`, gradients are checked for infs/NaNs.
        2.  If no inf/NaN gradients are found, invokes ``optimizer.step()`` using the unscaled
            gradients.  Otherwise, ``optimizer.step()`` is skipped to avoid corrupting the params.

        ``*args`` and ``**kwargs`` are forwarded to ``optimizer.step()``.

        Returns the return value of ``optimizer.step(*args, **kwargs)``.

        Args:
            optimizer (torch.optim.Optimizer):  Optimizer that applies the gradients.
            args:  Any arguments.
            kwargs:  Any keyword arguments.

        .. warning::
            Closure use is not currently supported.

        Note: This is an exact copy of the step function in grad_scaler.py. If this copy is deleted then the
        unittest test_cpu_offload_and_cpu_grads fails. This is because the parent class step function calls
        the parent class unscale_ function which does not handle torch.distributed.all_reduce on cpu.
        """
        if not self._enabled:
            return optimizer.step(*args, **kwargs)

        if "closure" in kwargs:
            raise RuntimeError("Closure use is not currently supported if GradScaler is enabled.")

        self._check_scale_growth_tracker("step")  # type: ignore

        optimizer_state = self._per_optimizer_states[id(optimizer)]

        if optimizer_state["stage"] is OptState.STEPPED:
            raise RuntimeError("step() has already been called since the last update().")

        retval = None

        if hasattr(optimizer, "_step_supports_amp_scaling") and optimizer._step_supports_amp_scaling:
            # This optimizer has customized scale-handling logic, so we can call optimizer.step() directly.
            # The contract with custom optimizers is that their step() should accept an additional,
            # optional grad_scaler kwarg.  We append self to the kwargs so the custom optimizer has full information:
            # it can query its own state, invoke unscale_ on itself, etc
            retval = optimizer.step(*args, **dict(kwargs, grad_scaler=self))
            optimizer_state["stage"] = OptState.STEPPED
            return retval

        if optimizer_state["stage"] is OptState.READY:
            self.unscale_(optimizer)

        assert len(optimizer_state["found_inf_per_device"]) > 0, "No inf checks were recorded for this optimizer."
        retval = self._maybe_opt_step(optimizer, optimizer_state, *args, **kwargs)  # type: ignore
        optimizer_state["stage"] = OptState.STEPPED
        return retval
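For context, a step() with this contract is meant to be driven by the usual AMP loop: scale the loss, backward, step through the scaler, then update the scale. A small self-contained sketch of that calling pattern using the stock torch.cuda.amp.GradScaler (the subclass above follows the same API); the toy model and data are placeholders:

import torch
import torch.nn as nn
from torch.cuda.amp import GradScaler, autocast

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Linear(16, 1).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
scaler = GradScaler(enabled=(device == "cuda"))  # no-op on CPU

for _ in range(10):
    inputs = torch.randn(8, 16, device=device)
    targets = torch.randn(8, 1, device=device)
    optimizer.zero_grad()
    with autocast(enabled=(device == "cuda")):
        loss = nn.functional.mse_loss(model(inputs), targets)
    scaler.scale(loss).backward()  # backward on the scaled loss
    scaler.step(optimizer)         # unscale_, inf/NaN check, then optimizer.step()
    scaler.update()                # adjust the scale factor for the next iteration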
Example #5
    def _initOptimizer(self):
        modelParallel = self.getModelParallel()
        args = self.getArgs()

        optimizer = SGD(modelParallel.parameters(),
                        args.learning_rate,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay)
        # load optimizer pre-trained state dict if exists
        if self.optimizerStateDict:
            optimizer.load_state_dict(self.optimizerStateDict)

        return optimizer
Example #6
def create_model(args, model=None):
    # Create MVCNN model based on the given architecture.
    if model is None:
        model = SVCNN(nclasses=args.num_classes,
                      pretraining=args.pretrained,
                      cnn_name=args.arch,
                      feature_extraction=args.feature_extraction)
    else:
        model = MVCNN(model, num_views=args.nview)

    # Multi GPUs
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    # Send model to GPU or keep it to the CPU
    model = model.to(device=args.device)

    if args.optimizer == "ADAM":
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         args.learning_rate,
                         weight_decay=args.weight_decay)
    elif args.optimizer == "ADAGRAD":
        optimizer = Adagrad(filter(lambda p: p.requires_grad,
                                   model.parameters()),
                            args.learning_rate,
                            weight_decay=args.weight_decay)
    else:
        # If we use feature extraction (features weights are frozen), we need to keep only differentiable params
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()),
                        args.learning_rate,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay)
    return model, optimizer
Example #7
    def configure_optimizers(self):
        # Add args to define the values
        params_list = [{
            'params': self.model.parameters(),
            'lr': 0.01
        }, {
            'params': self.classifier0.parameters()
        }, {
            'params': self.classifier1.parameters()
        }, {
            'params': self.classifier2.parameters()
        }, {
            'params': self.classifier3.parameters()
        }, {
            'params': self.classifier4.parameters()
        }, {
            'params': self.classifier5.parameters()
        }]

        optim = SGD(params_list,
                    lr=self.learning_rate,
                    weight_decay=5e-4,
                    momentum=0.9,
                    nesterov=True)

        scheduler = lr_scheduler.StepLR(optim, step_size=40, gamma=0.130)

        return [optim], [scheduler]
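Note that in the param group list above only the backbone group pins its own learning rate; the classifier groups fall back to the lr passed to SGD itself. A short self-contained sketch of that fallback behaviour:

import torch.nn as nn
from torch.optim import SGD

backbone, head = nn.Linear(8, 8), nn.Linear(8, 2)
optim = SGD([{'params': backbone.parameters(), 'lr': 0.01},   # pinned lr
             {'params': head.parameters()}],                  # inherits the default below
            lr=0.1, momentum=0.9, nesterov=True)
print([group['lr'] for group in optim.param_groups])  # [0.01, 0.1]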
Example #8
def test_trainer_train_full(fake_loader, simple_neural_net):
    def transform_fn(batch):
        inputs, y_true = batch
        return inputs, y_true.float()

    metrics = [BinaryAccuracy()]

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_loader = DataLoader(FakeData(size=100,
                                       image_size=(3, 32, 32),
                                       num_classes=2,
                                       transform=transform),
                              batch_size=4,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(FakeData(size=50,
                                     image_size=(3, 32, 32),
                                     num_classes=2,
                                     transform=transform),
                            batch_size=4,
                            shuffle=True,
                            num_workers=1)

    model = Net()
    loss = nn.BCELoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)

    plotter = VisdomLinePlotter(env_name=f'Model {11}')

    callbacks = [
        ProgressBar(log_every=10),
        VisdomEpoch(plotter, on_iteration_every=10),
        VisdomEpoch(plotter, on_iteration_every=10, monitor='binary_acc'),
        EarlyStoppingEpoch(min_delta=0.1,
                           monitor='val_running_loss',
                           patience=10),
        ReduceLROnPlateauCallback(factor=0.1,
                                  threshold=0.1,
                                  patience=2,
                                  verbose=True)
    ]

    trainer = TorchTrainer(model)
    trainer.prepare(optimizer,
                    loss,
                    train_loader,
                    val_loader,
                    transform_fn=transform_fn,
                    callbacks=callbacks,
                    metrics=metrics)

    epochs = 10
    batch_size = 10
    trainer.train(epochs, batch_size)
Example #9
def create_optimizer(name, parameters, lr):
    if name == 'Adadelta':
        return Adadelta(parameters, lr=lr)
    elif name == 'Adam':
        return Adam(parameters, lr=lr)
    elif name == 'SGD':
        return SGD(parameters, lr=lr)
    else:
        raise KeyError(
            'Unknown optimizer type {!r}. Choose from [Adadelta | Adam | SGD]'.format(name))
Example #10
    def train(self):
        args = self.args
        model = self.model
        logger = self.logger
        epochRange = self._getEpochRange(self.nEpochs)

        # init optimizer
        optimizer = SGD(model.alphas(),
                        args.search_learning_rate,
                        momentum=args.search_momentum,
                        weight_decay=args.search_weight_decay)
        # init scheduler
        scheduler = ReduceLROnPlateau(optimizer,
                                      mode='min',
                                      factor=0.95,
                                      patience=args.search_patience,
                                      min_lr=args.search_learning_rate_min)

        for epoch in epochRange:
            print('========== Epoch:[{}/{}] =============='.format(
                epoch, self.nEpochs))
            # init epoch train logger
            trainLogger = HtmlLogger(self.trainFolderPath, epoch)
            # set loggers dictionary
            loggersDict = {self.trainLoggerKey: trainLogger}

            # create epoch jobs
            epochDataRows = self._createEpochJobs(epoch)
            # add epoch data rows
            for jobDataRow in epochDataRows:
                logger.addDataRow(jobDataRow, trType='<tr bgcolor="#2CBDD6">')

            # train alphas
            # epochLossDict, alphasDataRow = self.trainAlphas(self._getNextSearchQueueDataLoader(), optimizer, epoch, loggersDict)
            epochLossDict, alphasDataRow = self.trainAlphas(
                self.valid_queue, optimizer, epoch, loggersDict)
            # update scheduler
            scheduler.step(epochLossDict.get(self.flopsLoss.totalKey()))

            # calc model choosePathAlphasAsPartition flops ratio
            model.choosePathAlphasAsPartition()
            # add values to alphas data row
            additionalData = {
                self.epochNumKey: epoch,
                self.lrKey: optimizer.param_groups[0]['lr'],
                self.validFlopsRatioKey: model.flopsRatio()
            }
            self._applyFormats(additionalData)
            # add alphas data row
            alphasDataRow.update(additionalData)
            logger.addDataRow(alphasDataRow)

            # save checkpoint
            save_checkpoint(self.trainFolderPath, model, optimizer,
                            epochLossDict)
Example #11
def main():
    args = ArgumentsTrainVal().parse_args()

    print('***************************Arguments****************************')
    print(args)

    model, distribution = construct_model(args)
    print('--------------------------Model Info----------------------------')
    print(model)

    if args.resume_model is None:
        init_network(model)

    criterion = functional.cross_entropy
    optimizer = SGD(model.parameters(),
                    args.learning_rate,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)

    train_iterator, validate_iterator = construct_train_dataloaders(args)

    engine_args = [
        args.gpu_ids, model, criterion, distribution, train_iterator,
        validate_iterator, optimizer
    ]
    if args.num_classes == 1000 or args.num_classes == 1001:
        topk = [1, 5]
    else:
        topk = [1]

    # learning rate points
    lr_points = []
    if args.num_classes == 100:
        lr_points = [150, 225]
    elif args.num_classes == 1000 or args.num_classes == 1001:
        lr_points = [30, 60]
    print('==> Set lr_points for resnet54: {}'.format(lr_points))

    engine = construct_engine(*engine_args,
                              checkpoint_iter_freq=args.checkpoint_iter_freq,
                              checkpoint_epoch_freq=args.checkpoint_epoch_freq,
                              checkpoint_save_path=args.checkpoint_save_path,
                              iter_log_freq=args.iter_log_freq,
                              topk=topk,
                              num_classes=args.num_classes,
                              lambda_error=args.lambda_error,
                              environment=args.environment,
                              lr_points=lr_points)

    if args.ada_train:
        engine.ada_train(args.maxepoch)
    else:
        engine.resume(args.maxepoch, args.resume_epoch, args.resume_iteration)
Example #12
def test_linreg(n_epochs):
    print('Test: linear regression')
    
    x_values = []
    y_values = []
    
    for i in range(5):
        x_values.append(i)
        y_values.append(5*i + 2 + torch.randn(1).data.item())
    
    x_data = np.array(x_values, dtype=np.float32).reshape(-1, 1)
    y_data = np.array(y_values, dtype=np.float32).reshape(-1, 1)
    
    answer = RegressionAnswer(LinearRegression(), lambda model : LBFGS(model.parameters()), torch.nn.MSELoss(), x_data, y_data).run(60)
    
    test = [
        RegressionTest('A2Grad-uni', LinearRegression(), lambda model : A2Grad(model.parameters(), 'uni', 1e-1), torch.nn.MSELoss(), x_data, y_data), 
        RegressionTest('A2Grad-inc', LinearRegression(), lambda model : A2Grad(model.parameters(), 'inc', 1e-1), torch.nn.MSELoss(), x_data, y_data),
        RegressionTest('A2Grad-exp', LinearRegression(), lambda model : A2Grad(model.parameters(), 'exp', 1e-1), torch.nn.MSELoss(), x_data, y_data),
        RegressionTest('Adam', LinearRegression(), lambda model : Adam(model.parameters()), torch.nn.MSELoss(), x_data, y_data),
        RegressionTest('SGD', LinearRegression(), lambda model : SGD(model.parameters(), lr=1e-2), torch.nn.MSELoss(), x_data, y_data),
        RegressionTest('LBFGS', LinearRegression(), lambda model : LBFGS(model.parameters()), torch.nn.MSELoss(), x_data, y_data)
        ]
    
    plt.figure(figsize=(14, 8))
    for i in range(len(test)):
        test[i].run(n_epochs)
        plt.plot(np.arange(1, n_epochs + 1), np.array(test[i].errors) - answer, label=test[i].name)
    plt.legend(fontsize=12, loc=1)
    plt.title('Linear regression')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    plt.savefig('linear.png')
    
    plt.figure(figsize=(14, 8))
    for i in range(len(test)):
        plt.plot(np.arange(1, n_epochs + 1), np.array(test[i].errors) - answer, label=test[i].name)
    plt.legend(fontsize=12, loc=1)
    plt.ylim(0, 1e-5)
    plt.title('Linear regression')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    plt.savefig('linear2.png')
    
    points = np.arange(10, n_epochs, 10)
    header = "method   "
    for i in points:
        header += "{}                   ".format(i)
    print(header)
    for i in range(len(test)):
        test[i].output(points)
    print('')
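LinearRegression, RegressionAnswer and RegressionTest are not included in this example. A minimal sketch of what such a harness could look like, inferred from how it is used above (the class layout, the errors list and the output format are assumptions, not the original code):

import torch
import torch.nn as nn

class LinearRegression(nn.Module):
    # single-feature linear model, matching the x_data shape above
    def __init__(self):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)

class RegressionTest:
    # Hypothetical harness: trains `model` with the optimizer returned by
    # `optim_fn` and records the loss after every epoch in `self.errors`.
    def __init__(self, name, model, optim_fn, criterion, x_data, y_data):
        self.name = name
        self.model = model
        self.optimizer = optim_fn(model)
        self.criterion = criterion
        self.x = torch.from_numpy(x_data)
        self.y = torch.from_numpy(y_data)
        self.errors = []

    def run(self, n_epochs):
        for _ in range(n_epochs):
            def closure():
                self.optimizer.zero_grad()
                loss = self.criterion(self.model(self.x), self.y)
                loss.backward()
                return loss
            loss = self.optimizer.step(closure)  # the closure form also works for LBFGS
            self.errors.append(float(loss))
        return self.errors

    def output(self, points):
        # print the recorded loss at the requested epochs
        print(self.name + "  " + "  ".join("{:.10f}".format(self.errors[p - 1]) for p in points))

RegressionAnswer would follow the same pattern but return only the converged loss, which is subtracted as a baseline in the plots above.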
Example #13
def test_trainer_train_without_plugins(fake_loader, simple_neural_net):
    train_loader = fake_loader
    val_loader = fake_loader

    loss = nn.BCELoss()
    optimizer = SGD(simple_neural_net.parameters(), lr=0.001, momentum=0.9)

    trainer = TorchTrainer(simple_neural_net)
    trainer.prepare(optimizer,
                    loss,
                    train_loader,
                    val_loader,
                    transform_fn=transform_fn)
    trainer.train(1, 4)
Example #14
def train_model(training_file: str):

    oracles = read_oracles_from_file(DiscriminativeOracle, training_file)
    dataset = OracleDataset(oracles)
    dataset.load()
    dataset_loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    parser = DiscriminativeRnnGrammar(action_store=dataset.action_store,
                                      word2id=dataset.word_store,
                                      pos2id=dataset.pos_store,
                                      non_terminal2id=dataset.nt_store)
    optimiser = SGD(parser.parameters(), 0.1)

    train_early_stopping(dataset_loader, dataset_loader, parser, optimiser)
Example #15
    def __init__(self, hparams, ckpt_name, homedir, separate_history,
                 patience):
        self.hparams = hparams
        # batch size
        self.batch_size = 256
        # loader
        self.loader_train, self.loader_valid, self.loader_test = mnist_data_loader(
            self.batch_size, homedir)
        # model
        self.model = Network(hparams)
        # loss function
        self.loss_fn = nn.CrossEntropyLoss()
        # initial learning rate
        self.lr = hparams['lr']
        # momentum coef
        self.momentum = hparams['momentum']
        # optimizer
        self.optimizer = SGD(self.model.parameters(),
                             lr=self.lr,
                             momentum=self.momentum,
                             nesterov=True)
        # epoch
        self.epoch = 0
        # check point
        self.ckpt_dir = homedir + "ckpt"
        self.ckpt_name = ckpt_name
        # history
        self.separate_history = separate_history
        # patience
        self.patience = patience

        try:
            ckpt = self._load_checkpoint(self.ckpt_name)
            self.model.load_state_dict(ckpt['state_dict'])
            self.epoch = ckpt['current_epoch']
        except FileNotFoundError:
            pass
Example #16
def preTraining():
    '''
    Creates the model, data and optimizer for training

    -----
    Returns
    -----
    data - tuple with caption_ids, good_image and bad_image
    optimizer - SGD optimizer with momentum
    model - training model
    '''
    model = Model()
    data = ct.create_triples(300000, 25).reshape(-1,3)
    optim = SGD(model.parameters(), lr=0.001, momentum=0.9)
    return data, optim, model
Example #17
def get_optimizer(project_variable, model):
    # project_variable = ProjectVariable()

    if project_variable.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=project_variable.learning_rate)

    elif project_variable.optimizer == 'sgd':
        optimizer = SGD(model.parameters(),
                        lr=project_variable.learning_rate,
                        momentum=project_variable.momentum)

    else:
        print('Error: optimizer %s not supported' % project_variable.optimizer)
        optimizer = None

    return optimizer
Example #18
def test_train_MEG_swap():

    dataset_path = ["Z:\Desktop\sub8\\ball1_sss.fif"]

    dataset = MEG_Dataset(dataset_path, duration=1.0, overlap=0.0)

    train_len, valid_len, test_len = len_split(len(dataset))

    train_dataset, valid_dataset, test_dataset = random_split(
        dataset, [train_len, valid_len, test_len]
    )

    device = "cpu"

    trainloader = DataLoader(
        train_dataset, batch_size=10, shuffle=False, num_workers=1
    )

    validloader = DataLoader(
        valid_dataset, batch_size=2, shuffle=False, num_workers=1
    )

    epochs = 1

    with torch.no_grad():
        x, _, _ = next(iter(trainloader))
    n_times = x.shape[-1]

    net = models.MNet(n_times)

    optimizer = SGD(net.parameters(), lr=0.0001, weight_decay=5e-4)
    loss_function = torch.nn.MSELoss()

    model, _, _ = train(
        net,
        trainloader,
        validloader,
        optimizer,
        loss_function,
        device,
        epochs,
        10,
        0,
        "",
    )

    print("Test succeeded!")
Example #19
def test_trainer_train_steplr(fake_loader, simple_neural_net):
    def transform_fn(batch):
        inputs, y_true = batch
        return inputs, y_true.float()

    metrics = [BinaryAccuracy()]

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_loader = DataLoader(FakeData(size=100,
                                       image_size=(3, 32, 32),
                                       num_classes=2,
                                       transform=transform),
                              batch_size=4,
                              shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(FakeData(size=50,
                                     image_size=(3, 32, 32),
                                     num_classes=2,
                                     transform=transform),
                            batch_size=4,
                            shuffle=True,
                            num_workers=1)

    model = Net()
    loss = nn.BCELoss()
    optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)

    callbacks = [StepLREpochCallback()]

    trainer = TorchTrainer(model)
    trainer.prepare(optimizer,
                    loss,
                    train_loader,
                    val_loader,
                    transform_fn=transform_fn,
                    callbacks=callbacks,
                    metrics=metrics)

    epochs = 10
    batch_size = 10
    trainer.train(epochs, batch_size)
Example #20
def get_optimizer(model: nn.Module, optim: str, lr: float) -> Optimizer:
    """
    Return the optimizer that corresponds to string optim. Add the parameters from model and set learning rate to lr
    :param model: model to get the parameters from
    :param optim: name of the optimizer
    :param lr: learning rate to use in the optimizer
    :return:
    """
    if optim == "adagrad":
        return Adagrad(model.parameters(), lr=lr)
    elif optim == "sgd":
        return SGD(model.parameters(), lr=lr)
    elif optim == "rmsprop":
        return RMSprop(model.parameters(), lr=lr)
    elif optim == "adam":
        return Adam(model.parameters(), lr=lr)
    else:
        raise ValueError("Invalid optimizer")
Example #21
def learn_second(network,
                 lr,
                 model,
                 examples_files,
                 total_example,
                 alpha=1.0,
                 batch_size=20):
    """
    Helper function used to optimize O2
    :param network: network model to optimize
    :param lr: learning rate
    :param model: model containing the shared data
    :param examples_files: list of files containing the examples
    :param total_example: total example for training
    :param alpha: trade-off param
    :param batch_size: size of the batch
    :return: loss value
    """

    num_batch = 0

    log.info("compute o2")
    optimizer = SGD(network.parameters(), lr)
    log.debug("read example file: {}".format("\t".join(examples_files)))
    loss_val = 0

    if alpha <= 0:
        return loss_val

    for batch in emb_utils.batch_generator(emb_utils.prepare_sentences(
            model, graph_utils.combine_example_files_iter(examples_files),
            network.transfer_fn(model.vocab)),
                                           batch_size,
                                           long_tensor=LongTensor):
        input, output = batch
        loss = (alpha * network.forward(
            input, output, negative_sampling_fn=model.negative_sample))
        loss_val += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        num_batch += 1

        if (num_batch) % 10000 == 0:
            log.info("community embedding batches completed: {}".format(
                num_batch / (total_example / batch_size)))

    log.debug("O2 loss: {}".format(loss_val))
    return loss_val
Example #22
def split_optimizer(model: nn.Module, cfg: dict):
    param_weight_decay, param_bias, param_other = split_params(model)
    if cfg['optimizer'] == 'Adam':
        optimizer = Adam(param_other,
                         lr=cfg['lr'],
                         betas=(cfg['momentum'],
                                0.999))  # adjust beta1 to momentum
    elif cfg['optimizer'] == 'SGD':
        optimizer = SGD(param_other,
                        lr=cfg['lr'],
                        momentum=cfg['momentum'],
                        nesterov=True)
    else:
        raise NotImplementedError("optimizer {:s} is not support!".format(
            cfg['optimizer']))
    optimizer.add_param_group({
        'params': param_weight_decay,
        'weight_decay': cfg['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': param_bias})  # add pg2 (biases)
    return optimizer
Example #23
def test_trainer_train_with_metric(fake_loader, simple_neural_net):
    train_loader = fake_loader
    val_loader = fake_loader

    metrics = [BinaryAccuracy()]

    loss = nn.BCELoss()
    optimizer = SGD(simple_neural_net.parameters(), lr=0.001, momentum=0.9)

    trainer = TorchTrainer(simple_neural_net)
    trainer.prepare(optimizer,
                    loss,
                    train_loader,
                    val_loader,
                    transform_fn=transform_fn,
                    metrics=metrics,
                    validate_every=1)
    final_result = trainer.train(1, 4)

    assert 'binary_acc' in final_result
    assert 'val_binary_acc' in final_result
Example #24
def learn_community(network, lr, model, nodes, beta=1.0, batch_size=20):
    """
    Helper function used to optimize O3
    :param network: model to optimize
    :param lr: learning rate
    :param model: model containing the shared data
    :param nodes: nodes on which execute the learning
    :param beta: trade-off value
    :param batch_size: size of the batch
    :return: loss value
    """

    num_batch = 0

    log.info("compute o3")
    optimizer = SGD(network.parameters(), lr)
    loss_val = 0

    if beta <= 0.:
        return loss_val

    for batch in emb_utils.batch_generator(emb_utils.prepare_sentences(
            model, nodes, network.transfer_fn()),
                                           batch_size,
                                           long_tensor=LongTensor):

        input, output = batch
        loss = network.forward(input, model)
        loss.data *= (beta / model.k)
        loss_val += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        num_batch += 1

        if (num_batch) % 10000 == 0:
            log.info("community embedding batches completed: {}".format(
                num_batch / (total_example / batch_size)))

    log.debug("O3 loss: {}".format(loss_val))
    return loss_val
Example #25
def learn_first(network, lr, model, edges, num_iter=1, batch_size=20):
    """
    Helper function used to optimize O1
    :param network: neural network to train
    :param lr: learning rate
    :param model: model containing the shared data
    :param edges: numpy list of edges used for training
    :param num_iter: iteration number over the edges
    :param batch_size: size of the batch
    :return: loss value
    """
    log.info("computing o1")
    optimizer = SGD(network.parameters(), lr)

    num_batch = 0
    total_batch = (edges.shape[0] * num_iter) / batch_size
    loss_val = 0
    for batch in emb_utils.batch_generator(emb_utils.prepare_sentences(
            model, edges, network.transfer_fn(model.vocab)),
                                           batch_size,
                                           long_tensor=LongTensor):

        input, output = batch
        loss = network.forward(input,
                               output,
                               negative_sampling_fn=model.negative_sample)

        loss_val += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        num_batch += 1

        if (num_batch) % 10000 == 0:
            log.info("community embedding batches completed: {}".format(
                num_batch / total_batch))

    log.debug("O1 loss: {}".format(loss_val))
    return loss_val
Example #26
def learn_second(network, lr, model, examples_files, alpha=1.0):
    """
    Helper function used to optimize O2
    :param network: network model to optimize
    :param lr: learning rate
    :param model: deprecated_model used to compute the batches and the negative sampling
    :param examples_files: list of files containing the examples
    :param alpha: trade-off parameter
    :return: None
    """
    log.info("compute o2")
    optimizer = SGD(network.parameters(), lr)
    log.debug("read example file: {}".format("\t".join(examples_files)))
    for batch in emb_utils.batch_generator(
            emb_utils.prepare_sentences(
                model, graph_utils.combine_example_files_iter(examples_files),
                network.transfer_fn(model.vocab)), 20):
        input, output = batch
        loss = (alpha * network.forward(
            input, output, negative_sampling_fn=model.negative_sample))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #27
def learn_first(network, lr, model, edges, num_iter=1):
    """
    Helper function used to optimize O1
    :param network: neural network to train
    :param lr: learning rate
    :param model: deprecated_model used to compute the batches and the negative sampling
    :param edges: numpy list of edges used for training
    :param num_iter: iteration number over the edges
    :return: 
    """
    log.info("computing o1")
    optimizer = SGD(network.parameters(), lr)
    for batch in emb_utils.batch_generator(
            emb_utils.prepare_sentences(
                model, emb_utils.RepeatCorpusNTimes(edges, n=num_iter),
                network.transfer_fn(model.vocab)), 20):
        input, output = batch
        loss = network.forward(input,
                               output,
                               negative_sampling_fn=model.negative_sample)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #28
def main(args):

    data_dir = args.data_dir
    figure_path = args.figure_dir
    model_path = args.model_dir

    # Set skip_training to False if the model has to be trained, to True if the model has to be loaded.
    skip_training = False

    # Set the torch device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device = {}".format(device))

    # Initialize parameters
    parameters = Params_cross(
        subject_n=args.sub,
        hand=args.hand,
        batch_size=args.batch_size,
        valid_batch_size=args.batch_size_valid,
        test_batch_size=args.batch_size_test,
        epochs=args.epochs,
        lr=args.learning_rate,
        wd=args.weight_decay,
        patience=args.patience,
        device=device,
        desc=args.desc,
    )
    # Import data and generate train-, valid- and test-set
    # Set if generate with RPS values or not (check network architecture used later)

    print("Testing: {} ".format(parameters.desc))

    mlp = False

    train_dataset = MEG_Cross_Dataset(data_dir,
                                      parameters.subject_n,
                                      parameters.hand,
                                      mode="train")

    valid_dataset = MEG_Cross_Dataset(data_dir,
                                      parameters.subject_n,
                                      parameters.hand,
                                      mode="val")

    test_dataset = MEG_Cross_Dataset(data_dir,
                                     parameters.subject_n,
                                     parameters.hand,
                                     mode="test")

    transfer_dataset = MEG_Cross_Dataset(data_dir,
                                         parameters.subject_n,
                                         parameters.hand,
                                         mode="transf")

    print("Train dataset len {}, valid dataset len {}, test dataset len {}, "
          "transfer dataset len {}".format(
              len(train_dataset),
              len(valid_dataset),
              len(test_dataset),
              len(transfer_dataset),
          ))

    # Initialize the dataloaders
    trainloader = DataLoader(train_dataset,
                             batch_size=parameters.batch_size,
                             shuffle=True,
                             num_workers=4)

    validloader = DataLoader(valid_dataset,
                             batch_size=parameters.valid_batch_size,
                             shuffle=True,
                             num_workers=4)

    testloader = DataLoader(
        test_dataset,
        batch_size=parameters.test_batch_size,
        shuffle=False,
        num_workers=4,
    )

    transferloader = DataLoader(transfer_dataset,
                                batch_size=parameters.valid_batch_size,
                                shuffle=True,
                                num_workers=4)

    # Initialize network
    if mlp:
        net = RPS_MLP()
    else:
        # Get the n_times dimension
        with torch.no_grad():
            sample, y, _ = next(iter(trainloader))

        n_times = sample.shape[-1]
        net = RPS_MNet_ivan(n_times)

    print(net)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        net = nn.DataParallel(net)

    # Training loop
    if not skip_training:
        print("Begin training....")

        # Check the optimizer before running (different from model to model)
        optimizer = Adam(net.parameters(),
                         lr=parameters.lr,
                         weight_decay=parameters.wd)
        # optimizer = SGD(net.parameters(), lr=parameters.lr, momentum=0.9, weight_decay=parameters.wd)

        scheduler = ReduceLROnPlateau(optimizer,
                                      mode="min",
                                      factor=0.5,
                                      patience=15)

        print("scheduler : ", scheduler)

        loss_function = torch.nn.MSELoss()
        # loss_function = torch.nn.L1Loss()
        start_time = timer.time()

        if mlp:
            net, train_loss, valid_loss = train_bp_MLP(
                net,
                trainloader,
                validloader,
                optimizer,
                scheduler,
                loss_function,
                parameters.device,
                parameters.epochs,
                parameters.patience,
                parameters.hand,
                model_path,
            )
        else:
            net, train_loss, valid_loss = train_bp(
                net,
                trainloader,
                validloader,
                optimizer,
                scheduler,
                loss_function,
                parameters.device,
                parameters.epochs,
                parameters.patience,
                parameters.hand,
                model_path,
            )

        train_time = timer.time() - start_time
        print("Training done in {:.4f}".format(train_time))

        # visualize the loss as the network trained
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(1,
                       len(train_loss) + 1),
                 train_loss,
                 label="Training Loss")
        plt.plot(range(1,
                       len(valid_loss) + 1),
                 valid_loss,
                 label="Validation Loss")

        # find position of lowest validation loss
        minposs = valid_loss.index(min(valid_loss)) + 1
        plt.axvline(
            minposs,
            linestyle="--",
            color="r",
            label="Early Stopping Checkpoint",
        )

        plt.xlabel("epochs")
        plt.ylabel("loss")
        # plt.ylim(0, 0.5) # consistent scale
        # plt.xlim(0, len(train_loss)+1) # consistent scale
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()
        image1 = fig
        plt.savefig(os.path.join(figure_path, "loss_plot.pdf"))

    if not skip_training:
        # Save the trained model
        save_pytorch_model(net, model_path, "model.pth")
    else:
        # Load the model (properly select the model architecture)
        net = RPS_MNet()
        net = load_pytorch_model(net, os.path.join(model_path, "model.pth"),
                                 parameters.device)

    # Evaluation
    print("Evaluation...")
    net.eval()
    y_pred = []
    y = []
    y_pred_valid = []
    y_valid = []

    # if RPS integration
    with torch.no_grad():
        if mlp:
            for _, labels, bp in testloader:
                labels, bp = (
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y.extend(list(labels[:, parameters.hand]))
                y_pred.extend((list(net(bp))))

            for _, labels, bp in validloader:
                labels, bp = (
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y_valid.extend(list(labels[:, parameters.hand]))
                y_pred_valid.extend((list(net(bp))))
        else:
            for data, labels, bp in testloader:
                data, labels, bp = (
                    data.to(parameters.device),
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y.extend(list(labels[:, parameters.hand]))
                y_pred.extend((list(net(data, bp))))

            for data, labels, bp in validloader:
                data, labels, bp = (
                    data.to(parameters.device),
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y_valid.extend(list(labels[:, parameters.hand]))
                y_pred_valid.extend((list(net(data, bp))))

    # Calculate Evaluation measures
    print("Evaluation measures")
    mse = mean_squared_error(y, y_pred)
    rmse = mean_squared_error(y, y_pred, squared=False)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    rmse_valid = mean_squared_error(y_valid, y_pred_valid, squared=False)
    r2_valid = r2_score(y_valid, y_pred_valid)
    valid_loss_last = min(valid_loss)

    print("Test set ")
    print("mean squared error {}".format(mse))
    print("root mean squared error {}".format(rmse))
    print("mean absolute error {}".format(mae))
    print("r2 score {}".format(r2))

    print("Validation set")
    print("root mean squared error valid {}".format(rmse_valid))
    print("r2 score valid {}".format(r2_valid))
    print("last value of the validation loss: {}".format(valid_loss_last))

    # plot y_new against the true value focus on 100 timepoints
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(200)
    ax.plot(times, y_pred[0:200], color="b", label="Predicted")
    ax.plot(times, y[0:200], color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("Target")
    ax.set_title("Sub {}, hand {}, Target prediction".format(
        str(parameters.subject_n), "sx" if parameters.hand == 0 else "dx"))
    plt.legend()
    plt.savefig(os.path.join(figure_path, "Times_prediction_focus.pdf"))
    plt.show()

    # plot y_new against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_pred))
    ax.plot(times, y_pred, color="b", label="Predicted")
    ax.plot(times, y, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("Target")
    ax.set_title("Sub {}, hand {}, target prediction".format(
        str(parameters.subject_n), "sx" if parameters.hand == 0 else "dx"))
    plt.legend()
    plt.savefig(os.path.join(figure_path, "Times_prediction.pdf"))
    plt.show()

    # scatterplot y predicted against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.scatter(np.array(y), np.array(y_pred), color="b", label="Predicted")
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    # plt.legend()
    plt.savefig(os.path.join(figure_path, "Scatter.pdf"))
    plt.show()

    # scatterplot y predicted against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.scatter(np.array(y_valid),
               np.array(y_pred_valid),
               color="b",
               label="Predicted")
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    # plt.legend()
    plt.savefig(os.path.join(figure_path, "Scatter_valid.pdf"))
    plt.show()

    # Transfer learning, feature extraction.

    optimizer_trans = SGD(net.parameters(), lr=3e-4)

    loss_function_trans = torch.nn.MSELoss()
    # loss_function_trans = torch.nn.L1Loss()

    if mlp:
        net, train_loss = train_mlp_transfer(
            net,
            transferloader,
            optimizer_trans,
            loss_function_trans,
            parameters.device,
            50,
            parameters.patience,
            parameters.hand,
            model_path,
        )
    else:
        # net, train_loss = train_bp_transfer(
        #     net,
        #     transferloader,
        #     optimizer_trans,
        #     loss_function_trans,
        #     parameters.device,
        #     50,
        #     parameters.patience,
        #     parameters.hand,
        #     model_path,
        # )
        net, train_loss = train_bp_fine_tuning(net, transferloader,
                                               optimizer_trans,
                                               loss_function_trans,
                                               parameters.device, 50, 10,
                                               parameters.hand, model_path)

    # Evaluation
    print("Evaluation after transfer...")
    net.eval()
    y_pred = []
    y = []

    # if RPS integration
    with torch.no_grad():
        if mlp:
            for _, labels, bp in testloader:
                labels, bp = (
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y.extend(list(labels[:, parameters.hand]))
                y_pred.extend((list(net(bp))))
        else:
            for data, labels, bp in testloader:
                data, labels, bp = (
                    data.to(parameters.device),
                    labels.to(parameters.device),
                    bp.to(parameters.device),
                )
                y.extend(list(labels[:, parameters.hand]))
                y_pred.extend((list(net(data, bp))))

    print("Evaluation measures")
    rmse_trans = mean_squared_error(y, y_pred, squared=False)
    r2_trans = r2_score(y, y_pred)

    print("root mean squared error after transfer learning {}".format(
        rmse_trans))
    print("r2 score after transfer learning  {}".format(r2_trans))

    # scatterplot y predicted against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.scatter(np.array(y), np.array(y_pred), color="b", label="Predicted")
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    # plt.legend()
    plt.savefig(os.path.join(figure_path, "Scatter_after_trans.pdf"))
    plt.show()

    # log the model and parameters using mlflow tracker
    with mlflow.start_run(experiment_id=args.experiment) as run:
        for key, value in vars(parameters).items():
            mlflow.log_param(key, value)

        mlflow.log_param("Time", train_time)

        mlflow.log_metric("MSE", mse)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)

        mlflow.log_metric("RMSE_Valid", rmse_valid)
        mlflow.log_metric("R2_Valid", r2_valid)
        mlflow.log_metric("Valid_loss", valid_loss_last)

        mlflow.log_metric("RMSE_T", rmse_trans)
        mlflow.log_metric("R2_T", r2_trans)

        mlflow.log_artifact(os.path.join(figure_path, "Times_prediction.pdf"))
        mlflow.log_artifact(
            os.path.join(figure_path, "Times_prediction_focus.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "loss_plot.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "Scatter.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "Scatter_valid.pdf"))
        mlflow.log_artifact(
            os.path.join(figure_path, "Scatter_after_trans.pdf"))
        mlflow.pytorch.log_model(net, "models")
Example #29
    assert False, "Invalid model name"

if args.weights_path is None:
    model.load_vgg_weights()
else:
    model.load_weights(args.weights_path)

if args.device == 'cuda':
    model = nn.DataParallel(model)

logging.info(model)

### train info ###
if args.optimizer == 'SGD':
    optimizer = SGD(model.parameters(),
                    lr=args.learning_rate,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    logging.info(
        'Optimizer Info:'
        '\nOptimizer: {}'
        '\nlearning rate: {}, Momentum: {}, Weight decay: {}\n'.format(
            args.optimizer, args.learning_rate, args.momentum,
            args.weight_decay))
elif args.optimizer == 'Adam':
    optimizer = Adam(model.parameters(),
                     lr=args.learning_rate,
                     weight_decay=args.weight_decay)
    logging.info('Optimizer Info:'
                 '\nOptimizer: {}'
                 '\nlearning rate: {}, Weight decay: {}\n'.format(
                     args.optimizer, args.learning_rate, args.weight_decay))
Example #30
def main():
    args = parser.parse_args()
    # REPRODUCIBILITY
    torch.manual_seed(0)
    np.random.seed(0)

    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Retrieve views candidates and right number of views
    if args.case == '1':
        args.vcand = np.load('view_candidates/vcand_case1.npy')
        args.nview = 12
    elif args.case == '2':
        args.vcand = np.load('view_candidates/vcand_case2.npy')
        args.nview = 20
    elif args.case == '3':
        args.vcand = np.load('view_candidates/vcand_case3.npy')
        args.nview = 160

    # Names for the saved checkpoints
    args.fname_best = 'rotationnet{}_model_best{}.pth.tar'.format(args.nview,
                                                                  datetime.now().strftime("%d_%b_%Y_%H_%M_%S"))
    args.fname = 'rotationnet{}_model{}.pth.tar'.format(args.nview, datetime.now().strftime("%d_%b_%Y_%H_%M_%S"))

    logger.debug("Number of view candidates: {}".format(np.shape(args.vcand)[0]))
    logger.debug("Number of views: {}".format(args.nview))

    if torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')
    logger.debug("PyTorch is using  {}".format(args.device))

    # The mini batch size is used to do an update of the gradient, so it needs to be divisible
    # by the number of views, otherwise one or more classifications are incomplete
    if args.batch_size % args.nview != 0:
        logger.error('Batch size should be a multiple of the number of views, {}'.format(args.nview))
        exit(1)

    # Get number of classes
    logger.debug("Number of classes: {}".format(args.num_classes))

    # Create RotationNet model based on the given architecture.
    # The output size is (num_classes + wrong_view class) * the number of views
    model = RotationNet(args.arch, args.pretrained, (args.num_classes + 1) * args.nview, args.feature_extraction,
                        args.depth)

    # Multi GPUs
    if torch.cuda.device_count() > 1:
        logger.debug("Using multiple GPUs")
        model = torch.nn.DataParallel(model)
    # Send model to GPU or keep it to the CPU
    model = model.to(device=args.device)

    # Define loss function (criterion) and optimizer
    # Sending loss to cuda is unnecessary because loss function is not stateful
    # TODO test if it works without sending loss to GPU
    criterion = nn.CrossEntropyLoss().to(device=args.device)

    if args.optimizer == "ADAM":
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), args.learning_rate,
                         weight_decay=args.weight_decay)
    elif args.optimizer == "ADAGRAD":
        optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()), args.learning_rate,
                            weight_decay=args.weight_decay)
    else:
        # If we use feature extraction (features weights are frozen), we need to keep only differentiable params
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()), args.learning_rate,
                        momentum=args.momentum, weight_decay=args.weight_decay)

    # https://stackoverflow.com/questions/58961768/set-torch-backends-cudnn-benchmark-true-or-not
    # some boost when the network does not change
    # useless because the cluster does not have cudnn
    # cudnn.benchmark = True

    logger.info("Model args: {}".format(args))

    if args.train_type == 'k-fold':
        logger.debug("K-fold training")
        train_k_fold(model, criterion, optimizer, args)
    elif args.train_type == 'hold-out':
        logger.debug("Hold-out training")
        train_hold_out(model, criterion, optimizer, args)
    elif args.train_type == 'full':
        logger.debug("Full training")
        train_all(model, criterion, optimizer, args)
    elif args.train_type == 'evaluate':
        logger.debug("Start evaluation on test set")
        test_model(model, criterion, args)
    elif args.train_type == 'aligned':
        logger.debug("Holt-out training on aligned set")
        train_hold_out_aligned(model, criterion,optimizer, args)
    elif args.train_type == "test":
        logger.debug("Start real time test")
        threshold_evaluation(model, args)