Example #1
import os
import time

# Dataset, get_model, train, and validate are project-local helpers assumed
# to be imported from the surrounding repository.
def main(dataset_name, save_dir, cfg):
    ## Data
    data_dir = os.path.join('/scail/data/group/atlas/kalpit/data',
                            dataset_name)
    dataset = Dataset(data_dir)
    dataset.data_reshape(
        (cfg.input_height, cfg.input_width, cfg.input_nchannels))

    ## Model
    print('Creating Model...')
    model = get_model(dataset_name + '_conv', cfg)
    #model.summary()

    ## Train
    print('Training Model...')
    starttime = time.time()
    train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc = train(
        model, dataset, cfg)
    endtime = time.time()
    #plot_loss(train_loss, save_dir, 'training_cost', 'training_cost')
    #plot_loss(val_loss, save_dir, 'validation_cost', 'validation_cost')

    ## Validate
    print()
    print('Final Validation...')
    validate(model, dataset)

    ## Training Time
    print('Training Time: {:.2f}'.format(endtime - starttime))
    return min(train_loss)
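A minimal sketch of how this entry point might be driven; Config is the project-local class constructed as Config(save_dir) in Example #5, while the 'mnist' name, path, and 28x28x1 input shape here are illustrative assumptions:

# Hypothetical driver script; dataset name, save_dir, and shape are assumptions.
save_dir = '/tmp/mnist_run'
cfg = Config(save_dir)                  # project-local Config class
cfg.input_height = 28                   # assumed MNIST input dimensions
cfg.input_width = 28
cfg.input_nchannels = 1
best_loss = main('mnist', save_dir, cfg)
print('best training loss: {:.4f}'.format(best_loss))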
Example #2
# Imports and project-local helpers as in Example #1; the train_* and
# validate_* trainers are assumed to come from the same repository.
def main(dataset_name, network, save_dir, cfg):
    ## Data
    data_dir = os.path.join('/scail/data/group/atlas/kalpit/data',
                            dataset_name)
    dataset = Dataset(data_dir)

    ## Model
    print('Creating Model...')
    model = get_model(dataset_name + '_' + network, cfg)
    #model.summary()

    ## Train
    print('Training Model...')
    starttime = time.time()
    if network == 'ff':
        if cfg.optimizer == 'kalpit':
            train_loss, val_loss, val_acc = train_ff_kalpit(
                model, dataset, cfg, save_dir)
        else:
            train_loss, val_loss, val_acc = train_ff_vanilla(
                model, dataset, cfg, save_dir)
    elif network == 'conv':
        dataset.data_reshape(
            (cfg.input_height, cfg.input_width,
             cfg.input_nchannels))  # for both mnist and cifar10
        if cfg.optimizer == 'kalpit':
            train_loss, val_loss, val_acc = train_conv_kalpit(
                model, dataset, cfg, save_dir)
        else:
            train_loss, val_loss, val_acc = train_conv_vanilla(
                model, dataset, cfg, save_dir)
    elif network == 'autoencoder':
        if cfg.optimizer == 'kalpit':
            train_loss, val_loss = train_autoencoder_kalpit(
                model, dataset, cfg, save_dir)
        else:
            train_loss, val_loss = train_autoencoder_vanilla(
                model, dataset, cfg, save_dir)
    else:
        raise NotImplementedError
    endtime = time.time()
    #plot_loss(train_loss, save_dir, 'training_cost', 'training_cost')
    #plot_loss(val_loss, save_dir, 'validation_cost', 'validation_cost')

    ## Validate
    print()
    print('Final Validation...')
    if network == 'ff':
        validate_ff(model, dataset)
    elif network == 'conv':
        validate_conv(model, dataset)
    elif network == 'autoencoder':
        validate_autoencoder(model, dataset)

    ## Training Time
    print('Training Time: {:.2f}'.format(endtime - starttime))
    return min(train_loss)
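This variant dispatches on both network ('ff', 'conv', or 'autoencoder') and cfg.optimizer, with any value other than 'kalpit' falling back to the vanilla trainers. A minimal call sketch under the same Config assumptions as above, with illustrative CIFAR-10 dimensions:

# Hypothetical driver; the 32x32x3 shape and paths are assumptions.
save_dir = '/tmp/cifar10_conv'
cfg = Config(save_dir)
cfg.input_height, cfg.input_width, cfg.input_nchannels = 32, 32, 3
cfg.optimizer = 'kalpit'                # anything else selects the vanilla trainer
best_loss = main('cifar10', 'conv', save_dir, cfg)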
Example #3
    # Draws n_samples stochastic forward passes per batch and stores the
    # per-observation mean and std. Assumes the enclosing class provides
    # model, params, test_set_IDs, n_samples, difference_length,
    # inverseScale, and inverseDifference, and that torch and the
    # project-local Dataset are imported.
    def samplePredictions(self):
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda:0" if use_cuda else "cpu")
        torch.backends.cudnn.benchmark = True

        validation_set = Dataset(self.test_set_IDs)
        validation_generator = torch.utils.data.DataLoader(
            validation_set, **self.params)

        with torch.set_grad_enabled(False):
            counter = 0
            for local_batch, local_labels in validation_generator:
                if counter % 10 == 0:
                    print(f'Batch prediction number {counter+1}...')

                # Transfer to GPU once per batch (not once per sample)
                local_batch, local_labels = local_batch.to(
                    device), local_labels.to(device)

                # Draw n_samples stochastic forward passes over the batch
                outputs = []
                for sample in range(self.n_samples):
                    outputs.append(self.model(local_batch.float()))

                # do inverse transform before taking mean and std
                preds = [self.inverseScale(output) for output in outputs]

                if self.difference_length > 0:
                    IDs = self.test_set_IDs[counter *
                                            self.params['batch_size']:
                                            (counter + 1) *
                                            self.params['batch_size']]
                    preds = [self.inverseDifference(pred, IDs) for pred in preds]

                preds = torch.stack(preds)

                # find mean and std for each observation, store these in a list
                if counter == 0:
                    means = preds.mean(axis=0)
                    stds = preds.std(axis=0)
                else:
                    means = torch.cat((means, preds.mean(axis=0)), 0)
                    stds = torch.cat((stds, preds.std(axis=0)), 0)
                counter += 1

        self.means = means
        self.stds = stds
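Because the mean and std are taken across n_samples forward passes, the model must be stochastic at inference time (e.g., Bayesian layers like the sample_elbo model in Example #4, or dropout left active) for the std to carry uncertainty information. A short sketch of consuming the stored results, where predictor is a hypothetical instance of the enclosing class:

predictor.samplePredictions()
# Rough two-sigma predictive band per observation.
lower = predictor.means - 2 * predictor.stds
upper = predictor.means + 2 * predictor.stds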
Example #4
File: train.py Project: jorgeta/dlproject
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Dataset is the project-local torch-style dataset wrapper; the model is
# expected to expose sample_elbo (e.g., a Bayesian network).
def train_model(
    model,
    max_epochs,
    params,
    partition,
    elbo_sample_nbr,
):

    # CUDA for PyTorch
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    torch.backends.cudnn.benchmark = True

    model.to(device)

    # Generators
    training_set = Dataset(partition['train'])
    training_generator = torch.utils.data.DataLoader(training_set, **params)

    validation_set = Dataset(partition['test'])
    validation_generator = torch.utils.data.DataLoader(validation_set,
                                                       **params)

    # define criterion and optimiser
    criterion = nn.MSELoss()
    optimiser = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1.0)
    #optimiser = optim.SGD(model.parameters(), lr=1e-3)

    # store losses per epoch
    train_losses = np.zeros(max_epochs)
    test_losses = np.zeros(max_epochs)

    scheduler = StepLR(optimiser, step_size=15)

    for epoch in range(max_epochs):
        # Training
        counter = 0
        current_loss_sum = 0
        for local_batch, local_labels in training_generator:
            # Transfer to GPU
            local_batch, local_labels = local_batch.to(
                device), local_labels.to(device)

            # Model computations
            optimiser.zero_grad()

            # forward and backward propagation
            batch_loss_elbo = model.sample_elbo(inputs=local_batch.float(),
                                                labels=local_labels.float(),
                                                criterion=criterion,
                                                sample_nbr=elbo_sample_nbr)

            batch_loss_elbo.backward()

            optimiser.step()

            # Monitor the plain MSE on this batch; cast to float to match
            # the ELBO forward pass above, and skip autograd since this
            # pass is for logging only.
            with torch.no_grad():
                output = model(local_batch.float())
                current_train_loss = criterion(output, local_labels.float())
            current_loss_sum += current_train_loss.item()

            counter += 1
        train_losses[epoch] = current_loss_sum / counter

        # Validation
        counter = 0
        current_loss_sum = 0
        with torch.set_grad_enabled(False):
            for local_batch, local_labels in validation_generator:
                # Transfer to GPU
                local_batch, local_labels = local_batch.to(
                    device), local_labels.to(device)

                # Model computations
                output = model(local_batch.float())

                current_test_loss = criterion(output, local_labels.float())
                current_loss_sum += current_test_loss.item()

                counter += 1
            test_losses[epoch] = current_loss_sum / counter

        scheduler.step()

        # Output losses after each epoch
        print(
            f'Epoch {epoch+1} train loss: {train_losses[epoch]}, test loss: {test_losses[epoch]}'
        )

    return model, train_losses, test_losses
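A minimal sketch of driving train_model, assuming the model exposes sample_elbo as the call above implies (e.g., a blitz-style Bayesian network); the ID lists and loader settings are placeholders:

# Hypothetical setup; train_ids/test_ids and batch settings are placeholders.
params = {'batch_size': 64, 'shuffle': True}        # forwarded to DataLoader
partition = {'train': train_ids, 'test': test_ids}  # IDs for the Dataset wrapper
model, train_losses, test_losses = train_model(
    model, max_epochs=30, params=params,
    partition=partition, elbo_sample_nbr=3)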
Example #5
        if not os.path.exists(save_dir):
            break
    #run_id = run_id
    #save_dir = '/atlas/u/kalpit/Second-Order/code/mnist/output'
    shutil.rmtree(save_dir, ignore_errors=True)  # requires 'import shutil'; safer than shelling out to rm -rf
    os.makedirs(save_dir)
   
    ## redirect stdout
    if final_run:
        sys.stdout = open(os.path.join(save_dir, 'stdout'), 'w')
    print(run_id)
    print('testing')

    ## Data
    data_dir = os.path.join('/scail/data/group/atlas/kalpit/data', dataset_name)
    dataset = Dataset(data_dir)

    ## Config
    cfg = Config(save_dir)
    
    ## Model
    print('Creating Model...')
    print('DROPOUT NOT IMPLEMENTED CORRECTLY FOR VALIDATION!!!')
    model = get_model(dataset_name, cfg)
    #model.summary()

    ## Train
    print('Training Model...')
    starttime = time.time()
    train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc = train(model, dataset, cfg)
    endtime = time.time()