Example #1
def test_splits_are_mutually_exclusive(self):
    data = [5, 2, 3, 4, 1, 6]
    splits = random_split(data, [2, 4])
    all_values = []
    all_values.extend(list(splits[0]))
    all_values.extend(list(splits[1]))
    data.sort()
    all_values.sort()
    self.assertListEqual(data, all_values)
def criterion_seg(pred, label):
    return nn.MSELoss()(pred, label)


train_dataset = UnpairedDataset('../',
                                path_a='ct_sag_kr/train',
                                path_b=None,
                                corrupt=True,
                                augment=True)
test_dataset = UnpairedDataset('../',
                               path_a='ct_sag_kr/test',
                               path_b=None,
                               corrupt=True)
num_test = len(test_dataset) - num_val

train_dataset, _ = random_split(
    train_dataset, [num_train, len(train_dataset) - num_train])
val_dataset, test_dataset = random_split(test_dataset, [num_val, num_test])
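# Note: without an explicit generator, each random_split call above draws a fresh
# permutation, so the train/val/test membership changes between runs; in recent
# PyTorch versions a seeded generator (e.g. generator=torch.Generator().manual_seed(0),
# an illustrative seed) can be passed to make the splits reproducible.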

train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          num_workers=5,
                          pin_memory=True)
val_loader = DataLoader(val_dataset,
                        batch_size=1,
                        shuffle=False,
                        num_workers=5,
                        pin_memory=True)
test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False,
                         num_workers=5,
Example #3
    ])

else:
    transformed_dataset = ConcatDataset([
        transformed_dataset_cw_1, transformed_dataset_cw_2,
        transformed_dataset_cw_3, transformed_dataset_cw_4,
        transformed_dataset_cw_5, transformed_dataset_cw_6,
        transformed_dataset_cw_7, transformed_dataset_cw_8,
        transformed_dataset_cw_9, transformed_dataset_cw_10,
        transformed_dataset_cw_11
    ])

train_len = int(args.train_percent * len(transformed_dataset))

train_data, test_data = random_split(
    transformed_dataset,
    [train_len, len(transformed_dataset) - train_len])

# load training data loader
kwargs = {'num_workers': 8, 'pin_memory': True} if args.cuda else {}
train_loader = DataLoader(train_data,
                          batch_size=args.batch_size,
                          shuffle=True,
                          drop_last=False,
                          **kwargs)

# load testing data loader
test_loader = DataLoader(test_data,
                         batch_size=args.test_batch_size,
                         shuffle=True,
                         drop_last=False,
Example #4
# preprocess data and feed them into data loaders -----------
x = np.array(df['data'].values, dtype=np.uint8).reshape((len(df), 1))
x = np.unpackbits(x, axis=1)  # unpack each uint8 value into its 8 binary digits
y = np.array(df['label'].values)
print(x.shape)
print(y.shape)

# create data_loaders
x_tensor = torch.from_numpy(x).float()
y_tensor = torch.from_numpy(y).float()

dataset = TensorDataset(x_tensor, y_tensor)

train_size = int(len(dataset) / 5 * 4)  # 20% used for validation
train_dataset, val_dataset = random_split(
    dataset, [train_size, len(dataset) - train_size])

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)
val_loader = DataLoader(dataset=val_dataset, batch_size=len(val_dataset))
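# Note: val_loader uses batch_size=len(val_dataset), so the whole validation split is
# delivered as a single batch.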


# model -----------------------------------------------------
# model with one hidden layer and one output layer
class my_model(nn.Module):
    def __init__(self, n_in=8, n_hidden=10, n_out=2):
        super(my_model, self).__init__()
        self.n_in = n_in
        self.n_out = n_out

        self.layer1 = nn.Linear(self.n_in, self.n_out,
                                bias=True)  # hidden layer
Example #5
])

test_transform = A.Compose([
    A.SmallestMaxSize(args.max_size),
    #A.CenterCrop(args.image_size, args.image_size, p=1.),
    A.Normalize(mean=[0.4452, 0.4457, 0.4464], std=[0.2592, 0.2596, 0.2600]),
    ToTensorV2(),
])

# Define Dataset and DataLoader
dataset = TrainDataset(args, transform=train_transform)

# Add 1 so the leftover sample from the 80/20 rounding goes to the training split
train_size = int(len(dataset) * 0.8) + 1
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
val_dataset.transform = test_transform
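# Note: random_split returns Subset wrappers, so the assignment above only attaches a
# .transform attribute to the Subset; items fetched through val_dataset are still
# produced by the underlying TrainDataset with train_transform.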
test_dataset = TestDataset(args, transform=test_transform)

train_sampler = RandomSampler(train_dataset)
val_sampler = SequentialSampler(val_dataset)
test_sampler = SequentialSampler(test_dataset)

train_loader = DataLoader(train_dataset,
                          sampler=train_sampler,
                          batch_size=args.batch_size,
                          num_workers=args.num_workers,
                          pin_memory=False,
                          drop_last=True)
val_loader = DataLoader(val_dataset,
                        sampler=val_sampler,
Example #6
    # return none for the time being
    return mean_dice_a, mean_dice_b, mean_loss_a, mean_loss_b


# loader from siegen dataset for validation
train_scan_dataset = UnpairedDataset('../',
                                     'ct_sag_kr/train',
                                     'mr_sag_kr/train',
                                     contours=True)
test_scan_dataset = UnpairedDataset('../',
                                    'ct_sag_kr/test',
                                    'mr_sag_kr/test',
                                    contours=True)

scan_dataset, _ = random_split(
    train_scan_dataset,
    [num_train, len(train_scan_dataset) - num_train])
scan_dataset_test, scan_dataset_val, _ = random_split(
    test_scan_dataset,
    [num_val, num_test,
     len(test_scan_dataset) - (num_val + num_test)])
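# The trailing `_` split absorbs the samples beyond num_val + num_test so that the
# requested lengths sum to len(test_scan_dataset), as random_split requires.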

train_loader = DataLoader(scan_dataset, batch_size=batch_size, num_workers=5)
val_loader = DataLoader(scan_dataset_val,
                        batch_size=1,
                        shuffle=False,
                        num_workers=5)
test_loader = DataLoader(scan_dataset_test,
                         batch_size=1,
                         shuffle=False,
                         num_workers=5)
Example #7
def test_splits_have_correct_size(self):
    splits = random_split([1, 2, 3, 4, 5, 6], [2, 4])
    self.assertEqual(len(splits), 2)
    self.assertEqual(len(splits[0]), 2)
    self.assertEqual(len(splits[1]), 4)
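
A hedged aside, not part of the test above: in recent PyTorch versions (roughly 1.13 onward) random_split also accepts fractional lengths that sum to 1, for example:

# illustrative sketch only
train_part, val_part = random_split(range(10), [0.8, 0.2],
                                    generator=torch.Generator().manual_seed(42))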
Example #8
def model_train(config, train_dataset, checkpoint_dir=None):
    # print(config)
    test_abs = int(len(train_dataset) * 0.8)
    train_subset, val_subset = random_split(
        train_dataset, [test_abs, len(train_dataset) - test_abs])

    train_loader = DataLoader(dataset=train_subset,
                              batch_size=int(config["batch_size"]),
                              shuffle=True)
    val_loader = DataLoader(dataset=val_subset,
                            batch_size=int(config["batch_size"]),
                            shuffle=True)
    '''Define the CNN model'''
    model = CNN()
    # print(model.state_dict())
    loss_function = nn.MSELoss()  #(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=3 * pow(10, -3))  # 3 * pow(10, -5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
    '''Model training'''
    model.train()
    for epoch in range(config["n_epoch"]):
        running_loss = []
        for i, data in enumerate(train_loader, 0):
            x_batch, y_batch = data
            predictions = model(x_batch)
            # print(predictions.size(), y_batch.size())

            loss = loss_function(predictions, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss.append(loss.item())
            # if i % 100 == 99:  # print every 200 mini-batches
            #   print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, np.mean(np.array(running_loss))))
            #   running_loss = []

        val_loss = 0.0
        val_steps = 0
        mean_error = 0
        model.eval()
        with torch.no_grad():
            predictions_list = []
            targets_list = []
            for i, data in enumerate(val_loader, 0):
                x_batch, y_batch = data
                predictions = model(x_batch)
                # print(predictions.size(), y_batch.size())

                loss = loss_function(predictions, y_batch)
                predictions = predictions.numpy().reshape(-1)
                y_batch = y_batch.numpy().reshape(-1)
                predictions_list.extend(predictions.tolist())
                targets_list.extend(y_batch.tolist())
                mean_error += np.mean(abs(predictions - y_batch))
                val_loss += loss.item()
                val_steps += 1
        scheduler.step(val_loss)

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)
        tune.report(train_loss=np.mean(np.array(running_loss)),
                    loss=(val_loss / val_steps),
                    mean_error=mean_error / val_steps)
    #calib_posterior.set_to(calib_prior)
    #calib_posterior.row_cov.parameter.detach()

    calib_posterior.stddev = .15
    calib_posterior.loc.data = torch.randn(1)

    model = AdditiveDiscrepancy(computer_model,
                                discrepancy,
                                calib_prior,
                                calib_posterior,
                                true_calib=true_calib)

    ### Initialization of the computer model
    init_batchsize_run = run_size  # all data are taken for initialization
    init_data_run, _ = random_split(
        train_data_loader.loaders[1].dataset,
        [init_batchsize_run, run_size - init_batchsize_run])
    dataloader_run_for_init = SingleSpaceBatchLoader(DataLoader(
        init_data_run, batch_size=init_batchsize_run),
                                                     cat_inputs=True)
    computer_model_initializer = IBLMInitializer(computer_model,
                                                 dataloader_run_for_init,
                                                 noise_var=noise_std_run**2)
    computer_model_initializer.initialize()

    lr = .02
    iterations_free_noise = 100
    device = None
    verbose = False
    lr_calib = 0.1
    outdir = vcal.vardl_utils.next_path('workspace/minimalist_example/%s/' %
    VGG_loss = VGGContentLossMultiLayer([27]).to(device)
    L1_loss = nn.L1Loss().to(device)
    L2_loss = nn.MSELoss().to(device)

    def critereon(input, output, target):
        return VGG_loss(output, target) + L2_loss(output, target) + L1_loss(
            output, input)

    black_flags = open("./data/black_flags.txt",
                       encoding="utf-8").read().splitlines()
    dataset = BallFlagDatasetBSM("./data",
                                 exclude_countries=black_flags,
                                 use_augmentation=True)
    test_len = int(len(dataset) * test_percentage)
    train_len = len(dataset) - test_len
    train_dataset, test_dataset = random_split(dataset, [train_len, test_len])
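    # Note: both Subsets returned by random_split share the same underlying
    # BallFlagDatasetBSM instance, so use_augmentation=True applies to the test split
    # here as well as the train split.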

    train_dataloader = DataLoader(train_dataset,
                                  bs,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True,
                                  num_workers=1)
    test_dataloader = DataLoader(test_dataset,
                                 bs,
                                 shuffle=False,
                                 pin_memory=True,
                                 drop_last=True,
                                 num_workers=0)
    assert len(test_dataloader) > 0, "too few samples"
    # Weird code but this is how it has to be done
Example #11

seed_torch()

# Paths to the files with training, and validation sets.
# Each file contains pairs (path to image, output vector)
#pathFileTrain = ''
pathFileTrain = '1kmultiViewCSV.csv'
# pathFileTrain = 'CheXpert-v1.0-small/multiViewCSV.csv'
pathFileValid = 'CheXpert-v1.0-small/valid.csv'

#LOAD DATASET
#dataset = CheXpertDataSet(pathFileTrain ,transformSequence, policy=policy)
dataset = CheXpertDataSet(pathFileTrain, transformSequence, policy=policy)

datasetTest, datasetTrain = random_split(dataset, [500, len(dataset) - 500])
# datasetTest = torch.load("test_frontal_pa.txt")

datasetValid = CheXpertDataSet(pathFileValid, transformSequence)

dataLoaderTrain = DataLoader(dataset=datasetTrain,
                             batch_size=trBatchSize,
                             shuffle=True,
                             num_workers=24,
                             pin_memory=True)
dataLoaderVal = DataLoader(dataset=datasetValid,
                           batch_size=trBatchSize,
                           shuffle=False,
                           num_workers=24,
                           pin_memory=True)
dataLoaderTest = DataLoader(dataset=datasetTest,
Example #12
def main():

    num_args = len(sys.argv)

    # Checking if filename input is specified
    if num_args < 2:
        sys.exit("Please specify an input file")

    filename = str(sys.argv[1])
    p = Path(filename)

    # Checking if filepath is valid and/or file exists
    if not (p.exists()):
        sys.exit("File not found")

    # Prepare data processing pipelines
    tokenizer = get_tokenizer('basic_english')
    train_iter = AG_NEWS(split='train')

    vocab = build_vocab_from_iterator(yield_tokens(train_iter, tokenizer),
                                      specials=["<unk>"])
    vocab.set_default_index(vocab["<unk>"])

    text_pipeline = lambda x: vocab(tokenizer(x))
    label_pipeline = lambda x: int(x) - 1

    # Generate data batch and iterator
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def collate_batch(batch):
        label_list, text_list, offsets = [], [], [0]
        for (_label, _text) in batch:
            label_list.append(label_pipeline(_label))
            processed_text = torch.tensor(text_pipeline(_text),
                                          dtype=torch.int64)
            text_list.append(processed_text)
            offsets.append(processed_text.size(0))
        label_list = torch.tensor(label_list, dtype=torch.int64)
        offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)
        text_list = torch.cat(text_list)
        return label_list.to(device), text_list.to(device), offsets.to(device)

    # This variable needs to be initialized twice or else an IndexError occurs
    train_iter = AG_NEWS(split='train')
    dataloader = DataLoader(train_iter,
                            batch_size=8,
                            shuffle=False,
                            collate_fn=collate_batch)

    # Build an instance
    num_class = len(set([label for (label, text) in train_iter]))
    vocab_size = len(vocab)
    emsize = 64
    model = TextClassificationModel(vocab_size, emsize, num_class).to(device)

    # Split the dataset and run the model
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
    total_accu = None
    train_iter, test_iter = AG_NEWS()
    train_dataset = to_map_style_dataset(train_iter)
    test_dataset = to_map_style_dataset(test_iter)
    num_train = int(len(train_dataset) * 0.95)
    split_train_, split_valid_ = \
        random_split(train_dataset,
        [num_train, len(train_dataset) - num_train])
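    # random_split needs a map-style dataset with a defined __len__, which is why the
    # iterable AG_NEWS splits are converted with to_map_style_dataset above.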

    train_dataloader = DataLoader(split_train_,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=collate_batch)
    valid_dataloader = DataLoader(split_valid_,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  collate_fn=collate_batch)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=BATCH_SIZE,
                                 shuffle=True,
                                 collate_fn=collate_batch)

    # Run epochs
    for epoch in range(1, EPOCHS + 1):
        epoch_start_time = time.time()
        train(train_dataloader, model, optimizer, criterion, epoch)
        accu_val = evaluate(valid_dataloader, model, criterion)
        if total_accu is not None and total_accu > accu_val:
            scheduler.step()
        else:
            total_accu = accu_val
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid accuracy {:8.3f} '.format(epoch,
                                               time.time() - epoch_start_time,
                                               accu_val))
        print('-' * 59)

    print('Checking the results of test dataset.')
    accu_test = evaluate(test_dataloader, model, criterion)
    print('test accuracy {:8.3f}'.format(accu_test))

    # Run article prediction
    ag_news_label = {1: "World", 2: "Sports", 3: "Business", 4: "Sci/Tec"}

    with p.open() as readfile:
        ex_text_str = readfile.read()

    model = model.to("cpu")

    print("This is a %s news" %
          ag_news_label[predict(ex_text_str, text_pipeline, model)])
Example #13
def test2():
    Ms = [GRU, LSTM]
    name = ["MoE", "GRU", "LSTM"]

    X = torch.randn((100, 8, 30))
    X2 = torch.randn((100, 8, 40))
    Y = X
    Y2 = X2

    feature_dims = {
        "phase_dim": 5,
        "pose_dim": 15,
        "cost_dim": 10,
        "g_input_dim": config["k"] + config["cost_hidden_dim"],
        "g_output_dim": 5 + config["k"] + 10
    }

    feature_dims2 = {
        "phase_dim": 5,
        "pose_dim": 25,
        "cost_dim": 10,
        "g_input_dim": config["k"] + config["cost_hidden_dim"],
        "g_output_dim": 5 + config["k"] + 10
    }

    in_slice = [5, 15, 10]
    in_slice2 = [5, 25, 10]
    out_slice = [5, config["k"], 10]
    out_slice2 = [5, config["k"], 10]

    dataset = TensorDataset(X, Y)
    dataset2 = TensorDataset(X2, Y2)
    train_set, val_set, test_set = random_split(dataset, [80, 10, 10])
    train_set2, val_set2, test_set2 = random_split(dataset2, [80, 10, 10])
    train_loader = DataLoader(train_set, batch_size=10)
    train_loader2 = DataLoader(train_set2, batch_size=10)
    val_loader = DataLoader(val_set, batch_size=10)
    val_loader2 = DataLoader(val_set2, batch_size=10)
    test_loader = DataLoader(test_set, batch_size=10)
    test_loader2 = DataLoader(test_set2, batch_size=10)

    for nam, M in zip(name, Ms):
        pose_encoder = MLP(config=config, dimensions=[15])
        pose_encoder2 = MLP(config=config,
                            dimensions=[15],
                            pose_labels=torch.arange(63).unsqueeze(0))
        pose_encoder3 = MLP(config=config,
                            dimensions=[
                                25, config["hidden_dim"], config["hidden_dim"],
                                config["k"]
                            ],
                            pose_labels=torch.arange(63).unsqueeze(0))

        m1 = MotionGenerationModelRNN(
            config=config,
            Model=M,
            pose_autoencoder=pose_encoder,
            feature_dims=feature_dims,
            input_slicers=in_slice,
            output_slicers=out_slice,
            train_set=train_set,
            val_set=val_set,
            test_set=test_set,
        )
        m2 = MotionGenerationModelRNN(
            config=config,
            Model=M,
            pose_autoencoder=pose_encoder2,
            feature_dims=feature_dims,
            input_slicers=in_slice,
            output_slicers=out_slice,
            train_set=train_set,
            val_set=val_set,
            test_set=test_set,
        )

        print("-" * 50, nam, "-" * 50)
        trainer = pl.Trainer(max_epochs=5)
        print(nam, "-TEST RESULTS BEFORE", "-" * 50)
        res1 = trainer.test(m1, test_loader)
        trainer.fit(
            m1,
            train_loader,
            val_loader,
        )
        print(nam, "-TEST RESULTS AFTER", "-" * 50)
        res2 = trainer.test(m1, test_loader)
        print("IMPROVEMENT: ", res1[0]["test_loss"] - res2[0]["test_loss"])

        print("-" * 50, nam, "-" * 50)
        trainer = pl.Trainer(max_epochs=5)
        print(nam, "-TEST RESULTS BEFORE", "-" * 50)
        res1 = trainer.test(m2, test_loader)
        trainer.fit(
            m2,
            train_loader,
            val_loader,
        )
        print(nam, "-TEST RESULTS AFTER", "-" * 50)
        res2 = trainer.test(m2, test_loader)
        print("IMPROVEMENT: ", res1[0]["test_loss"] - res2[0]["test_loss"])

        m2.swap_pose_encoder(pose_encoder=pose_encoder3,
                             input_dim=in_slice2,
                             output_dim=out_slice2,
                             feature_dims=feature_dims2,
                             freeze=True)
        print("-" * 50, nam, "-" * 50)
        trainer = pl.Trainer(max_epochs=5)
        print(nam, "-TEST RESULTS BEFORE", "-" * 50)
        res1 = trainer.test(m2, test_loader2)
        trainer.fit(m2, train_loader2, val_loader2)
        print(nam, "-TEST RESULTS AFTER", "-" * 50)
        res2 = trainer.test(m2, test_loader2)
        print("IMPROVEMENT: ", res1[0]["test_loss"] - res2[0]["test_loss"])
Example #14
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split

epochs = 5
lr = 0.01
momentum = 0.5
log_interval = 200
seed = 1
torch.manual_seed(seed)

# create training and validation dataset
dataset_train, dataset_valid = random_split(
    Dataset('./pca1_data/'), [149, 30],
    generator=torch.Generator().manual_seed(42))
dataset_train, dataset_test = random_split(
    dataset_train, [121, 28], generator=torch.Generator().manual_seed(42))
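# Seeding both calls with manual_seed(42) makes the 149/30 and 121/28 partitions
# deterministic across runs; the second call further divides the 149-sample training
# subset into training and test parts.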
NWORKERS = 24
device = 'cuda' if torch.cuda.is_available() else 'cpu'

TRAIN = DataLoader(dataset=dataset_train,
                   batch_size=3,
                   shuffle=True,
                   drop_last=False,
                   num_workers=NWORKERS)

TEST = DataLoader(dataset=dataset_test,
                  batch_size=3,
                  shuffle=True,
Example #15
def test_splits_have_correct_size(self):
    splits = random_split([1, 2, 3, 4, 5, 6], [2, 4])
    self.assertEqual(len(splits), 2)
    self.assertEqual(len(splits[0]), 2)
    self.assertEqual(len(splits[1]), 4)
Example #16

class CustomDataset(Dataset):
    def __init__(self, x_tensor, y_tensor):
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.x)


dataset = TensorDataset(x_tensor, y_tensor)
(train_dataset, val_dataset) = random_split(dataset, [80, 20])
hvd_sampler_train_loader = torch.utils.data.distributed.DistributedSampler(
    dataset=train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=16,
                          sampler=hvd_sampler_train_loader)
hvd_sampler_val_loader = torch.utils.data.distributed.DistributedSampler(
    dataset=val_dataset, num_replicas=hvd.size(), rank=hvd.rank())
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=20,
                        sampler=hvd_sampler_val_loader)
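# With a DistributedSampler built from hvd.size() and hvd.rank(), each Horovod worker
# iterates over its own (roughly disjoint) shard of the 80-sample training Subset and
# the 20-sample validation Subset.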


class ManualLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
Example #17
def train_model(model, x, y):
    if len(np.shape(x)) == 1:
        x = x[:, None]
    if len(np.shape(y)) == 1:
        y = y[:, None]
    D_in = np.shape(x)[1]
    D_out = np.shape(y)[1]
    N = 50

    dataset = TensorDataset(x, y)

    N_train = int(3 * len(y) / 5)
    train_dataset, val_dataset = random_split(
        dataset, [N_train, len(y) - N_train])

    train_loader = DataLoader(dataset=train_dataset, batch_size=N)
    val_loader = DataLoader(dataset=val_dataset, batch_size=N)

    loss_fn = torch.nn.MSELoss(reduction='sum')
    val_loss_fn = lambda target, output: loss_fn(target[:, :2], output[:, :2])

    # Use the optim package to define an Optimizer that will update the weights of
    # the model for us. Here we will use Adam; the optim package contains many other
    # optimization algorithms. The first argument to the Adam constructor tells the
    # optimizer which Tensors it should update.
    learning_rate = .0001
    n_epochs = 10000
    training_losses = []
    validation_losses = []
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for t in range(n_epochs):
        batch_losses = []

        with torch.no_grad():
            val_losses = []
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                yhat = model(x_val)
                val_loss = val_loss_fn(y_val, yhat).item()
                val_losses.append(val_loss)
            validation_loss = np.mean(val_losses)
            validation_losses.append(validation_loss)

        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            # Forward pass: compute predicted y by passing x to the model.
            y_pred = model(x_batch)

            # Compute and print loss.
            loss = loss_fn(y_pred, y_batch)

            optimizer.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()

            batch_losses.append(loss.item())
        training_loss = np.mean(batch_losses)
        training_losses.append(training_loss)

        print(
            f"[{t+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}"
        )

        if t > 100 and validation_losses[-2] <= validation_losses[-1]:
            break

    plt.figure()
    plt.semilogy(range(len(training_losses)),
                 training_losses,
                 label='Training Loss')
    plt.semilogy(range(len(training_losses)),
                 validation_losses,
                 label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    model.eval()
    return model
Example #18
def test_lengths_must_equal_datset_size(self):
    with self.assertRaises(ValueError):
        random_split([1, 2, 3, 4], [1, 2])
    return loss / len(data_), acc / len(data_)


# ## Split the dataset and run the model

import time
from torch.utils.data.dataset import random_split
N_EPOCHS = 5
min_valid_loss = float('inf')

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

train_len = int(len(train_dataset) * 0.95)
sub_train_, sub_valid_ = random_split(
    train_dataset, [train_len, len(train_dataset) - train_len])

for epoch in range(N_EPOCHS):

    start_time = time.time()
    train_loss, train_acc = train_func(sub_train_)
    valid_loss, valid_acc = test(sub_valid_)

    secs = int(time.time() - start_time)
    mins = secs / 60
    secs = secs % 60

    print('Epoch: %d' % (epoch + 1),
          " | time in %d minutes, %d seconds" % (mins, secs))
    print(
        f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)'
y_tensors = torch.stack([
    torch.from_numpy(clip[1:][:, :-(cost_dim + extra_feature_len)]).float()
    for clip in data_tensors
])

print(len(x_tensors), x_tensors[0].shape)
print(len(y_tensors), y_tensors[0].shape)

dataset = TensorDataset(x_tensors, y_tensors)
N = len(x_tensors)

train_ratio = int(.7 * N)
val_ratio = int((N - train_ratio) / 2.0)
test_ratio = N - train_ratio - val_ratio
train_set, val_set, test_set = random_split(
    dataset, [train_ratio, val_ratio, test_ratio],
    generator=torch.Generator().manual_seed(2021))
print(len(train_set), len(val_set), len(test_set))

input_dim = phase_dim + pose_dim + cost_dim
output_dim = phase_dim + pose_dim - extra_feature_len
print(input_dim)
print(output_dim)

config = {
    "k_experts": tune.choice([1, 2, 4, 8, 10]),
    "gate_size": tune.choice([16, 32, 64, 128]),
    "keep_prob": tune.choice([.2, .25, .3]),
    "hidden_dim": tune.choice([16, 32, 64, 128, 256, 512]),
    "cost_hidden_dim": tune.choice([16, 32, 64, 128]),
    "batch_size": tune.choice([1]),
Example #21
def train_model(x, y):
    if len(np.shape(y)) == 1:
        y = y[:, None]
    D_in = np.shape(x)[1]
    D_out = np.shape(y)[1]

    x_train_tensor = torch.from_numpy(x).float()
    y_train_tensor = torch.from_numpy(y).float()

    dataset = TensorDataset(x_train_tensor, y_train_tensor)

    N_train = int(4 * len(y) / 5)
    train_dataset, val_dataset = random_split(
        dataset, [N_train, len(y) - N_train])

    train_loader = DataLoader(dataset=train_dataset, batch_size=N)
    val_loader = DataLoader(dataset=val_dataset, batch_size=N)

    # Use the nn package to define our model and loss function.
    model = torch.nn.Sequential(
        torch.nn.Linear(D_in, H),
        torch.nn.ReLU(),
        torch.nn.ReLU(),
        torch.nn.ReLU(),
        torch.nn.Linear(H, D_out),
    )
    loss_fn = torch.nn.MSELoss(reduction='sum')

    # Use the optim package to define an Optimizer that will update the weights of
    # the model for us. Here we will use Adam; the optim package contains many other
    # optimization algorithms. The first argument to the Adam constructor tells the
    # optimizer which Tensors it should update.
    learning_rate = .0001
    n_epochs = 5000
    training_losses = []
    validation_losses = []
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    for t in range(n_epochs):

        with torch.no_grad():
            val_losses = []
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                model.eval()
                yhat = model(x_val)
                val_loss = loss_fn(y_val, yhat).item()
                val_losses.append(val_loss)
            validation_loss = np.mean(val_losses)
            validation_losses.append(validation_loss)

        batch_losses = []
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            model.train()
            # Forward pass: compute predicted y by passing x to the model.
            y_pred = model(x_batch)

            # Compute and print loss.
            loss = loss_fn(y_pred, y_batch)

            # Before the backward pass, use the optimizer object to zero all of the
            # gradients for the variables it will update (which are the learnable
            # weights of the model). This is because by default, gradients are
            # accumulated in buffers( i.e, not overwritten) whenever .backward()
            # is called. Checkout docs of torch.autograd.backward for more details.
            optimizer.zero_grad()

            # Backward pass: compute gradient of the loss with respect to model
            # parameters
            loss.backward()

            # Calling the step function on an Optimizer makes an update to its
            # parameters
            optimizer.step()

            batch_losses.append(loss.item())
        training_loss = np.mean(batch_losses)
        training_losses.append(training_loss)

        print(
            f"[{t+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}"
        )

        if t > 1000 and validation_losses[-1] < 0.05 and np.mean(
                validation_losses[-20:-10]) < np.mean(
                    validation_losses[-9:-1]):
            break

    model.eval()

    # plt.figure()
    # plt.semilogy(range(len(training_losses)), training_losses, label='Training Loss')
    # plt.semilogy(range(len(training_losses)), validation_losses, label='Validation Loss')
    # plt.xlabel('Epoch')
    # plt.ylabel('Loss')
    # plt.legend()
    # plt.show()

    return model
Example #22
def get_data_loader(dataset_name, batch_size):

    # retrieve dataset constructor
    if dataset_name == "svhn":
        dataset = torchvision.datasets.SVHN
    elif dataset_name == "cifar10":
        dataset = torchvision.datasets.CIFAR10
    elif dataset_name == "cifar100":
        dataset = torchvision.datasets.CIFAR100
    elif dataset_name == "stl10":
        dataset = torchvision.datasets.STL10
    elif dataset_name == "imagenet":
        dataset = torchvision.datasets.ImageNet

    # data normalization
    image_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((.5, .5, .5), (.5, .5, .5))
    ])

    # training and validation data
    try:
        trainvalid = dataset(dataset_name,
                             split='train+unlabeled',
                             download=True,
                             transform=image_transform)
    except:
        try:
            trainvalid = dataset(dataset_name,
                                 split='train',
                                 download=True,
                                 transform=image_transform)
        except:
            trainvalid = dataset(dataset_name,
                                 train=True,
                                 download=True,
                                 transform=image_transform)

    trainset_size = int(len(trainvalid) * 0.9)
    trainset, validset = random_split(
        trainvalid,
        [trainset_size, len(trainvalid) - trainset_size])

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True)

    validloader = torch.utils.data.DataLoader(validset, batch_size=batch_size)

    # test data
    try:
        testdata = dataset(dataset_name,
                           split='test',
                           download=True,
                           transform=image_transform)
    except:
        testdata = dataset(dataset_name,
                           train=False,
                           download=True,
                           transform=image_transform)

    testloader = torch.utils.data.DataLoader(testdata, batch_size=batch_size)

    try:
        n_classes = int(np.max(trainvalid.labels) + 1)
    except:
        n_classes = int(np.max(trainvalid.targets) + 1)

    return trainloader, validloader, testloader, n_classes
Example #23
        torch.reshape(encode_sent.get('attention_mask'), (-1, )))

#%%
train_x = torch.stack(outputs)
train_x_mask = torch.stack(outputs_masks)
train_y = torch.tensor(train_df['polarity'], dtype=torch.long)

test_x = torch.stack(test_outputs)
test_x_mask = torch.stack(test_outputs_masks)

train_data = TensorDataset(train_x, train_x_mask, train_y)

test_data = TensorDataset(test_x, test_x_mask)

TrainData, ValidationData = random_split(
    train_data,
    [int(0.9 * len(train_data)),
     len(train_data) - int(0.9 * len(train_data))])

HIDDEN_DIM = 256
EMB_DIM = 768
BATCH_SIZE = 64
OUTPUT_DIM = 2
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
EPOCHS = 3
batch_size = 64
#Device = False

# loss and optimization functions
learningrate = 0.0001
Example #24
            loss += loss.item()
            acc += (output.argmax(1) == cls).sum().item()

    return loss / len(data_), acc / len(data_)


N_EPOCHS = 5
min_valid_loss = float('inf')

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=4.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

train_len = int(len(train_dataset) * 0.95)
sub_train_, sub_valid_ = \
    random_split(train_dataset, [train_len, len(train_dataset) - train_len])

for epoch in range(args.epochs):

    start_time = time.time()
    train_loss, train_acc = train_func(sub_train_)
    valid_loss, valid_acc = test(sub_valid_)

    secs = int(time.time() - start_time)
    mins = secs / 60
    secs = secs % 60

    print('Epoch: %d' % (epoch + 1),
          " | time in %d minutes, %d seconds" % (mins, secs))
    print(
        f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)'
Example #25
# Disable annoying PyTorch warnings
warnings.filterwarnings('ignore')

# Train the neural network
# Performs k-fold cross validation

for run in range(runs):
    if cv:
        # Split dataset into k folds for k-fold cross validation
        k = 10 # Number of data folds
        f_sizes = (k - 1) * [int(k/100. * len(dataset))]
        f_sizes.append(len(dataset) - sum(f_sizes))
        # Set seed
        torch.manual_seed(run)
        folds = random_split(dataset, f_sizes)
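        # With k = 10 this yields ten folds of about 10% of the data each (the last
        # fold absorbs the rounding remainder). random_split draws from the default RNG
        # when no generator is passed, so torch.manual_seed(run) above makes the fold
        # assignment reproducible per run.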
    else:
        # If not cv, divide dataset into training and test sets at random
        nb_train_data = int(0.9 * len(dataset))
        train_idx = np.random.choice(len(dataset), size=nb_train_data, replace=False)
        test_idx = np.array([i for i in range(len(dataset)) if i not in train_idx])
    
    # Training
    ntimes = k if cv else 1
    for i in range(ntimes):
        net = spi.ClassificationModule(d, f, out)
        net.double()
        criterion = F.cross_entropy if out > 1 else F.binary_cross_entropy
        optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
        if cv:
            test_dataloader = torch.utils.data.DataLoader(folds[i], batch_size=len(folds[i]))
Example #26
    # return none for the time being
    return mean_dice, mean_loss


# 3 pairs of dataset, CT (Train / test), MRI (Train/test), Siegen public MRI (Train/test)
train_a_dataset = UnpairedDataset('../', path_a='ct_sag_kr/train', path_b=None)
test_a_dataset = UnpairedDataset('../', path_a='ct_sag_kr/test', path_b=None)

train_b_dataset = UnpairedDataset('../', path_a='mr_sag_kr/train', path_b=None)
test_b_dataset = UnpairedDataset('../', path_a='mr_sag_kr/test', path_b=None)

train_c_dataset = UnpairedDataset('../', path_a='siegen/train', path_b=None)
test_c_dataset = UnpairedDataset('../', path_a='siegen/test', path_b=None)


train_a_dataset, _ = random_split(train_a_dataset, [num_a_train, len(train_a_dataset) - num_a_train])
val_a_dataset, test_a_dataset, _ = random_split(test_a_dataset, 
                                                [num_a_val, num_a_test, len(test_a_dataset) - (num_a_val + num_a_test)])

train_b_dataset, _ = random_split(train_b_dataset, [num_b_train, len(train_b_dataset) - num_b_train])
val_b_dataset, test_b_dataset, _ = random_split(test_b_dataset, 
                                                [num_b_val, num_b_test, len(test_b_dataset) - (num_b_val + num_b_test)])

train_c_dataset, _ = random_split(train_c_dataset, [num_c_train, len(train_c_dataset) - num_c_train])
val_c_dataset, test_c_dataset, _ = random_split(test_c_dataset, 
                                                [num_c_val, num_c_test, len(test_c_dataset) - (num_c_val + num_c_test)])


train_a_loader = DataLoader(train_a_dataset, batch_size=batch_a_size, num_workers=5, pin_memory=True)
val_a_loader = DataLoader(val_a_dataset, batch_size=1, shuffle=False, num_workers=5, pin_memory=True)
test_a_loader = DataLoader(test_a_dataset, batch_size=1, shuffle=False, num_workers=5, pin_memory=True)
Example #27
    # lr_steps = 5
    # lr_test = np.linspace(lr_min,lr_max,lr_steps)
    #lr_test = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5] #optimal was 0.0005 during first run
    #lr_test = [0.0005]

    #batch_size_test = [512, 256, 128, 64, 32, 16, 8]

    #momentum_test = [0.3, 0.6, 0.8, 0.9, 1, 1.1]


    #####################################################

    print('Using augmentation: ',transform_name)

    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform1)
    trainset, validset = random_split(trainset,[45000,5000])

    trainset_transformed2 = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform2)
    trainset_transformed2, valid_transformed2 = random_split(trainset_transformed2,[45000,5000])

    trainset_transformed3 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform3)
    trainset_transformed3, valid_transformed3 = random_split(trainset_transformed3,[45000,5000])
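    # Note: the three random_split calls above each draw an independent permutation, so
    # validset, valid_transformed2 and valid_transformed3 do not hold the same held-out
    # images; a shared seeded generator would be needed to keep them aligned across the
    # three transforms.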

    trainset = ConcatDataset([trainset, trainset_transformed2, trainset_transformed3])
    #trainset = ConcatDataset([trainset, trainset_transformed2,])

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
    validloader = torch.utils.data.DataLoader(validset, batch_size=batch_size, shuffle=True, num_workers=2)


    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform1)
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = self.log_smx(x)
        return x


# download data

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)
mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])

# train dataloader
mnist_train = DataLoader(mnist_train, batch_size=64)

# val dataloader
mnist_val = DataLoader(mnist_val, batch_size=64)

# test dataloader
mnist_test = DataLoader(mnist_test, batch_size=64)

# optimizer + scheduler
net = Net()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
Example #29
def create_train_and_test(model_data, split_percent=0.9):
    train_len = int(len(model_data) * split_percent)
    sub_train, sub_valid = random_split(
        model_data, [train_len, len(model_data) - train_len])
    return sub_train, sub_valid
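
# Minimal usage sketch (the list below is illustrative, not from the original snippet):
train_part, valid_part = create_train_and_test(list(range(100)), split_percent=0.8)
print(len(train_part), len(valid_part))  # 80 20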
Example #30
def test_lengths_must_equal_datset_size(self):
    with self.assertRaises(ValueError):
        random_split([1, 2, 3, 4], [1, 2])
Example #31
from torch.utils.data.dataset import random_split
# Hyperparameters
EPOCHS = 10 # epoch
LR = 5  # learning rate
BATCH_SIZE = 64 # batch size for training
  
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
total_accu = None
train_iter, test_iter = AG_NEWS()
train_dataset = list(train_iter)
test_dataset = list(test_iter)
num_train = int(len(train_dataset) * 0.95)
split_train_, split_valid_ = \
    random_split(train_dataset, [num_train, len(train_dataset) - num_train])

train_dataloader = DataLoader(split_train_, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
valid_dataloader = DataLoader(split_valid_, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=True, collate_fn=collate_batch)

for epoch in range(1, EPOCHS + 1):
    epoch_start_time = time.time()
    train(train_dataloader)
    accu_val = evaluate(valid_dataloader)
    if total_accu is not None and total_accu > accu_val:
      scheduler.step()
    else:
Example #32
    #
    #   max_pwr_idx = np.argmax(valid_pwr)
    #   predictions.append(valid_freqs[max_pwr_idx] * 60.0)
    #   targets.append(data[1].numpy().tolist()[0][0])
    # print(predictions)
    # print(targets)
    # cor = np.corrcoef(np.array(predictions), np.array(targets))[0][1]
    # print("Correlation between input data and targets:", cor)
    # from scipy import signal
    # cross_corr = signal.correlate(predictions, targets, mode='same')
    # print(len(cross_corr), cross_corr)
    # plt.plot(cross_corr, label='cross-correlation')
    # plt.show()

    train_num = int(train_test_ratio * (len(dataset)))
    train_dataset, test_dataset = random_split(
        dataset, [train_num, len(dataset) - train_num])

    reporter = CLIReporter(metric_columns=[
        "train_loss", "loss", "mean_error", "training_iteration"
    ])
    results = tune.run(
        partial(model_train, train_dataset=train_dataset),
        # resources_per_trial={"cpu": 2},
        config=config,
        progress_reporter=reporter)

    # Plot loss during training
    dfs = results.fetch_trial_dataframes()
    [d.train_loss.plot() for d in dfs.values()]
    [d.loss.plot() for d in dfs.values()]
    plt.xlabel("epoch")
    def dal_active_learning(self):
        random_seeds = [123, 22, 69, 5, 108]
        gen_size = int(
            self.active_sample_size /
            self.no_classes)  # reconfirm whether it's okay to keep this outside the loop
        total_active_cycles = int(
            self.labelling_budget / self.active_sample_size) - 1

        for i in random_seeds:
            print("Executing Random seed " + str(i))

            self.save_dir = os.path.join(
                self.config.project_root,
                f'results/{self.config.model_name}/' + 'random_seed' + str(i))
            if not os.path.isdir(self.save_dir):
                self.save_dir = os.path.join(
                    self.config.project_root,
                    f'results/{self.config.model_name}/' + 'random_seed' +
                    str(i))
                os.makedirs(self.save_dir, exist_ok=True)

            _, human_cnn, model, optimizer, scheduler = model_selection(
                self.dataset, self.gan_type, self.device, self.active_learning,
                i)
            model.to(self.device)

            for i in range(5):

                seed = random_seeds[i]
                print("Executing Random seed " + str(i))

                active_learning_cycle = 0

                results_dir_name = 'results_fashionmnist_accuracy_seed_' + str(
                    seed)
                if not os.path.isdir(results_dir_name):
                    os.mkdir(results_dir_name)

                _, _, model, optimizer, scheduler = model_selection(
                    self.dataset, self.gan_type, self.device,
                    self.active_learning, i)

                model.to(self.device)

                train_dataset = self.data_loader[3]

                temp_list_data = [
                    train_dataset[i][0] for i in range(len(train_dataset))
                ]
                temp_list_data = torch.stack(temp_list_data)

                temp_list_labels = [
                    train_dataset.targets[i] for i in range(len(train_dataset))
                ]
                temp_list_labels = torch.stack(temp_list_labels)

                train_dataset = NewDataset(temp_list_data, temp_list_labels)

                if self.config.dataset == 'cifar10_2class':
                    split_data = random_split(train_dataset, [9000, 1000])
                else:
                    split_data = random_split(train_dataset, [50000, 10000])

                temp_train_dataset = deepcopy(split_data[0])
                validation_dataset = deepcopy(split_data[1])

                train_idx = temp_train_dataset.indices
                train_dataset.data = train_dataset.data[train_idx]
                train_dataset.targets = train_dataset.targets[train_idx]
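                # random_split returns Subset objects; their .indices are used above to
                # slice the underlying data/targets tensors directly rather than going
                # through the Subset wrapper.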
                label_freq_cycle = torch.zeros(total_active_cycles,
                                               self.no_classes)

                # Initialisation of training examples

                num_samples_class = int(self.intial_samples / self.no_classes)

                numpy_labels = np.asarray(train_dataset.train_labels)
                sort_labels = np.sort(numpy_labels)
                sort_index = np.argsort(numpy_labels)
                unique, start_index = np.unique(sort_labels, return_index=True)
                training_index = []
                for s in start_index:
                    for i in range(num_samples_class):
                        training_index.append(sort_index[s + i])

                train_dataset.train_data = train_dataset.train_data[
                    training_index]
                train_dataset.train_labels = train_dataset.train_labels[
                    training_index]

                training_data_labels = train_dataset.train_labels.numpy()

                self.save_image(train_dataset.data)

                num_misclassifications, entropies, properly_classified_data, accuracy_list = (
                    [] for i in range(4))
                val_loader = torch.utils.data.DataLoader(
                    dataset=validation_dataset,
                    batch_size=self.batch_size,
                    shuffle=True)

                size = self.intial_samples
                while (size <= self.labelling_budget):
                    model, accuracy = self.get_cnn_accuracy(
                        train_dataset, val_loader, model, optimizer, scheduler)
                    accuracy_list.append(accuracy)
                    print("----Size of training data----", size)
                    print("-----Accuracy-------", accuracy)

                    label_freq = torch.Tensor(
                        np.unique(training_data_labels, return_counts=True)[1]
                        / SIZE)
                    label_freq_cycle[active_learning_cycle] = torch.Tensor(
                        label_freq)

                    new_samples, entropy = self.generator.generate_images(
                        model)
                    entropies.append(entropy)

                    if self.intial_samples == size:
                        self.save_image(new_samples)

                    outputs = human_cnn(new_samples)
                    _, latent_code = torch.max(outputs.data, 1)

                    new_samples = new_samples.data.cpu()

                    data = NewDataset(new_samples, latent_code.cpu())

                    train_dataset = torch.utils.data.ConcatDataset(
                        (train_dataset, data))
                    training_data_labels = np.append(training_data_labels,
                                                     np.array(latent_code))

                    size = len(train_dataset)
                    active_learning_cycle = active_learning_cycle + 1
                    _, _, model, optimizer, scheduler = model_selection(
                        self.dataset, self.gan_type, self.device,
                        self.active_learning, i)
                    model.to(self.device)

                if size % 2000 == 0:
                    path = self.save_dir + '/' + 'intermediate_results' + str(
                        size)
                    if not os.path.isdir(path):
                        os.mkdir(path)
                    torch.save(accuracy_list, path + '/accuracy_list')
                    torch.save(entropies, path + '/entropies')
                    torch.save(label_freq_cycle, path + '/label_frequency')
                    torch.save(torch.LongTensor(num_misclassifications),
                               path + '/misclassifications')
                    torch.save(train_dataset, path + '/train_dataset')

            print("--------Random seed " + str(i) + "completed--------")