Example #1
    def __init__(self,
                 src_h5,
                 tgt_h5,
                 src_vocab,
                 tgt_vocab,
                 batch_size,
                 max_length,
                 epoch_size=None,
                 max_val_size=None,
                 distributed=False,
                 world_size=None,
                 pindex=None):

        self.batch_size = batch_size
        self.max_length = max_length + 1

        # Load the source and target HDF5 data files.
        self.datasets = {}
        self.loaders = {}
        for dsname in ['train', 'valid']:
            src = H5Dataset(src_h5, dsname).data[:, :max_length]
            tgt = H5Dataset(tgt_h5, dsname).data[:, :max_length + 1]
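            # Clip the effective max_length to the widths actually present in the source and target arrays.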
            self.max_length = \
                min(self.max_length + 1, src.shape[1], tgt[:,:-1].shape[1])
            srctgt = dask.array.concatenate((src, tgt[:, :-1]), axis=1)

            # Do not include BOS tokens in target output.
            tgt2 = tgt[:, 1:]

            # Shrink datasets if they are too large.
            if dsname == 'train':
                epoch_sz = epoch_size
                srctgt = srctgt[max_val_size:]
                tgt2 = tgt2[max_val_size:]
            elif dsname == 'valid' and max_val_size:
                epoch_sz = min(epoch_size, max_val_size)
                srctgt = srctgt[:max_val_size]
                tgt2 = tgt2[:max_val_size]
            self.datasets[dsname] = DaskDataset(srctgt, tgt2)

            # Subsample a fixed-size epoch, sharding it across workers when training is distributed.
            if distributed:
                sampler = DistributedSubSampler(self.datasets[dsname],
                                                world_size,
                                                pindex,
                                                epoch_size=epoch_sz)
            else:
                sampler = SubSampler(self.datasets[dsname],
                                     epoch_size=epoch_sz)
            self.loaders[dsname] = DataLoaderWithVocab(
                self.datasets[dsname],
                batch_size=self.batch_size,
                shuffle=False,
                sampler=sampler,
                src_vocab=src_vocab,
                tgt_vocab=tgt_vocab)
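
Example #1 indexes H5Dataset(file, dsname).data with array slicing and later passes it to dask.array.concatenate, so the class presumably exposes a dask array per named split. The snippet below is only a minimal sketch under that assumption, not the original implementation; the chunking choice is arbitrary.

import dask.array
import h5py


class H5Dataset:
    """Sketch: exposes one named split of an HDF5 file as a dask array."""

    def __init__(self, h5_path, dsname):
        # dsname selects the split stored in the file, e.g. 'train' or 'valid'.
        h5_file = h5py.File(h5_path, 'r')
        self.data = dask.array.from_array(h5_file[dsname], chunks='auto')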
Example #2
def main(model_state, model_name):

    device = torch.device("cuda:1")
    print(torch.cuda.current_device())

    params = {'batch_size': 512, 'shuffle': True, 'num_workers': 1}

    transform = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[178.6047284, 137.2459255, 176.28579374], std=[59.86620922, 70.70835133, 54.3316497 ]),
    ])

    test_dataset = H5Dataset('/home/junoon/Data/PatchCamelyon_v1/test/x.h5',
                             '/home/junoon/Data/PatchCamelyon_v1/test/y.h5',
                             transform)
    test_generator = data.DataLoader(test_dataset, **params)

    model = None
    if model_name == "simpleconv":
        model = simpleconv.SimpleConv()
    if model is None:
        raise ValueError(f"unknown model_name: {model_name}")

    model.load_state_dict(torch.load(model_state))
    model.to(device)

    test(None, model, device, test_generator)
Example #3
def main(batch_size, epochs, lr, momentum, seed, log_interval, save_model):
    # Training settings
    torch.manual_seed(seed)

    device = torch.device("cuda:1")
    print(torch.cuda.current_device())

    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 1}

    transform = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[178.6047284, 137.2459255, 176.28579374], std=[59.86620922, 70.70835133, 54.3316497 ]),
    ])

    train_dataset = H5Dataset('/home/junoon/Data/PatchCamelyon_v1/train/x.h5',
                              '/home/junoon/Data/PatchCamelyon_v1/train/y.h5',
                              transform)
    train_generator = data.DataLoader(train_dataset, **params)

    valid_dataset = H5Dataset('/home/junoon/Data/PatchCamelyon_v1/valid/x.h5',
                              '/home/junoon/Data/PatchCamelyon_v1/valid/y.h5',
                              transform)
    valid_generator = data.DataLoader(valid_dataset, **params)

    model = simpleconv.SimpleConv().to(device)
    print("learning rate", lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    start_time = time.time()

    # Train for the requested number of epochs, validating after each epoch.
    for epoch in range(1, epochs + 1):
        train(log_interval, model, device, train_generator, optimizer, epoch)
        test(None, model, device, valid_generator)

    print(f"""Total time taken: {time.time() - start_time}""")

    if save_model:
        torch.save(model.state_dict(), "./saved_models/simpleconv.pt")
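
Examples #2 and #3 construct H5Dataset from separate image and label files and feed it to torch.utils.data.DataLoader, but the class itself is not shown on this page. The snippet below is only a minimal sketch of a compatible implementation, not the original: the internal dataset names 'x' and 'y' are assumptions.

import h5py
import torch
from torch.utils import data


class H5Dataset(data.Dataset):
    """Sketch: pairs an image HDF5 file with a label HDF5 file."""

    def __init__(self, x_path, y_path, transform=None):
        # 'x' and 'y' are assumed names of the single dataset inside each file.
        self.x = h5py.File(x_path, 'r')['x']
        self.y = h5py.File(y_path, 'r')['y']
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        image = self.x[index]
        label = torch.as_tensor(self.y[index]).squeeze().long()
        if self.transform is not None:
            image = self.transform(image)  # e.g. transforms.ToTensor() as in the examples
        return image, label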
Example #4
headers = ["pm25"]

#readFile = h5py.File('./pre_data/2018010116.h5','r')
#dataset = readFile['2018010116'][:] #shape is (169,269,239,26)

#trainingset = AirDataset(dataset[:120]) #8281
#validationset = AirDataset(dataset[120:])
#loader_train = DataLoader(trainingset, batch_size=batch_size, shuffle=True, num_workers=16, drop_last=True)
#loader_valid = DataLoader(validationset, batch_size=batch_size, shuffle=True, num_workers=16, drop_last=True)
#loader_test = DataLoader(validationset, batch_size=batch_size, shuffle=True, num_workers=16, drop_last=True)

train_path = "./train_daqisuo_PM25_mini_6to1.h5"
val_path = "./valid_daqisuo_PM25_6to1.h5"
test_path = "./test_daqisuo_PM25_6to1.h5"

h5train = H5Dataset(train_path)
h5val = H5Dataset(val_path)
h5test = H5Dataset(test_path)

# h5train =torch.utils.data.DataLoader(H5Dataset(train_path))
# h5val =torch.utils.data.DataLoader(H5Dataset(val_path))
# h5test = torch.utils.data.DataLoader(H5Dataset(test_path))

# Wrap the training and validation splits in shuffling DataLoaders that drop incomplete batches.
loader_train = DataLoader(h5train,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=16,
                          drop_last=True)
loader_valid = DataLoader(h5val,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=16,
                          drop_last=True)
Example #5
train_path_macong = "/home/datanfs/macong_data/train_daqisuo.h5"
val_path_macong = "/home/datanfs/macong_data/valid_daqisuo.h5"

train_path_zhulifa = "/home/zhulifa/python-dev/macong_data/train_daqisuo.h5"
val_path_zhulifa = "/home/zhulifa/python-dev/macong_data/valid_daqisuo.h5"

train_path_52 = "/home/zhulifa/PM25-work/daqisuo_data/train_daqisuo_v2.h5"
val_path_52 = "/home/zhulifa/PM25-work/daqisuo_data/valid_daqisuo_v2.h5"

train_path = "/home/datanfs/anhui/PM25Pred/train_daqisuo.h5"
val_path = "/home/datanfs/anhui/PM25Pred/valid_daqisuo.h5"
test_path = "./test_daqisuo.h5"

print("##### start load dataset #####")
h5train = H5Dataset(train_path_zhulifa)
h5val = H5Dataset(val_path_zhulifa)
# h5test = H5Dataset(test_path)

loader_train = DataLoader(h5train,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=16,
                          drop_last=True)
loader_valid = DataLoader(h5val,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=16,
                          drop_last=True)
# loader_test =  DataLoader(h5test, batch_size=1,shuffle=False,num_workers=16)
print("##### load dataset over #####")
Example #6
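    # Accumulate the loss over the test loader without gradient tracking, then report the average.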
    with torch.no_grad():
        for batch_idx, sample in enumerate(test_loader):
            data = sample['input'].unsqueeze(-1).permute(0, 1, 4, 2, 3).float().to(device)
            target = sample['output'].squeeze().float().to(device)
            output = model(data).squeeze()
            test_loss += crit(output, target).item()
    test_loss /= len(test_loader)
    # Horovod: average metric values across workers.
    # test_loss = metric_average(test_loss, 'avg_loss')
    print('\nTest set: Average loss: {:.4f}\n'.format(test_loss))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

headers=["pm25"]
test_path = "./test_daqisuo_PM25_6to1.h5"
h5test =H5Dataset(test_path)
loader_test = DataLoader(h5test, batch_size=1,shuffle=False,num_workers=16)

height = 339  # 269
width = 432  # 239
# input_dim = 10  # 26
input_dim = 1
n_layer = 2
hidden_size = [64, 128]
output_dim = 1
n_epoch = 1000
learning_rate = 1e-4
weight_decay = 0.9
weight_decay_epoch = 10
MSEmetric = nn.MSELoss()
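
Examples #4 through #6 use the single-path form of H5Dataset, and the evaluation loop in Example #6 reads each sample as a dict with 'input' and 'output' keys. Below is a minimal sketch consistent with that usage, not the original code; the internal dataset names 'input' and 'output' are assumptions. The file is opened lazily so that each of the num_workers=16 DataLoader workers gets its own HDF5 handle.

import h5py
import torch
from torch.utils.data import Dataset


class H5Dataset(Dataset):
    """Sketch: yields {'input': ..., 'output': ...} samples from one HDF5 file."""

    def __init__(self, h5_path):
        self.h5_path = h5_path
        self._file = None  # opened lazily, once per DataLoader worker process
        with h5py.File(h5_path, 'r') as f:
            self._length = len(f['input'])

    def __len__(self):
        return self._length

    def __getitem__(self, index):
        if self._file is None:
            self._file = h5py.File(self.h5_path, 'r')
        return {
            'input': torch.as_tensor(self._file['input'][index]),
            'output': torch.as_tensor(self._file['output'][index]),
        }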