Example #1
import numpy as np
import torch

# TensorDataset (with input_transform support) and MultiDataset are assumed
# to come from the surrounding project (torchsample-style datasets).
def make_dataset(df, scalar_encoder, transforms, test=False):
    """
    Does all the data manipulation and creates a dataset ready to be fed to the model
    :param df: pandas daraframe
    :param scalar_encoder: encoder to encode scalars
    :param transforms: data transformations
    :param test: indicate whether this a training/validation dataset or a test dataset
    :return: dataset
    """
    # Concat Bands into (N, 2, 75, 75) images
    band_1 = np.concatenate([im for im in df['band_1']]).reshape(-1, 75, 75)
    band_2 = np.concatenate([im for im in df['band_2']]).reshape(-1, 75, 75)
    inc_angle = np.nan_to_num(df['inc_angle'].values)
    inc_angle = scalar_encoder.encode(inc_angle)
    full_img = np.stack([band_1, band_2], axis=1)

    # Dataset and DataLoader
    imgs = torch.from_numpy(full_img).float()
    angles = torch.from_numpy(inc_angle).float()
    if test:
        targets = None
    else:
        targets = torch.from_numpy(df['is_iceberg'].values).long()
    dataset_imgs = TensorDataset(imgs, targets, input_transform=transforms)
    dataset_angles = TensorDataset(angles, None)
    dataset = MultiDataset((dataset_imgs, dataset_angles))
    return dataset
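
# The scalar_encoder above is only assumed to expose an encode() method; the
# snippet does not pin down its implementation. A minimal sketch of one
# plausible (hypothetical) encoder that standardizes the angles:
class StandardScalarEncoder:
    def fit(self, values):
        self.mean_ = np.mean(values)
        self.std_ = np.std(values) + 1e-8  # guard against zero variance
        return self

    def encode(self, values):
        # map scalars to zero mean / unit variance
        return (values - self.mean_) / self.std_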
Example #2
from torch.utils.data import TensorDataset, DataLoader

# x_train/y_train and x_test/y_test are assumed to be loaded upstream
# (e.g. MNIST-style image tensors and integer labels)
y_train = y_train.long()
x_test = x_test.float()
y_test = y_test.long()

x_train = x_train / 255.
x_test = x_test / 255.
x_train = x_train.unsqueeze(1)
x_test = x_test.unsqueeze(1)

# only train on a subset
x_train = x_train[:10000]
y_train = y_train[:10000]
x_test = x_test[:1000]
y_test = y_test[:1000]

train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32)
val_dataset = TensorDataset(x_test, y_test)
val_loader = DataLoader(val_dataset, batch_size=32)
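
# Quick sanity check of the loaders above (a sketch; the batch shapes assume
# 28x28 single-channel images after the unsqueeze):
x_batch, y_batch = next(iter(train_loader))
print(x_batch.size())  # e.g. torch.Size([32, 1, 28, 28])
print(y_batch.size())  # e.g. torch.Size([32])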

Example #3
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchsample import TensorDataset

# x_train/y_train are assumed to be prepared upstream (as in the previous example)

# Define your model EXACTLY as if you were using nn.Module
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(1600, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 1600)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x))

    # Minimal stand-ins for the torchsample SuperModule-style API the rest of
    # this snippet assumes (set_optimizer / set_loss / fit / fit_loader):
    def set_optimizer(self, opt):
        self.opt = opt

    def set_loss(self, loss_fn):
        self.loss_fn = loss_fn

    def fit(self, x, y, batch_size, nb_epoch):
        # torchsample's TensorDataset doubles as a batch iterator here
        train_loader = TensorDataset(x, y, batch_size=batch_size)
        return self.fit_loader(train_loader, nb_epoch)

    def fit_loader(self, train_loader, nb_epoch):
        for epoch in range(nb_epoch):
            loss = self.train_loop(train_loader)
        return loss

    def train_loop(self, train_loader):
        for x_batch, y_batch in train_loader:
            x_batch = Variable(x_batch)
            y_batch = Variable(y_batch)
            self.opt.zero_grad()
            ypred = self(x_batch)
            loss = self.loss_fn(ypred, y_batch)
            loss.backward()
            self.opt.step()
        return loss.data[0]


## Test that the network actually produces the correctly-sized output
net = Network()
net.set_optimizer(optim.Adam(net.parameters()))
net.set_loss(F.nll_loss)
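
# The comment above promises a shape test; a minimal sketch of one (assuming
# 1x28x28 inputs, for which the flattened conv features come out to 1600):
dummy = Variable(torch.randn(2, 1, 28, 28))
out = net(dummy)
print(out.size())  # expected: torch.Size([2, 10])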

## create sampler
from torchsample import TensorDataset
from torchsample.transforms import RangeNormalize

NB_EPOCH = 10
BATCH_SIZE = 32

train_loader = TensorDataset(x_train,
                             y_train,
                             transform=RangeNormalize(0., 1., n_channels=3),
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=0)

net.fit_loader(train_loader, nb_epoch=NB_EPOCH)
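
# RangeNormalize(0., 1.) rescales each input to the [0, 1] range; conceptually
# it does something like this sketch (not torchsample's exact implementation):
def range_normalize(x, min_val=0., max_val=1.):
    x_min, x_max = x.min(), x.max()
    x = (x - x_min) / (x_max - x_min)  # scale to [0, 1]
    return x * (max_val - min_val) + min_val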
Example #5
from torchsample import TensorDataset
import torch
import numpy as np

x = torch.ones(10, 1, 30, 30)
y = torch.from_numpy(np.arange(10))

loader = TensorDataset(x, y, batch_size=2)

xbatch, ybatch = loader.next_batch()
print(ybatch.numpy())

xbatch, ybatch = loader.next_batch(3)
print(ybatch.numpy())

xbatch, ybatch = loader.next_batch()
print(ybatch.numpy())

# SAMPLE A FIXED NUMBER OF BATCHES WITHOUT STOPPING
for i in range(1000):
    xbatch, ybatch = loader.next_batch()

# MAKE ONLY ONE PASS THROUGH THE DATA
for xbatch, ybatch in loader:
    pass

# DEMONSTRATE THAT THE DATA WILL STILL BE SHUFFLED IF YOU USE NEXT_BATCH()
loader = TensorDataset(x, y, batch_size=2, shuffle=True)
# 10 loops = 2 epochs (n=10 / batch_size = 2 --> 5 loops per "epoch")
for i in range(10):
    if i == 5:
        print('--- second epoch begins; order is reshuffled ---')
    xbatch, ybatch = loader.next_batch()
    print(ybatch.numpy())
Example #6
# (truncated sample output from a stratified loader)
#   4  -  0
#   2  -  1
#   5  -  0
#   6  -  1

# You can see that it evenly samples each batch from
# the 0 and 1 classes. To use it in a sampler:
import torch
import numpy as np
from torchsample import TensorDataset

x = torch.randn(8, 2)
y = torch.from_numpy(np.array([0, 0, 1, 1, 0, 0, 1, 1]))

loader = TensorDataset(x, y, batch_size=4, sampler='stratified')

for xbatch, ybatch in loader:
    print(ybatch.numpy())

# AND IT WORKS FOR MORE THAN 2 CLASSES
import torch
import numpy as np
from torchsample import TensorDataset

x = torch.randn(8, 2)
y = torch.from_numpy(np.array([0, 0, 1, 1, 2, 2, 3, 3]))

loader = TensorDataset(x, y, batch_size=4, sampler='stratified')

for xbatch, ybatch in loader:
    print(ybatch.numpy())
Example #7
import numpy as np
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchsample import TensorDataset
import torchsample.transforms as transforms

# plot_sample and fit_gmm are helpers defined elsewhere in the project;
# band_1_tr, band_2_tr and the inc_angle arrays are assumed to be loaded upstream.
#plt.hist(inc_angle_test, bins=100)
#plt.hist(inc_angle_tr, bins=100)

plt.show()

del data
full_img_tr = np.stack([band_1_tr, band_2_tr], axis=1)

my_transforms = transforms.RandomAffine(rotation_range=180,
                                        translation_range=0.2,
                                        shear_range=None,
                                        zoom_range=(0.8, 1.2))

my_transforms = transforms.Compose(my_transforms.transforms)
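
# The composed transform is callable on a single sample as well; a quick check
# of the augmentation (taking one 2x75x75 image from the stack above):
img = torch.from_numpy(full_img_tr[0]).float()
augmented = my_transforms(img)
print(augmented.size())  # shape is preserved: torch.Size([2, 75, 75])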
test_imgs = torch.from_numpy(full_img_tr).float().cuda()
test_dataset = TensorDataset(test_imgs, input_transform=my_transforms)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

print("loader len:", len(test_loader))

while True:
    #index = np.random.randint(0, len(data), 1)
    index = 0
    for _data in tqdm(test_loader, total=len(test_loader)):
        print(index)
        plot_sample(_data.squeeze_().cpu())
        index += 1

    best_gmm = fit_gmm(band_1_tr[index])
    print("means: ", best_gmm.means_)
    print("covs: ", best_gmm.covariances_)
Example #8
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# x_train/y_train and x_test/y_test are assumed to be loaded upstream
y_train = y_train.long()
x_test = x_test.float()
y_test = y_test.long()

x_train = x_train / 255.
x_test = x_test / 255.
x_train = x_train.unsqueeze(1)
x_test = x_test.unsqueeze(1)

# only train on a subset
x_train = x_train[:1000]
y_train = y_train[:1000]
x_test = x_test[:1000]
y_test = y_test[:1000]

train_data = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_data, batch_size=128)


# Define your model EXACTLY as if you were using nn.Module
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(1600, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 1600)
        x = F.relu(self.fc1(x))
        return F.log_softmax(self.fc2(x))
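
# The original snippet is truncated here; a minimal (hypothetical) training-loop
# sketch for the model and loader above:
from torch.autograd import Variable
import torch.optim as optim

net = Network()
optimizer = optim.Adam(net.parameters())
for epoch in range(2):
    for x_batch, y_batch in train_loader:
        x_batch, y_batch = Variable(x_batch), Variable(y_batch)
        optimizer.zero_grad()
        loss = F.nll_loss(net(x_batch), y_batch)
        loss.backward()
        optimizer.step()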