Code Example #1
def load_dataset_into_batches(file_dir_path: str, subset: Subset, subset_size: int, shuffle: bool = False):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(file_dir_path, subset)
    return BatchGenerator(dataset, subset_size, shuffle, op)
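A hypothetical call, assuming the CIFAR-10 layout used by the other examples on this page (the path and batch size are illustrative only):

# Hypothetical usage of the helper above
train_batches = load_dataset_into_batches(
    '../data/cifar-10-batches-py', Subset.TRAINING, 128, shuffle=True)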
Code Example #2
File: test2.py Project: helmuthb/dlvc2020
def main():
    data = PetsDataset("/home/helmuth/dlvc/cifar-10-batches-py", Subset.TRAINING)
    # ops chain
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32),
        ops.add(-127.5),
        ops.mul(1/127.5),
    ])
    # batch generator #1
    bg1 = BatchGenerator(data, len(data), False)
    assert(len(bg1) == 1)
    # batch generator #2
    bg2 = BatchGenerator(data, 500, False, op)
    assert(len(bg2) == 16)
    # iterate over all batches; inspect the first one
    cnt = 0
    for batch in bg2:
        cnt += 1
        if cnt < 16:
            assert(batch.data.shape == (500, 3072))
            assert(batch.labels.shape == (500,))
        assert(batch.data.dtype == np.float32)
        assert(np.issubdtype(batch.labels.dtype, np.integer))
        if cnt == 1:
            print("First batch, first sample, not shuffled")
            print(batch.data[0])
    # batch generator #3
    bg3 = BatchGenerator(data, 500, True, op)
    # run 5 times through first sample of shuffled batch generator
    for _ in range(5):
        it = iter(bg3)
        print("First batch, first sample, shuffled")
        print(next(it).data[0])
Code Example #3
def test_data_transformation(self):
    op = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir),
                          Subset.TRAINING)
    batch_gen = BatchGenerator(dataset, 100, False, op)
    self.assertEqual(len(batch_gen), 80)
    iter_gen = iter(batch_gen)
    iter_result = next(iter_gen)
    self.assertEqual(iter_result.data[0].shape, (3072, ))
    self.assertTrue(np.issubdtype(iter_result.data.dtype, np.float32))
Code Example #4
def load_dataset(subset: Subset) -> batches.BatchGenerator:
    dataset = PetsDataset('../data/cifar-10-batches-py', subset)

    op = ops.chain([
        ops.hwc2chw(),
        ops.add(-127.5),
        ops.mul(1 / 127.5),
        ops.type_cast(np.float32)
    ])

    return batches.BatchGenerator(dataset, 128, True, op)
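As a quick sanity check of this arithmetic, a standalone numpy sketch (independent of the dlvc ops) shows that the add/mul pair maps pixel values from [0, 255] to [-1, 1]:

import numpy as np

pixels = np.array([0, 127.5, 255], dtype=np.float32)
scaled = (pixels + (-127.5)) * (1 / 127.5)  # same arithmetic as ops.add(-127.5), ops.mul(1/127.5)
print(scaled)  # [-1.  0.  1.]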
Code Example #5
def test_train_with_wrong_type_of_labels(self):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batch_gen = BatchGenerator(dataset, 7959, False, op)
    batch_iter = iter(batch_gen)
    iter_result = next(batch_iter)
    classifier = KnnClassifier(10, 3072, 2)
    self.assertRaises(TypeError, classifier.train, iter_result.data, [0, 1, 0])
Code Example #6
def test_train_with_proper_data(self):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batch_gen = BatchGenerator(dataset, 7959, False, op)
    batch_iter = iter(batch_gen)
    iter_result = next(batch_iter)
    classifier = KnnClassifier(10, 3072, 2)
    classifier.train(iter_result.data, iter_result.label)
Code Example #7
def test_train_wrong_vector_size_in_data(self):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    batch_gen = BatchGenerator(dataset, 7959, False, op)
    batch_iter = iter(batch_gen)
    iter_result = next(batch_iter)
    classifier = KnnClassifier(10, 3072, 2)
    # remove one column so the vectors no longer have 3072 elements
    changed_data = np.delete(iter_result.data, 100, 1)
    self.assertRaises(RuntimeError, classifier.train, changed_data, iter_result.label)
Code Example #8
def test_correctness_of_data_for_train(self):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    one_batch_gen = BatchGenerator(dataset, 7959, False, op)
    self.assertEqual(len(one_batch_gen), 1)
    many_batch_gen = BatchGenerator(dataset, 500, False, op)
    self.assertEqual(len(many_batch_gen), 16)
    reference = [116., 125., 125., 91., 101.]
    first_batch = next(iter(many_batch_gen))
    # compare the first five values of the first sample against the reference
    for i, item in enumerate(first_batch.data[0][:5]):
        self.assertEqual(item, reference[i])
Code Example #9
def load_dataset(subset: Subset, augment=False) -> batches.BatchGenerator:
    dataset = PetsDataset('../data/cifar-10-batches-py', subset)

    ops_list = []

    if augment:
        ops_list += [ops.hflip(), ops.rcrop(32, 12, 'constant')]

    ops_list += [
        ops.mul(1 / 255),
        ops.type_cast(np.float32),
        # Imagenet:
        # ops.normalize(  mean=np.array([0.485, 0.456, 0.406]),
        #                 std=np.array([0.229, 0.224, 0.225])),
        # Cifar-10:
        ops.normalize(mean=np.array([0.41477802, 0.45935813, 0.49693552]),
                      std=np.array([0.25241926, 0.24699265, 0.25279155])),
        ops.hwc2chw()
    ]

    op = ops.chain(ops_list)

    return batches.BatchGenerator(dataset, 128, True, op)
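A hypothetical caller would enable augmentation only for the training subset, e.g.:

# Hypothetical usage (not from the source project)
train_batches = load_dataset(Subset.TRAINING, augment=True)
val_batches = load_dataset(Subset.VALIDATION)  # augment defaults to False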
Code Example #10
def test_predict_with_proper_data(self):
    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])
    dataset_training = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.TRAINING)
    dataset_valid = PetsDataset(os.path.join(os.getcwd(), self._data_dir), Subset.VALIDATION)

    batch_gen_t = BatchGenerator(dataset_training, 795, False, op)
    batch_gen_v = BatchGenerator(dataset_valid, 204, False, op)

    batch_iter_t = iter(batch_gen_t)
    iter_result_t = next(batch_iter_t)

    batch_iter_v = iter(batch_gen_v)
    iter_result_v = next(batch_iter_v)

    classifier = KnnClassifier(10, 3072, 2)
    classifier.train(iter_result_t.data, iter_result_t.label)
    results = classifier.predict(iter_result_v.data)
    self.assertEqual(len(results), 204)
    for result in results:
        self.assertEqual(np.sum(result), 1.0)
Code Example #11
from collections import namedtuple

import numpy as np
import torch
import torch.nn as nn

from dlvc.dataset import Subset
from dlvc.datasets.pets import PetsDataset
from dlvc.batches import BatchGenerator
import dlvc.ops as ops

np.random.seed(0)
torch.manual_seed(0)

TrainedModel = namedtuple('TrainedModel', ['model', 'accuracy'])

DATA_PATH = "../cifar-10-batches-py/"
train_data = PetsDataset(DATA_PATH, Subset.TRAINING)
val_data = PetsDataset(DATA_PATH, Subset.VALIDATION)
test_data = PetsDataset(DATA_PATH, Subset.TEST)

op = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hwc2chw()
])

train_batches = BatchGenerator(train_data, 128, False, op)
val_batches = BatchGenerator(val_data, 128, False, op)
test_batches = BatchGenerator(test_data, 128, False, op)


class Net(nn.Module):
    def __init__(self, img_size, num_classes):
        super(Net, self).__init__()
        self.img_size = img_size

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
Code Example #12
TrainedModel = namedtuple('TrainedModel', ['model', 'accuracy'])

# initialize RNG for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Step 1: load the data sets (TRAIN, VALIDATION & TEST)
train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION)
test_data = PetsDataset("../cifar-10-batches-py", Subset.TEST)

# Operations to standardize
op = ops.chain([
    ops.vectorize(),
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1/127.5),
])
# Step 2: Create batch generator for each
BATCH_SIZE = 512
train_batches = BatchGenerator(train_data, BATCH_SIZE, True, op)
val_batches = BatchGenerator(val_data, BATCH_SIZE, True, op)
test_batches = BatchGenerator(test_data, BATCH_SIZE, True, op)

def train_model(lr: float, momentum: float) -> TrainedModel:
    '''
    Trains a linear classifier with a given learning rate (lr) and momentum.
    Computes the accuracy on the validation set.
    Returns both the trained classifier and accuracy.
    '''
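    # --- Sketch continuation (not the original body): assumes PyTorch is
    # imported as torch, two classes (cat/dog), the 3072-dim vectorized inputs
    # produced by the op chain above, and batches exposing .data / .label as
    # in Code Example #24. ---
    model = torch.nn.Linear(3072, 2)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    model.train()
    for batch in train_batches:
        inputs = torch.from_numpy(batch.data)
        labels = torch.from_numpy(batch.label).long()
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

    # compute accuracy on the validation set
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in val_batches:
            predictions = model(torch.from_numpy(batch.data)).argmax(dim=1)
            correct += int((predictions.numpy() == batch.label).sum())
            total += len(batch.label)

    return TrainedModel(model=model, accuracy=correct / total)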
Code Example #13
import numpy as np

from dlvc.dataset import Subset
from dlvc.datasets.pets import PetsDataset
from dlvc import ops, batches

dataset = PetsDataset('../data/cifar-10-batches-py', Subset.TRAINING)

op = ops.chain([ops.mul(1 / 255), ops.type_cast(np.float32)])

batch_generator = batches.BatchGenerator(dataset, 7959, True, op)

training_images = []

for batch in batch_generator:
    training_images.append(batch.data)

# the batch size equals the dataset size (7959), so there is exactly one batch:
# np.array gives shape (1, 7959, 32, 32, 3); drop the leading batch axis
training_images = np.array(training_images, dtype=np.float32)
training_images = training_images.reshape(training_images.shape[1:])

train_mean = np.mean(training_images, axis=(0, 1, 2))
train_std = np.std(training_images, axis=(0, 1, 2))

print(train_mean, train_std)
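(The values printed here should match the Cifar-10 mean/std constants passed to ops.normalize in Code Example #9, since this pipeline already scales by 1/255.)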
Code Example #14
File: cnn_cats_dogs.py Project: helmuthb/dlvc2020
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Batch size to be used
BATCH_SIZE = 128

# Step 1: load the data sets (TRAIN, VALIDATION)
train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION)

# Operations to standardize
# First experiment: scale to [-1,1]
op1 = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hwc2chw()
])
# Second experiment: scale to sample mean=0, sd=1
# calculate average training sample mean & sd
op_calc = ops.chain([ops.type_cast(np.float32), ops.mean_sd()])
# using batch generator (could do it directly but I'm lazy)
train_full_batch_gen = BatchGenerator(train_data, len(train_data), False,
                                      op_calc)
train_full_batch = next(b for b in train_full_batch_gen)
train_mean_sd = np.mean(train_full_batch.data, axis=0)
# create operation to scale
op2 = ops.chain([
    ops.type_cast(np.float32),
    ops.scale(train_mean_sd[0], train_mean_sd[1]),
    ops.hwc2chw()
])
Code Example #15
def load_dataset(subset: Subset) -> batches.BatchGenerator:
    dataset = PetsDataset('../data/cifar-10-batches-py', subset)

    op = ops.chain([ops.vectorize(), ops.type_cast(np.float32)])

    return batches.BatchGenerator(dataset, len(dataset), True, op)
Code Example #16
File: run_script.py Project: FabianTraxler/DLVC
DATA_PATH = "../cifar-10-batches-py/"
RESULTS_FILE = "results.txt"
NR_EPOCHS = 100
EARLY_STOPPING = 10

CUDA = torch.cuda.is_available()

train_data = PetsDataset(DATA_PATH, Subset.TRAINING)
val_data = PetsDataset(DATA_PATH, Subset.VALIDATION)

op_all_augmentation = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.add_noise(),
    ops.hwc2chw()
])

op_augmentation_crop_flip = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.hwc2chw()
])

op_no_augmentation = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hwc2chw()
])
Code Example #17
from collections import namedtuple

import numpy as np
import torch
import torch.nn as nn

from dlvc.dataset import Subset
from dlvc.datasets.pets import PetsDataset
from dlvc.batches import BatchGenerator
import dlvc.ops as ops

np.random.seed(0)
torch.manual_seed(0)

DATA_PATH = "../cifar-10-batches-py/"
MODEL_PATH = "best_model.pt"
train_data = PetsDataset(DATA_PATH, Subset.TRAINING)
val_data = PetsDataset(DATA_PATH, Subset.VALIDATION)

op = ops.chain([
    ops.type_cast(np.float32),
    ops.add(-127.5),
    ops.mul(1 / 127.5),
    ops.hflip(),
    ops.rcrop(32, 4, 'constant'),
    ops.add_noise(),
    ops.hwc2chw()
])

train_batches = BatchGenerator(train_data, 128, False, op)
val_batches = BatchGenerator(val_data, 128, False, op)


class Net(nn.Module):
    def __init__(self, img_size, num_classes):
        super(Net, self).__init__()
        self.img_size = img_size

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
Code Example #18
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Batch size to be used
BATCH_SIZE = 128

# Step 1: load the data sets (TRAIN, VALIDATION)
train_data = PetsDataset("../cifar-10-batches-py", Subset.TRAINING)
val_data = PetsDataset("../cifar-10-batches-py", Subset.VALIDATION)

# Operations to standardize
# scale to sample mean=0, sd=1
# calculate average training sample mean & sd
op_calc = ops.chain([
    ops.type_cast(np.float32),
    ops.mean_sd()
])
# using batch generator (could do it directly but I'm lazy)
train_full_batch_gen = BatchGenerator(
    train_data,
    len(train_data),
    False,
    op_calc)
train_full_batch = next(b for b in train_full_batch_gen)
train_mean_sd = np.mean(train_full_batch.data, axis=0)
# create operation to scale
op2 = ops.chain([
    ops.type_cast(np.float32),
    ops.scale(train_mean_sd[0], train_mean_sd[1]),
    ops.hwc2chw()
])
Code Example #19
File: mlp_cats_dogs.py Project: martinmatak/dlvc2018
BATCH_SIZE = 128
NUM_CLASSES = 2
EPOCHS = 500
lr = 0.001
# weight decay 0 in this configuration, in part 3 this is changed
wd = 0.0

pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)
pets_test = PetsDataset(dir, Subset.TEST)


batchGenerator_training = BatchGenerator(pets_training, BATCH_SIZE, shuffle=True,
                                         op=chain([type_cast(dtype=np.float32),
                                                   add(-127.5),
                                                   mul(1 / 127.5),
                                                   hwc2chw()]))
batchGenerator_validation = BatchGenerator(pets_validation, BATCH_SIZE, shuffle=False,
                                         op=chain([type_cast(dtype=np.float32),
                                                   add(-127.5),
                                                   mul(1 / 127.5),
                                                   hwc2chw()]))
batchGenerator_test = BatchGenerator(pets_test, BATCH_SIZE, shuffle=False,
                                         op=chain([type_cast(dtype=np.float32),
                                                   add(-127.5),
                                                   mul(1 / 127.5),
                                                   hwc2chw()]))
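Since the three generators share an identical op chain, it can be built once and reused, e.g.:

# Equivalent sketch with the shared chain factored out (same behavior):
op = chain([type_cast(dtype=np.float32), add(-127.5), mul(1 / 127.5), hwc2chw()])
batchGenerator_training = BatchGenerator(pets_training, BATCH_SIZE, shuffle=True, op=op)
batchGenerator_validation = BatchGenerator(pets_validation, BATCH_SIZE, shuffle=False, op=op)
batchGenerator_test = BatchGenerator(pets_test, BATCH_SIZE, shuffle=False, op=op)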


class CatDogNet(nn.Module):
    def __init__(self):
Code Example #20
wd = 0.00000001

EARLY_STOPPING = True
EARLY_STOPPING_NUM_OF_EPOCHS = 100
USE_DROPOUT = True

pets_training = PetsDataset(dir, Subset.TRAINING)
pets_validation = PetsDataset(dir, Subset.VALIDATION)

batchGenerator_training = BatchGenerator(pets_training,
                                         BATCH_SIZE,
                                         shuffle=True,
                                         op=chain([
                                             type_cast(dtype=np.float32),
                                             add(-127.5),
                                             mul(1 / 127.5),
                                             rcrop(25, 2, 'median'),
                                             hwc2chw()
                                         ]))
batchGenerator_validation = BatchGenerator(pets_validation,
                                           BATCH_SIZE,
                                           shuffle=False,
                                           op=chain([
                                               type_cast(dtype=np.float32),
                                               add(-127.5),
                                               mul(1 / 127.5),
                                               hwc2chw()
                                           ]))


class CatDogNet(nn.Module):
Code Example #21
    # skip last batch
    if batch_idx == last_batch_idx:
        continue
    assert batch.data.shape == (500, 3072), "Batch data shape: " + str(
        batch.data.shape) + ", expected: (500, 3072)."
    assert batch.labels.shape == (500, ), "Batch labels shape: " + str(
        batch.labels.shape) + ", expected: (500,)."
    batch_idx += 1

# The data type is always np.float32 and the label type is integral (one of the np.int and np.uint variants)
# Implemented: for label type np.uint8 since there is less than 256 labels
batch_generator = BatchGenerator(
    dataset_training,
    500,
    False,
    op=chain([vectorize(), type_cast(dtype=np.float32)]))
for batch in batch_generator:
    assert batch.data.dtype == np.float32, "Batch data type: " + str(
        batch.data.dtype) + ", expected: np.float32."
    assert batch.labels.dtype == np.uint8, "Batch labels type: " + str(
        batch.labels.dtype) + ", expected: np.uint8."

# The first sample of the first training batch returned without shuffling
# has label 0 ...
batch_generator = BatchGenerator(
    dataset_training,
    500,
    False,
    op=chain([type_cast(dtype=np.float32),
              vectorize()]))
first_sample_label_unshuffled = None
Code Example #22
    dataset = PetsDataset('../data/cifar-10-batches-py',
                          Subset.TEST)

    print('Found dataset directory')
    print(f'{len(dataset)} samples')

    test_sample = dataset[1]

    print(f'Index of test sample: {test_sample.idx}')
    print(f'Label of test sample: {test_sample.label}')
    #cv2.imshow('Sample Image', test_sample.data)
    #cv2.waitKey(0)
    #cv2.destroyAllWindows()

    op = ops.chain([
        ops.vectorize(),
        ops.type_cast(np.float32)
    ])

    #The number of training batches is 1 if the batch size is set to the number of samples in the dataset
    generator = batches.BatchGenerator(dataset, len(dataset), True, op)
    print(len(generator))

    #The number of training batches is 16 if the batch size is set to 500
    generator = batches.BatchGenerator(dataset, 500, True, op)
    print(len(generator))

    #The data and label shapes are (500, 3072) and (500,), respectively, unless for the last batch
    #The data type is always np.float32 and the label type is integral (one of the np.int and np.uint variants)
    #generator = batches.BatchGenerator(dataset, 20, False, op)
    for _ in generator:
        pass
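A sketch (not in the original) of the assertions those comments describe, assuming batches expose .data and .labels as in Code Example #21:

    for batch in generator:
        assert batch.data.dtype == np.float32
        assert np.issubdtype(batch.labels.dtype, np.integer)
        if batch.data.shape[0] == 500:  # every batch except possibly the last
            assert batch.data.shape == (500, 3072)
            assert batch.labels.shape == (500,)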
Code Example #23
def set_parameter(model, freeze_parameters):
    if freeze_parameters:
        for param in model.parameters():
            param.requires_grad = False


if USE_TRANSFER_LEARNING:
    # there are two networks to use in transfer learning "resnet" and "alexnet"
    net = initialize_transfer_learning_model("resnet", NUM_CLASSES,
                                             FREEZE_CNN_PARAMETERS)
    net, input_size = net
    pad_mode_for_resizing = 'constant'
    op_chain = chain([
        type_cast(dtype=np.float32),
        add(-127.5),
        mul(1 / 127.5),
        rcrop(25, 2, 'median'),
        resize(input_size, pad_mode_for_resizing),
        hwc2chw()
    ])
else:
    net = CatDogNet()
    op_chain = chain([
        type_cast(dtype=np.float32),
        add(-127.5),
        mul(1 / 127.5),
        rcrop(25, 2, 'median'),
        hwc2chw()
    ])

batchGenerator_training = BatchGenerator(pets_training,
                                         BATCH_SIZE,
                                         shuffle=True,
                                         op=op_chain)
Code Example #24
# The data and label shapes are (500, 3072) and (500,), respectively, unless for the last batch
batch_generator = BatchGenerator(dataset_training, 500, shuffle=False, op=vectorize())
last_batch_idx = len(batch_generator) - 1
batch_idx = 0
for batch in batch_generator:
    # skip last batch
    if batch_idx == last_batch_idx:
        continue
    assert batch.data.shape == (500, 3072), "Batch data shape: " + str(batch.data.shape) + ", expected: (500, 3072)."
    assert batch.label.shape == (500,), "Batch labels shape: " + str(batch.label.shape) + ", expected: (500,)."
    batch_idx += 1

# The data type is always np.float32 and the label type is integral (one of the np.int and np.uint variants)
# Implemented: for label type np.uint8 since there is less than 256 labels
batch_generator = BatchGenerator(dataset_training, 500, False, op=chain([vectorize(), type_cast(dtype=np.float32)]))
for batch in batch_generator:
    assert batch.data.dtype == np.float32, "Batch data type: " + str(batch.data.dtype) + ", expected: np.float32."
    assert batch.label.dtype == np.uint8, "Batch labels type: " + str(batch.label.dtype) + ", expected: np.uint8."

# The first sample of the first training batch returned without shuffling
# has label 0 ...
batch_generator = BatchGenerator(dataset_training, 500, False, op=chain([type_cast(dtype=np.float32), vectorize()]))
first_sample_label_unshuffled = None
first_sample_data_unshuffled = None
expected_label = 0
for batch in batch_generator:
    for label in batch.label:
        first_sample_label_unshuffled = label
        break
    for data in batch.data:
        first_sample_data_unshuffled = data
        break
    break  # only the first batch is needed