Ejemplo n.º 1
0
def build_network(verbose=False, **kwargs):
    """Build a nolearn neural network and return it.

    Every setting is passed by keyword and looked up by name. A setting is
    only required when the selected configuration actually reads it.

    :param verbose: if true, print the attribute dict of the built network
    :param data_augmentation: type of batch data augmentation: 'no', 'flip',
        'full', 'resampling' or 'resampling-flip'
    :param batch_size: batch size handed to the training batch iterator
    :param crop_size: forwarded to ``build_layers`` and to the 'full' and
        'resampling' iterators
    :param scale_delta: 'resampling' iterator only
    :param max_trans: 'resampling' iterator only
    :param angle_factor: 'resampling' iterator only
    :param dataset_ratio: 'resampling' and 'resampling-flip' iterators only
    :param final_ratio: 'resampling' and 'resampling-flip' iterators only
    :param max_epochs: epoch limit of the network (also forwarded to the
        resampling iterators)
    :param network: pre-defined network name, forwarded to ``build_layers``
    :param nb_channels: forwarded to ``build_layers``
    :param activation_function: forwarded to ``build_layers``
    :param learning_init: initial learning rate (start of the schedule)
    :param learning_final: final learning rate (end of the schedule)
    :param patience: forwarded to ``EarlyStopping``
    :param lambda2: forwarded as ``objective_lambda2``
    :return: NeuralNet nolearn object
    :raises TypeError: if a setting needed by the configuration is missing
    :raises ValueError: if ``data_augmentation`` is not a known strategy
    """
    def setting(name):
        # Explicit lookup instead of exec(key + '=val'): exec ran arbitrary
        # caller-supplied text as code and does not bind function locals on
        # Python 3.
        try:
            return kwargs[name]
        except KeyError:
            raise TypeError(
                "build_network() missing required keyword argument: '%s'" % name)

    data_augmentation = setting('data_augmentation')
    if data_augmentation == 'no':
        batch_iterator_train = BatchIterator(batch_size=setting('batch_size'))
    elif data_augmentation == 'flip':
        batch_iterator_train = FlipBatchIterator(batch_size=setting('batch_size'))
    elif data_augmentation == 'full':
        batch_iterator_train = DataAugmentationBatchIterator(
            batch_size=setting('batch_size'),
            crop_size=setting('crop_size'))
    elif data_augmentation == 'resampling':
        batch_iterator_train = ResamplingBatchIterator(
            batch_size=setting('batch_size'),
            crop_size=setting('crop_size'),
            scale_delta=setting('scale_delta'),
            max_trans=setting('max_trans'),
            angle_factor=setting('angle_factor'),
            max_epochs=setting('max_epochs'),
            dataset_ratio=setting('dataset_ratio'),
            final_ratio=setting('final_ratio'))
    elif data_augmentation == 'resampling-flip':
        batch_iterator_train = ResamplingFlipBatchIterator(
            batch_size=setting('batch_size'),
            max_epochs=setting('max_epochs'),
            dataset_ratio=setting('dataset_ratio'),
            final_ratio=setting('final_ratio'))
    else:
        raise ValueError(
            '%s is an unknown data augmentation strategy.' % (data_augmentation,))

    layers = build_layers(setting('network'),
                          nb_channels=setting('nb_channels'),
                          crop_size=setting('crop_size'),
                          activation_function=setting('activation_function'))

    learning_init = setting('learning_init')
    learning_final = setting('learning_final')
    max_epochs = setting('max_epochs')

    conv_net = NeuralNet(
        layers,

        update=nesterov_momentum,
        # Shared variables so the AdjustVariable hooks can change them
        # between epochs.
        update_learning_rate=theano.shared(float32(learning_init)),
        update_momentum=theano.shared(float32(0.9)),
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', start=learning_init, stop=learning_final),
            AdjustVariable('update_momentum', start=0.9, stop=0.999),
            EarlyStopping(patience=setting('patience')),
            ],

        batch_iterator_train=batch_iterator_train,

        objective=regularization_objective,
        objective_lambda2=setting('lambda2'),

        train_split=TrainSplit(eval_size=0.1, stratify=True),
        custom_score=('AUC-ROC', auc_roc),
        max_epochs=max_epochs,
        verbose=3,
        )
    if verbose:
        print(conv_net.__dict__)
    return conv_net
Ejemplo n.º 2
0
    def setup_net(self, print_out=True):
        """Create ``self.net`` (a ``my_net.MyNeuralNet``) and return its layer params.

        :param print_out: when true, also install the logging hooks
            (``PrintLogSave``, ``SaveBestWeights``, ``SaveLayerInfo``) and
            store the log printer on ``self.printer``
        :return: the ``layer_params`` dict produced by ``produce_layers``
        """
        # Epoch-end / training-start hooks; early stopping is always installed.
        epoch_hooks = []
        start_hooks = []
        if print_out:
            self.printer = PrintLogSave(out_file=self.out_file)
            epoch_hooks.append(self.printer)
            epoch_hooks.append(
                SaveBestWeights(method=self.method, full_dataset=self.full_dataset))
        epoch_hooks.append(EarlyStopping(patience=200, out_file=self.out_file))
        if print_out:
            start_hooks.append(SaveLayerInfo(out_file=self.out_file))

        # Training-time batch iterator: flipping variant only when requested.
        iterator_cls = flip.FlipBatchIterator if self.flip else flip.ResizeBatchIterator
        train_iterator = iterator_cls(batch_size=128)

        # Fixed hyper-parameters unless adaptive learning is enabled, in which
        # case shared variables are adjusted by per-epoch hooks.
        if not self.adaptive_learning:
            learning_rate = 0.01
            momentum = 0.9
        else:
            learning_rate = theano.shared(utils.float32(0.03))
            momentum = theano.shared(utils.float32(0.9))
            epoch_hooks.append(
                AdjustVariable('update_learning_rate', start=0.03, stop=0.0001))
            epoch_hooks.append(
                AdjustVariable('update_momentum', start=0.9, stop=0.999))

        layers, layer_params = my_net.MyNeuralNet.produce_layers(
            self.num_layers, dropout=self.dropout)
        self.net = my_net.MyNeuralNet(
            layers=layers,
            num_layers=self.num_layers,
            input_shape=(None, 3, utils.CROP_SIZE, utils.CROP_SIZE),
            output_num_units=3,
            output_nonlinearity=lasagne.nonlinearities.softmax,
            preproc_scaler=None,
            # learning rates
            update_learning_rate=learning_rate,
            update_momentum=momentum,
            # printing
            net_name=self.net_name,
            on_epoch_finished=epoch_hooks,
            on_training_started=start_hooks,
            # data augmentation
            batch_iterator_test=flip.ResizeBatchIterator(batch_size=128),
            batch_iterator_train=train_iterator,
            max_epochs=self.epochs,
            verbose=1,
            **layer_params
        )
        return layer_params
Ejemplo n.º 3
0
def main():
    nottingham = pickle.load(file("Nottingham.pickle"))
    train = hot_to_sparse(nottingham['train'], 88)
    print len(train)
    np.random.shuffle(train)
    net = ClockworkRNN((88, (4, 30), 88),
                       update_fn=adam,
                       learning_rate=0.001,
                       cost=quadratic_loss)
    batch_size = 700
    losses = []
    lrs = []
    norms, momentum_norms = [], []
    norms_thres, momentum_norms_thres = [], []
    # X, y, x_series = create_batch_func_params(500, 0.1, 2)
    best = np.inf
    last_best_index = 0
    decrement = float32(0.99)
    for i in range(2000):
        start = time()
        closs, cnorm, cnorm_threshold, cmomentum_norm, cmomentum_norm_threshold = [], [], [], [], []
        for g in xrange(0, len(train), batch_size):
            loss, norm, norm_theshold, momentum_norm, momentum_norm_threshold = net.bptt(
                train[g:g + batch_size, :-1], train[g:g + batch_size, 1:])
            closs.append(loss)
            cnorm.append(norm)
            cnorm_threshold.append(norms_thres)
            cmomentum_norm.append(momentum_norm)
            cmomentum_norm_threshold.append(momentum_norm_threshold)

        losses.append(np.mean(closs))
        norms.append(np.mean(cnorm))
        momentum_norms.append(np.mean(cnorm_threshold))
        norms_thres.append(np.mean(float32(cnorm_threshold)))
        momentum_norms_thres.append(
            np.mean([float32(cnmt) for cnmt in cmomentum_norm_threshold]))
        lrs.append(net.learning_rate.get_value())
        epoch_time = time() - start
        print i, ':', losses[-1], " took :", epoch_time

        if best > losses[-1]:
            last_best_index = i
            best = losses[-1]
        elif i - last_best_index > 20:
            best = losses[-1]
            new_rate = net.learning_rate.get_value() * decrement
            net.learning_rate.set_value(new_rate)
            last_best_index = i
            print("New learning rate", new_rate)
Ejemplo n.º 4
0
    def __call__(self, nn, train_history):
        """Set the attribute ``self.name`` of ``nn`` to this epoch's scheduled value.

        The schedule is a linear ramp from ``self.start`` to ``self.stop``
        with ``nn.max_epochs`` points, built on the first call and cached in
        ``self.ls``.
        """
        if self.ls is None:
            # Built lazily because the epoch count is only known from ``nn``.
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)

        current_epoch = train_history[-1]['epoch']
        # ``current_epoch - 1`` maps the recorded epoch number to a schedule index.
        scheduled = utils.float32(self.ls[current_epoch - 1])
        target = getattr(nn, self.name)
        target.set_value(scheduled)
Ejemplo n.º 5
0
    def __call__(self, nn, train_history):
        """Update the attribute named ``self.name`` on ``nn`` from a linear schedule.

        On first use a ramp of ``nn.max_epochs`` evenly spaced values between
        ``self.start`` and ``self.stop`` is stored in ``self.ls``; each call
        then applies the entry for the latest epoch in ``train_history``.
        """
        if self.ls is None:
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)

        latest_record = train_history[-1]
        schedule_index = latest_record['epoch'] - 1
        value = utils.float32(self.ls[schedule_index])
        getattr(nn, self.name).set_value(value)
Ejemplo n.º 6
0
def create_batch_func_params(input_length=300, freq_var=0.1, size=20):
    """Create a random batch of inputs and targets for ``func_to_learn``.

    ``size`` frequencies are drawn as ``|N(0, freq_var)| + 0.1``
    (``freq_var`` is passed as the ``scale`` of the normal distribution).

    :param input_length: number of time steps per sequence
    :param freq_var: scale of the normal distribution the frequencies come from
    :param size: number of sequences in the batch
    :return: ``(X, y, x_series)``: ``X`` holds each frequency repeated over
        the sequence with a trailing axis added, ``x_series`` holds ramps from
        0 to ``input_length * freq``, and ``y`` is ``func_to_learn(x_series)``
        with a trailing axis added
    """
    sampled = np.random.normal(scale=freq_var, size=size)
    freqs = float32(np.abs(sampled) + 0.1)

    constant_rows = [np.ones(input_length, dtype=floatX) * freq for freq in freqs]
    X = np.array(constant_rows, dtype=floatX)[:, :, np.newaxis]

    ramps = [
        np.linspace(0, input_length * freq, num=input_length, dtype=floatX)
        for freq in freqs
    ]
    x_series = np.array(ramps, dtype=floatX)

    targets = func_to_learn(x_series).astype(floatX)
    y = targets[:, :, np.newaxis]
    return X, y, x_series
Ejemplo n.º 7
0
 def __getitem__(self, index, return_manip=False):
     """Return ``(x, y)`` for the sample at ``index``.

     ``x`` is the loaded and transformed sample when ``self.lazy`` is set,
     otherwise ``None``. When ``self.train == False`` it is wrapped as
     ``(x, m)``, where ``m`` is 1.0 if ``'_manip'`` occurs in the sample's
     ``self.X`` entry and 0.0 otherwise. ``return_manip`` is accepted but
     not used.
     """
     x_link = self.X[index]
     x = self._load_and_transform(x_link) if self.lazy else None
     if self.train == False:
         is_manip = '_manip' in x_link
         x = (x, float32(1. if is_manip else 0.))
     return x, self.y[index]
Ejemplo n.º 8
0
 def __call__(self, nn, train_history):
     """Apply the value scheduled for the latest epoch, if there is one.

     ``self.schedule`` maps epoch numbers to new values for the attribute
     ``self.name`` of ``nn``. The special value ``'stop'`` saves the
     parameters to ``self.weights_file`` (when one is set) and raises
     ``StopIteration``.
     """
     epoch = train_history[-1]['epoch']
     if epoch not in self.schedule:
         return

     scheduled = self.schedule[epoch]
     if scheduled == 'stop':
         if self.weights_file is not None:
             nn.save_params_to(self.weights_file)
         raise StopIteration

     target = getattr(nn, self.name)
     target.set_value(utils.float32(scheduled))
Ejemplo n.º 9
0
def main():
    nottingham = pickle.load(file("Nottingham.pickle"))
    train = hot_to_sparse(nottingham['train'], 88)
    print len(train)
    np.random.shuffle(train)
    net = ClockworkRNN((88, (4, 30), 88), update_fn=adam, learning_rate=0.001, cost=quadratic_loss)
    batch_size = 700
    losses = []
    lrs = []
    norms, momentum_norms = [], []
    norms_thres, momentum_norms_thres = [], []
    # X, y, x_series = create_batch_func_params(500, 0.1, 2)
    best = np.inf
    last_best_index = 0
    decrement = float32(0.99)
    for i in range(2000):
        start = time()
        closs, cnorm, cnorm_threshold, cmomentum_norm, cmomentum_norm_threshold = [], [], [], [], []
        for g in xrange(0, len(train), batch_size):
            loss, norm, norm_theshold, momentum_norm, momentum_norm_threshold = net.bptt(train[g:g+batch_size, :-1], train[g:g+batch_size, 1:])
            closs.append(loss)
            cnorm.append(norm)
            cnorm_threshold.append(norms_thres)
            cmomentum_norm.append(momentum_norm)
            cmomentum_norm_threshold.append(momentum_norm_threshold)

        losses.append(np.mean(closs))
        norms.append(np.mean(cnorm))
        momentum_norms.append(np.mean(cnorm_threshold))
        norms_thres.append(np.mean(float32(cnorm_threshold)))
        momentum_norms_thres.append(np.mean([float32(cnmt) for cnmt in cmomentum_norm_threshold]))
        lrs.append(net.learning_rate.get_value())
        epoch_time = time() - start
        print i, ':', losses[-1], " took :", epoch_time

        if best > losses[-1]:
            last_best_index = i
            best = losses[-1]
        elif i - last_best_index > 20:
            best = losses[-1]
            new_rate = net.learning_rate.get_value() * decrement
            net.learning_rate.set_value(new_rate)
            last_best_index = i
            print("New learning rate", new_rate)
Ejemplo n.º 10
0
    def __init__(self, layer_specs=(1, (2, 20), (2, 10), 1), cost=quadratic_loss, update_fn=adam, learning_rate=0.001,
                 alpha=1.):
        """Build the stack of clockwork layers followed by one output layer.

        :param layer_specs: ``(input_size, hidden_spec, ..., output_size)``.
            The first two items of each hidden spec are passed to
            ``ClockworkLayer`` and their product is the input size of the
            next layer.
        :param cost: cost function, stored on ``self.cost``
        :param update_fn: update rule, stored on ``self.update_fn``
        :param learning_rate: initial value of the shared learning-rate variable
        :param alpha: initial value of the shared ``alpha`` variable
        """
        self.alpha = theano.shared(float32(alpha))
        self.layer_specs = layer_specs
        self.layers = []
        fan_in = layer_specs[0]
        self.params = []
        self.cost = cost
        self.update_fn = update_fn
        self.learning_rate = theano.shared(float32(learning_rate))
        self.training_step = theano.shared(float32(1))

        for hidden_spec in layer_specs[1:-1]:
            first, second = hidden_spec[0], hidden_spec[1]
            hidden_layer = ClockworkLayer(first, second, fan_in, activation_function=T.tanh)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            fan_in = first * second

        output_layer = OutputLayer(layer_specs[-1], fan_in, T.tanh)
        self.layers.append(output_layer)
        self.params.extend(output_layer.params)
Ejemplo n.º 11
0
def get_estimator(n_features, files, labels, eval_size=0.1):
    """Build the ``BlendNet`` regressor and register its train/eval split.

    The network is: input, two (dense + feature-pool) stages with
    ``N_HIDDEN_1`` and ``N_HIDDEN_2`` units, and a single linear output unit.

    :param n_features: width of the input layer
    :param files: passed to ``net.set_split`` together with ``labels``
    :param labels: passed to ``net.set_split`` together with ``files``
    :param eval_size: forwarded to ``BlendNet``; the kappa custom score is
        only installed when it is greater than zero
    :return: the configured ``BlendNet`` instance
    """
    def hidden_stage(num_units):
        # One rectified dense layer followed by feature pooling over pairs.
        return [
            (DenseLayer, {
                'num_units': num_units,
                'nonlinearity': rectify,
                'W': init.Orthogonal('relu'),
                'b': init.Constant(0.01),
            }),
            (FeaturePoolLayer, {'pool_size': 2}),
        ]

    layers = [(InputLayer, {'shape': (None, n_features)})]
    layers += hidden_stage(N_HIDDEN_1)
    layers += hidden_stage(N_HIDDEN_2)
    layers.append((DenseLayer, {'num_units': 1, 'nonlinearity': None}))

    if eval_size > 0.0:
        custom_score = ('kappa', utils.kappa)
    else:
        custom_score = None

    settings = {
        'layers': layers,
        'update': adam,
        'update_learning_rate': theano.shared(utils.float32(START_LR)),
        'batch_iterator_train': ResampleIterator(BATCH_SIZE),
        'batch_iterator_test': BatchIterator(BATCH_SIZE),
        'objective': nn.get_objective(l1=L1, l2=L2),
        'custom_score': custom_score,
        'on_epoch_finished': [
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        'regression': True,
        'max_epochs': N_ITER,
        'verbose': 1,
    }
    estimator = BlendNet(eval_size=eval_size, **settings)
    estimator.set_split(files, labels)
    return estimator
Ejemplo n.º 12
0
def create_net(config, **kwargs):
    """Build a ``Net`` from ``config``; ``kwargs`` override the defaults.

    :param config: configuration object providing ``layers``, ``get(...)``
        and the weight-file attributes read below
    :param kwargs: extra or overriding keyword arguments for ``Net``
    :return: the constructed ``Net``
    """
    layers = config.layers
    train_iterator = iterator.ResampleIterator(
        config, batch_size=config.get('batch_size_train'))
    test_iterator = iterator.SharedIterator(
        config, deterministic=True, batch_size=config.get('batch_size_test'))

    epoch_hooks = [
        Schedule('update_learning_rate',
                 config.get('schedule'),
                 weights_file=config.final_weights_file),
        SaveBestWeights(weights_file=config.weights_file,
                        loss='kappa',
                        greater_is_better=True),
        SaveWeights(config.weights_epoch, every_n_epochs=5),
        SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
    ]
    objective = get_objective()
    # The initial learning rate is the schedule entry stored under key 0.
    initial_rate = theano.shared(utils.float32(config.get('schedule')[0]))

    args = dict(
        layers=layers,
        batch_iterator_train=train_iterator,
        batch_iterator_test=test_iterator,
        on_epoch_finished=epoch_hooks,
        objective=objective,
        use_label_encoder=False,
        eval_size=0.1,
        regression=True,
        max_epochs=1000,
        verbose=1,
        update_learning_rate=initial_rate,
        update=nesterov_momentum,
        update_momentum=0.9,
        custom_score=('kappa', utils.kappa),
    )
    args.update(kwargs)
    return Net(**args)
Ejemplo n.º 13
0
def _gen_predict_train_loaders(max_len=500):
    """Yield prediction loaders over all regular and pseudo-labelled blocks.

    Blocks are read from ``args.data_path`` for classes 0-9: first the
    ``X_{c}_{b}.npy`` blocks, whose samples get a manipulation flag of 0,
    then the ``X_pseudo_{c}_{b}.npy`` blocks with flags taken from
    ``manip_pseudo_{c}_{b}.npy``. Samples accumulate across blocks and are
    flushed once at least ``max_len`` have been collected. The check runs
    after each whole block, so a chunk may hold more than ``max_len`` samples.

    :param max_len: minimum number of samples that triggers a flush
    :return: generator of ``(loader, y_b, manip_b)`` tuples
    """
    X_b = []
    y_b = []
    manip_b = []

    for c in xrange(10):
        for b in xrange(N_BLOCKS[c]):
            X_block = np.load(
                os.path.join(args.data_path, 'X_{0}_{1}.npy'.format(c, b)))
            X_b += list(X_block)
            y_b += [c] * len(X_block)
            manip_b += [float32(0.)] * len(X_block)
            if len(y_b) >= max_len:
                yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
                # Rebind rather than clear: the consumer keeps the yielded lists.
                X_b = []
                y_b = []
                manip_b = []

    for c in xrange(10):
        for b in xrange(N_PSEUDO_BLOCKS[c]):
            X_pseudo_block = np.load(
                os.path.join(args.data_path,
                             'X_pseudo_{0}_{1}.npy'.format(c, b)))
            X_b += list(X_pseudo_block)
            y_b += [c] * len(X_pseudo_block)
            manip_block = np.load(
                os.path.join(args.data_path,
                             'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_b += list(manip_block)
            if len(y_b) >= max_len:
                yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
                X_b = []
                y_b = []
                manip_b = []

    # Flush the remainder only when there is one. The former ``y_b > 0``
    # compared a list with an int, which is always true on Python 2 (so an
    # empty chunk was yielded) and a TypeError on Python 3.
    if y_b:
        yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
Ejemplo n.º 14
0
def _make_predict_train_loader(X_b, manip_b, manip_ratio=0.):
    """Build a non-shuffling DataLoader over (image, manip flag) pairs.

    Each sample goes through: array -> PIL image, optional random
    manipulation (otherwise a 512 center crop), the augmentations from
    ``make_aug_transforms``, and finally expansion into a stack of
    normalized crops. When ``args.crop_size == 512`` that is the image and
    its 90-degree rotation; otherwise the ten ``TenCrop`` crops and their
    rotations. The dataset labels are all zeros.

    :param X_b: sequence of image arrays
    :param manip_b: manipulation flags, one per image (same length as X_b)
    :param manip_ratio: threshold compared against ``rng.rand()`` to decide
        whether a sample with flag below 0.5 is randomly manipulated
    :return: the DataLoader (batch size and workers taken from ``args``)
    """
    assert len(X_b) == len(manip_b)

    # make dataset
    # Fixed seed for the transform RNG.
    rng = RNG(1337)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m): (Image.fromarray(x), m)),
        # if `val` == False
        #   972/1982 manip pseudo images
        #   images : pseudo = approx. 48 : 8 = 6 : 1
        #   to get unalt : manip = 70 : 30 (like in test metric),
        #   we manip ~24.7% of non-pseudo images
        # else:
        #   we simply use same ratio as in validation (0.18)
        # NOTE(review): `m[0]` assumes each flag is indexable (e.g. a
        # 1-element array) -- confirm against what `float32` returns.
        transforms.Lambda(lambda (img, m): (make_random_manipulation(img, rng, crop_policy='center', crop_size=512), float32(1.)) if \
                          m[0] < 0.5 and rng.rand() < manip_ratio else (center_crop(img, 512), m))
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.crop_size == 512:
        # Two views per sample: the image and its 90-degree rotation.
        train_transforms_list += [
            transforms.Lambda(lambda (
                img, m): ([img, img.transpose(Image.ROTATE_90)], [m] * 2)),
            transforms.Lambda(lambda (crops, ms): (torch.stack([
                transforms.Normalize(args.means, args.stds)
                (transforms.ToTensor()(crop)) for crop in crops
            ]), torch.from_numpy(np.asarray(ms))))
        ]
    else:
        # Twenty views per sample: TenCrop crops plus their 90-degree rotations.
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.TenCrop(
                args.crop_size)(img), [m] * 10)),
            transforms.Lambda(lambda (imgs, ms): (list(
                imgs) + [img.transpose(Image.ROTATE_90)
                         for img in imgs], ms + ms)),
            transforms.Lambda(lambda (crops, ms): (torch.stack([
                transforms.Normalize(args.means, args.stds)
                (transforms.ToTensor()(crop)) for crop in crops
            ]), torch.from_numpy(np.asarray(ms))))
        ]
    train_transform = transforms.Compose(train_transforms_list)
    # Labels are placeholders (all zeros); only X carries information here.
    dataset = make_numpy_dataset(X=[(x, m) for x, m in zip(X_b, manip_b)],
                                 y=np.zeros(len(X_b), dtype=np.int64),
                                 transform=train_transform)

    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers)
    return loader
Ejemplo n.º 15
0
def train(optimizer, train_optimizer=train_optimizer):
    """Build the validation loader, then train block by block.

    Validation arrays are loaded from ``args.data_path``, center-cropped to
    ``args.crop_size`` when the images are larger, and optionally passed
    through ``conv_K``. ``y_val`` is also saved into ``args.model_dirpath``.
    Training then runs ``args.epochs / args.epochs_per_unique_data + 1``
    rounds. Each round builds a fresh training loader for the current block
    index and calls ``train_optimizer`` for ``args.epochs_per_unique_data``
    more epochs.

    :param optimizer: object exposing ``epoch`` and ``max_epoch``; passed to
        ``train_optimizer``
    :param train_optimizer: callable ``(optimizer, train_loader, val_loader)``
    """
    # load and crop validation data
    print "Loading data ..."
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy'))
    manip_val = np.zeros(
        (len(y_val), 1), dtype=np.float32
    )  # np.load(os.path.join(args.data_path, 'manip_with_pseudo.npy'))  # 68/480 manipulated
    c = args.crop_size
    C = X_val.shape[1]
    if c < C:
        # Center crop on axes 1 and 2; `/` is used as integer division here
        # (Python 2 semantics with int operands).
        X_val = X_val[:, C / 2 - c / 2:C / 2 + c / 2,
                      C / 2 - c / 2:C / 2 + c / 2, :]
    if args.kernel:
        X_val = [conv_K(x) for x in X_val]

    # make validation loader
    # A falsy random_seed (None or 0) leaves the RNG unseeded.
    rng = RNG(args.random_seed + 42 if args.random_seed else None)
    val_transform = transforms.Compose([
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng, crop_policy='center'), float32(1.), y) if\
                                               m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO else (img, m, y)),
        # Random 90-degree rotation for classes where it is allowed; the
        # label is dropped from the tuple from here on.
        transforms.Lambda(lambda (img, m, y): ([img,
                                                img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                                                KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
        transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m)),
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(X=[
        (x, m, y) for x, m, y in zip(X_val, manip_val, y_val)
    ],
                                     y=y_val,
                                     transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    # Number of training rounds (integer division under Python 2).
    n_runs = args.epochs / args.epochs_per_unique_data + 1

    for _ in xrange(n_runs):
        # The block index advances with the number of epochs already trained.
        train_loader = make_train_loaders(block_index=optimizer.epoch /
                                          args.epochs_per_unique_data)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
Ejemplo n.º 16
0
def make_train_loaders(block_index):
    """Build the training DataLoader for one block of data.

    For each of the 10 classes, one regular block and one pseudo-labelled
    block are loaded from ``args.data_path``. The block number is
    ``block_index`` modulo the number of blocks of that class. Regular
    samples get a manipulation flag of 0; pseudo samples use the flags stored
    in ``manip_pseudo_*.npy``. With ``args.bootstrap`` the samples are
    re-indexed through ``b_ind`` / ``b_pseudo_ind``. The combined data is
    shuffled with a seed derived from ``block_index`` and wrapped in a
    dataset with manipulation, rotation, augmentation and normalization
    transforms.

    :param block_index: index selecting which data block to load
    :return: DataLoader using a ``StratifiedSampler`` over the labels
    """
    # assemble data
    X_train = []
    y_train = []
    manip_train = []

    for c in xrange(10):
        X_block = np.load(
            os.path.join(args.data_path,
                         'X_{0}_{1}.npy'.format(c, block_index % N_BLOCKS[c])))
        X_block = [X_block[i] for i in xrange(len(X_block))]
        if args.bootstrap:
            # NOTE(review): b_ind is defined outside this block; presumably
            # holds bootstrap sample indices per class and block -- confirm.
            X_block = [X_block[i] for i in b_ind[c][block_index % N_BLOCKS[c]]]
        X_train += X_block
        y_train += np.repeat(c, len(X_block)).tolist()
        manip_train += [float32(0.)] * len(X_block)

    for c in xrange(10):
        X_pseudo_block = np.load(
            os.path.join(
                args.data_path, 'X_pseudo_{0}_{1}.npy'.format(
                    c, block_index % N_PSEUDO_BLOCKS[c])))
        X_pseudo_block = [
            X_pseudo_block[i] for i in xrange(len(X_pseudo_block))
        ]
        if args.bootstrap:
            X_pseudo_block = [
                X_pseudo_block[i]
                for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]
            ]
        X_train += X_pseudo_block
        y_train += np.repeat(c, len(X_pseudo_block)).tolist()
        manip_block = np.load(
            os.path.join(
                args.data_path, 'manip_pseudo_{0}_{1}.npy'.format(
                    c, block_index % N_PSEUDO_BLOCKS[c])))
        manip_block = [m for m in manip_block]
        if args.bootstrap:
            # Same indices as for the images, so flags stay aligned.
            manip_block = [
                manip_block[i]
                for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]
            ]
        manip_train += manip_block

    # One shared permutation keeps images, labels and flags aligned.
    shuffle_ind = range(len(y_train))
    RNG(seed=block_index).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
                          m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        # Random 90-degree rotation for classes where it is allowed; the
        # label is dropped from the tuple from here on.
        transforms.Lambda(lambda (img, m, y): ([img,
                                                img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                                                KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)

    if args.kernel:
        # conv_K works on a uint8 array; the result is moved to
        # channel-first layout before becoming a tensor.
        train_transforms_list += [
            transforms.Lambda(lambda (img, m):
                              (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m):
                              (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(
            args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[
        (x, m, y) for x, m, y in zip(X_train, manip_train, y_train)
    ],
                                 y=y_train,
                                 transform=train_transform)

    # make loader
    # Ordering is delegated to the sampler, hence shuffle=False.
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(
                            class_vector=np.asarray(y_train),
                            batch_size=args.batch_size))
    return loader
Ejemplo n.º 17
0
def make_train_loaders(block_index, distill=True):
    """Build the training DataLoader for one block, with per-sample soft logits.

    With ``distill`` set, one regular and one pseudo-labelled block per class
    are loaded from ``args.data_path``. Pseudo block numbers are offset by
    ``N_PSEUDO_BLOCKS_FOR_VALIDATION``. Teacher logits are read from
    ``logits_train.npy``, centered per row and divided by
    ``max(args.temperature, 1.)``. Without ``distill``, blocks are taken from
    the file lists in ``G`` and the soft logits are all zeros.

    :param block_index: index selecting which data block to load
    :param distill: whether to load the distillation data and teacher logits
    :return: DataLoader using a ``StratifiedSampler`` over the labels
    """
    # assemble data
    X_train = []
    y_train = []
    manip_train = []
    soft_logits = []

    if distill:
        # Row indices into logits_train.npy, collected in sample order.
        soft_logits_ind = []

        for c in xrange(10):
            b = block_index % N_BLOCKS[c]
            X_block = np.load(
                os.path.join(args.data_path, 'X_{0}_{1}.npy'.format(c, b)))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            if args.bootstrap:
                X_block = [X_block[i] for i in b_ind[c][b]]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            soft_logits_ind_block = SOFT_LOGITS_IND[c][b]
            if args.bootstrap:
                # Same bootstrap indices as for the images, to stay aligned.
                soft_logits_ind_block = [
                    soft_logits_ind_block[i] for i in b_ind[c][b]
                ]
            soft_logits_ind += soft_logits_ind_block

        for c in xrange(10):
            # Skip the pseudo blocks reserved for validation.
            b = N_PSEUDO_BLOCKS_FOR_VALIDATION + block_index % N_PSEUDO_BLOCKS[
                c]
            X_pseudo_block = np.load(
                os.path.join(args.data_path,
                             'X_pseudo_{0}_{1}.npy'.format(c, b)))
            X_pseudo_block = [
                X_pseudo_block[i] for i in xrange(len(X_pseudo_block))
            ]
            if args.bootstrap:
                X_pseudo_block = [
                    X_pseudo_block[i] for i in b_pseudo_ind[c][b]
                ]
            X_train += X_pseudo_block
            y_train += np.repeat(c, len(X_pseudo_block)).tolist()
            manip_block = np.load(
                os.path.join(args.data_path,
                             'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_block = [m for m in manip_block]
            if args.bootstrap:
                manip_block = [manip_block[i] for i in b_pseudo_ind[c][b]]
            manip_train += manip_block
            # Pseudo-class entries are stored after the 10 regular classes.
            soft_logits_ind_block = SOFT_LOGITS_IND[10 + c][b]
            if args.bootstrap:
                soft_logits_ind_block = [
                    soft_logits_ind_block[i] for i in b_pseudo_ind[c][b]
                ]
            soft_logits_ind += soft_logits_ind_block

        soft_logits = np.load(os.path.join(
            args.data_path, 'logits_train.npy')).astype(np.float32)
        # Center each row, then soften by the temperature (never below 1).
        soft_logits -= soft_logits.mean(axis=1)[:, np.newaxis]
        soft_logits = [
            soft_logits[i] / max(args.temperature, 1.) for i in soft_logits_ind
        ]

    else:
        for c in xrange(10):
            X_block = np.load(
                os.path.join(args.data_path, G[c][block_index % len(G[c])]))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            # Placeholder logits; the same zero array object is repeated.
            soft_logits += [np.zeros((10), dtype=np.float32)] * len(X_block)

    # One shared permutation keeps images, labels, flags and logits aligned.
    shuffle_ind = range(len(y_train))
    RNG(seed=block_index * 41).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]
    soft_logits = [soft_logits[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
                          m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        # Random 90-degree rotation, applied for every class here (the
        # condition is the constant True); the label is dropped afterwards.
        transforms.Lambda(lambda (img, m, y): ([img,
                                                img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                                                True else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)

    if args.kernel:
        # conv_K works on a uint8 array; the result is moved to
        # channel-first layout before becoming a tensor.
        train_transforms_list += [
            transforms.Lambda(lambda (img, m):
                              (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m):
                              (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(
            args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[
        (x, m, y) for x, m, y in zip(X_train, manip_train, y_train)
    ],
                                 y=y_train,
                                 transform=train_transform,
                                 soft_logits=soft_logits)

    # make loader
    # Ordering is delegated to the sampler, hence shuffle=False.
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(
                            class_vector=np.asarray(y_train),
                            batch_size=args.batch_size))
    return loader
Ejemplo n.º 18
0
def _center_crop_batch(batch, side):
    """Return the central ``side`` x ``side`` window of every item in ``batch``.

    ``batch`` is sliced as ``batch[:, rows, cols, :]``, so it must have four
    axes.  The extent of axis 1 is used for both spatial axes.  The input is
    returned untouched when ``side`` is not smaller than that extent.
    """
    extent = batch.shape[1]
    if side < extent:
        # Floor division keeps the slice bounds integral on Python 2 and 3.
        lo = extent // 2 - side // 2
        hi = extent // 2 + side // 2
        batch = batch[:, lo:hi, lo:hi, :]
    return batch


def train(optimizer, train_optimizer=train_optimizer):
    """Build the validation loader, then run the distillation and main stages.

    Validation data is ``X_val.npy`` / ``y_val.npy`` from ``args.data_path``
    extended with ``N_PSEUDO_BLOCKS_FOR_VALIDATION`` pseudo-label blocks for
    each of the class indices 0..9.  Every array is centre-cropped to
    ``2 * args.crop_size`` per side before the per-sample transforms run.
    The combined label list is saved to ``y_val.npy`` in
    ``args.model_dirpath``.

    Training then runs in chunks of ``args.epochs_per_unique_data`` epochs,
    requesting a fresh train loader for every chunk: first
    ``args.distill_epochs // args.epochs_per_unique_data`` chunks with
    ``distill=True``, then ``args.epochs // args.epochs_per_unique_data + 1``
    chunks with ``distill=False`` (``optimizer.distill_cost`` is reset to 0
    before each of those).

    :param optimizer: object whose ``epoch`` is read and whose ``max_epoch``
        and ``distill_cost`` attributes are assigned here
    :param train_optimizer: callable invoked as
        ``train_optimizer(optimizer, train_loader, val_loader)``
    """
    # load and crop validation data
    print("Loading data ...")
    window = args.crop_size * 2
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy')).tolist()
    # Real validation samples start with a zero manipulation flag; the
    # transform below may switch some of them to 1.
    manip_val = np.zeros((len(y_val), 1), dtype=np.float32)
    X_val = _center_crop_batch(X_val, window)
    if args.kernel:
        # NOTE(review): conv_K is applied here but not to the pseudo blocks
        # appended below -- confirm this asymmetry is intended.
        X_val = [conv_K(x) for x in X_val]
    X_val = list(X_val)
    manip_val = list(manip_val)

    for b in range(N_PSEUDO_BLOCKS_FOR_VALIDATION):
        for c in range(10):
            X_block = np.load(
                os.path.join(args.data_path,
                             'X_pseudo_{0}_{1}.npy'.format(c, b)))
            # Every sample of this block gets the class index as its label.
            y_val += [c] * len(X_block)
            X_val += list(_center_crop_batch(X_block, window))
            manip_block = np.load(
                os.path.join(args.data_path,
                             'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_val += list(manip_block)

    # make validation loader
    # NOTE(review): a seed of 0 is falsy and therefore yields RNG(None) --
    # confirm whether 0 is meant to be a usable seed.
    rng = RNG(args.random_seed + 42 if args.random_seed else None)

    # Stateless transform objects, built once instead of once per sample.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(args.means, args.stds)

    def _to_pil(sample):
        x, m, y = sample
        return Image.fromarray(x), m, y

    def _manipulate_or_crop(sample):
        img, m, y = sample
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        # rng is consulted only for samples whose manipulation flag is unset.
        if m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO:
            manipulated = make_random_manipulation(img, rng,
                                                   crop_policy='center')
            return manipulated, float32(1.), y
        return center_crop(img, args.crop_size), m, y

    def _to_tensor(sample):
        # The label is dropped here; only (image, manip flag) moves on.
        img, m, y = sample
        return to_tensor(img), m

    def _normalize(sample):
        img, m = sample
        return normalize(img), m

    # No random 90-degree rotation is applied to validation samples.
    val_transform = transforms.Compose([
        transforms.Lambda(_to_pil),
        transforms.Lambda(_manipulate_or_crop),
        transforms.Lambda(_to_tensor),
        transforms.Lambda(_normalize),
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(
        X=[(x, m, y) for x, m, y in zip(X_val, manip_val, y_val)],
        y=y_val,
        transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    # Distillation stage.
    for _ in range(args.distill_epochs // args.epochs_per_unique_data):
        train_loader = make_train_loaders(
            block_index=optimizer.epoch // args.epochs_per_unique_data,
            distill=True)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)

    # Main stage.
    n_runs = args.epochs // args.epochs_per_unique_data + 1

    for _ in range(n_runs):
        optimizer.distill_cost = 0.
        train_loader = make_train_loaders(
            block_index=optimizer.epoch // args.epochs_per_unique_data,
            distill=False)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
Ejemplo n.º 19
0
def _merge_twin_legends(primary, secondary, loc='best'):
    """Draw one legend on ``primary`` listing the artists of both axes."""
    handles_a, labels_a = primary.get_legend_handles_labels()
    handles_b, labels_b = secondary.get_legend_handles_labels()
    primary.legend(handles_a + handles_b, labels_a + labels_b, loc=loc)


def _save_loss_and_lr_plot(losses, lrs, filename):
    """Plot loss (left axis) and learning rate (right axis); save to file."""
    fig, loss_ax = plt.subplots(figsize=(30, 10))
    lr_ax = loss_ax.twinx()

    loss_ax.plot(losses, label='loss')
    loss_ax.set_ylabel('Loss')

    lr_ax.plot(lrs, color="red", label='learning rate')
    lr_ax.set_ylabel('Learning rate')

    _merge_twin_legends(loss_ax, lr_ax)
    fig.savefig(filename)
    # Close, not just clear, so the figure does not stay registered in pyplot.
    plt.close(fig)


def main():
    """Train a ClockworkRNN, pickle it and save diagnostic plots.

    Runs 8000 ``net.bptt`` iterations on one generated batch.  Whenever the
    loss has gone more than 20 iterations without improving on the best value
    seen, the learning rate is multiplied by 0.99 and the current loss becomes
    the new baseline.

    Files written to the current working directory: ``rnn.pickle``,
    ``rnn_learning_rate_and_cost.jpg``,
    ``rnn_learning_rate_and_cost_last1000.jpg``, ``rnn_norms.jpg`` and
    ``rnn_prediction_vs_actual.jpg``.
    """
    net = ClockworkRNN((1, (4, 30), 1), update_fn=adam, learning_rate=0.001)
    losses, lrs = [], []
    norms, momentum_norms = [], []
    norms_thres, momentum_norms_thres = [], []
    X, y, _ = create_batch_func_params(410, 0.1, 2000)
    best = np.inf
    last_best_index = 0
    decrement = float32(0.99)
    for i in range(8000):
        start = time()
        (loss, norm, norm_threshold,
         momentum_norm, momentum_norm_threshold) = net.bptt(X, y)
        losses.append(loss)
        norms.append(norm)
        momentum_norms.append(momentum_norm)
        norms_thres.append(float32(norm_threshold))
        momentum_norms_thres.append(float32(momentum_norm_threshold))
        lrs.append(net.learning_rate.get_value())
        epoch_time = time() - start
        # %s mirrors what the print statement emitted (str() of each item).
        print("%s : %s  took : %s" % (i, loss, epoch_time))

        if best > loss:
            last_best_index = i
            best = loss
        elif i - last_best_index > 20:
            # Stalled: decay the learning rate and restart the patience window.
            best = loss
            new_rate = net.learning_rate.get_value() * decrement
            net.learning_rate.set_value(new_rate)
            last_best_index = i
            # Formatted explicitly so it never prints as a tuple repr.
            print("New learning rate %s" % (new_rate,))

    with open('rnn.pickle', 'wb') as f:
        pickle.dump(net, f)

    _save_loss_and_lr_plot(losses, lrs, 'rnn_learning_rate_and_cost.jpg')
    _save_loss_and_lr_plot(losses[-1000:], lrs[-1000:],
                           'rnn_learning_rate_and_cost_last1000.jpg')

    # NORMS
    fig, grad_ax = plt.subplots(figsize=(30, 10))
    momentum_ax = grad_ax.twinx()

    grad_ax.plot(norms, label="norm", color='red')
    grad_ax.plot(norms_thres, label="norm threshold", color='blue')
    grad_ax.set_ylabel('Gradient norm')

    momentum_ax.plot(momentum_norms, label="momentum norms", color='green')
    momentum_ax.plot(momentum_norms_thres, label="momentum norms threshold",
                     color='purple')
    momentum_ax.set_ylabel('Momentum norm')
    _merge_twin_legends(grad_ax, momentum_ax)
    fig.savefig('rnn_norms.jpg')
    plt.close(fig)

    # Fresh batch for the prediction-vs-actual comparison.
    X, y, _ = create_batch_func_params(1500, 0.1, 10)

    prediction = net.fprop(X)
    # squeeze=False always returns a 2-D axes array, so a single-sample batch
    # is indexable in the same way as a larger one.
    fig, axes_grid = plt.subplots(len(X), sharex=True, squeeze=False,
                                  figsize=(160, 5 * X.shape[0]))
    for i, axis in enumerate(axes_grid[:, 0]):
        # NOTE(review): X[i][i] reads element i of sample i -- confirm this
        # is the intended frequency value for the title.
        axis.set_title('freq:' + str(X[i][i]))
        axis.plot(prediction[i], label='model', color='blue')
        axis.plot(y[i], label='actual', color='green')
        error_axis = axis.twinx()
        error_axis.plot(np.abs(y[i] - prediction[i]), label='error',
                        color='red')
        _merge_twin_legends(axis, error_axis, loc=0)

    fig.savefig('rnn_prediction_vs_actual.jpg')
    plt.close(fig)

    print("done")