def build_network(verbose=False, **kwargs):
    """Build a nolearn neural network and return it.

    :param network: pre-defined network name
    :param data_augmentation: type of batch data augmentation
        ('no', 'flip', 'full', 'resampling' or 'resampling-flip')
    :return: nolearn NeuralNet object
    """
    # Unpack the expected keyword arguments explicitly (the original looped over
    # kwargs and used ``exec(key + '=val')``, which is fragile and hides which
    # parameters the function actually needs). Defaults follow the commented-out
    # signature of the original: data_augmentation='full', lambda2=0.0005,
    # max_epochs=50, nb_channels=3, crop_size=200, activation_function=rectify,
    # batch_size=48, dataset_ratio=3.8, final_ratio=2.
    network = kwargs['network']
    data_augmentation = kwargs.get('data_augmentation', 'full')
    lambda2 = kwargs.get('lambda2', 0.0005)
    max_epochs = kwargs.get('max_epochs', 50)
    nb_channels = kwargs.get('nb_channels', 3)
    crop_size = kwargs.get('crop_size', 200)
    activation_function = kwargs.get('activation_function', rectify)
    batch_size = kwargs.get('batch_size', 48)
    dataset_ratio = kwargs.get('dataset_ratio', 3.8)
    final_ratio = kwargs.get('final_ratio', 2.)
    learning_init = kwargs['learning_init']
    learning_final = kwargs['learning_final']
    patience = kwargs['patience']
    # Parameters only used by the 'resampling' iterator.
    scale_delta = kwargs.get('scale_delta')
    max_trans = kwargs.get('max_trans')
    angle_factor = kwargs.get('angle_factor')

    if data_augmentation == 'no':
        batch_iterator_train = BatchIterator(batch_size=batch_size)
    elif data_augmentation == 'flip':
        batch_iterator_train = FlipBatchIterator(batch_size=batch_size)
    elif data_augmentation == 'full':
        batch_iterator_train = DataAugmentationBatchIterator(batch_size=batch_size,
                                                             crop_size=crop_size)
    elif data_augmentation == 'resampling':
        batch_iterator_train = ResamplingBatchIterator(batch_size=batch_size,
                                                       crop_size=crop_size,
                                                       scale_delta=scale_delta,
                                                       max_trans=max_trans,
                                                       angle_factor=angle_factor,
                                                       max_epochs=max_epochs,
                                                       dataset_ratio=dataset_ratio,
                                                       final_ratio=final_ratio)
    elif data_augmentation == 'resampling-flip':
        batch_iterator_train = ResamplingFlipBatchIterator(batch_size=batch_size,
                                                           max_epochs=max_epochs,
                                                           dataset_ratio=dataset_ratio,
                                                           final_ratio=final_ratio)
    else:
        raise ValueError(data_augmentation + ' is an unknown data augmentation strategy.')

    layers = build_layers(network, nb_channels=nb_channels, crop_size=crop_size,
                          activation_function=activation_function)

    conv_net = NeuralNet(
        layers,
        update=nesterov_momentum,
        update_learning_rate=theano.shared(float32(learning_init)),
        update_momentum=theano.shared(float32(0.9)),
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', start=learning_init, stop=learning_final),
            AdjustVariable('update_momentum', start=0.9, stop=0.999),
            EarlyStopping(patience=patience),
        ],
        batch_iterator_train=batch_iterator_train,
        # batch_iterator_test=DataAugmentationBatchIterator(batch_size=31, crop_size=crop_size),
        objective=regularization_objective,
        objective_lambda2=lambda2,
        train_split=TrainSplit(eval_size=0.1, stratify=True),
        custom_score=('AUC-ROC', auc_roc),
        max_epochs=max_epochs,
        verbose=3,
    )

    if verbose:
        print conv_net.__dict__

    return conv_net
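# Hypothetical usage sketch (illustration only): the network name, the training
# arrays and the exact hyper-parameter values below are placeholders, not
# settings taken from the original code.
#
#   net = build_network(network='my_network', data_augmentation='full',
#                       batch_size=48, crop_size=200, max_epochs=50,
#                       learning_init=0.01, learning_final=0.0001,
#                       lambda2=0.0005, patience=20, verbose=True)
#   net.fit(X_train, y_train)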
def setup_net(self, print_out=True):
    if print_out:
        self.printer = PrintLogSave(out_file=self.out_file)
        on_epoch_finished = [self.printer,
                             SaveBestWeights(method=self.method,
                                             full_dataset=self.full_dataset),
                             EarlyStopping(patience=200, out_file=self.out_file)]
        on_training_started = [SaveLayerInfo(out_file=self.out_file)]
    else:
        on_epoch_finished = [EarlyStopping(patience=200, out_file=self.out_file)]
        on_training_started = []

    if self.flip:
        # batch_iterator_train = flip.ResizeBatchIterator(batch_size=128)
        batch_iterator_train = flip.FlipBatchIterator(batch_size=128)
    else:
        batch_iterator_train = flip.ResizeBatchIterator(batch_size=128)

    if self.adaptive_learning:
        update_learning_rate = theano.shared(utils.float32(0.03))
        update_momentum = theano.shared(utils.float32(0.9))
        on_epoch_finished.append(AdjustVariable('update_learning_rate', start=0.03, stop=0.0001))
        on_epoch_finished.append(AdjustVariable('update_momentum', start=0.9, stop=0.999))
    else:
        update_learning_rate = 0.01
        update_momentum = 0.9

    layers, layer_params = my_net.MyNeuralNet.produce_layers(self.num_layers, dropout=self.dropout)

    self.net = my_net.MyNeuralNet(
        layers=layers,
        num_layers=self.num_layers,
        input_shape=(None, 3, utils.CROP_SIZE, utils.CROP_SIZE),
        output_num_units=3,
        output_nonlinearity=lasagne.nonlinearities.softmax,
        preproc_scaler=None,

        # learning rates
        update_learning_rate=update_learning_rate,
        update_momentum=update_momentum,

        # printing
        net_name=self.net_name,
        on_epoch_finished=on_epoch_finished,
        on_training_started=on_training_started,

        # data augmentation
        batch_iterator_test=flip.ResizeBatchIterator(batch_size=128),
        batch_iterator_train=batch_iterator_train,

        max_epochs=self.epochs,
        verbose=1,
        **layer_params
    )
    return layer_params
def main():
    nottingham = pickle.load(open("Nottingham.pickle", "rb"))
    train = hot_to_sparse(nottingham['train'], 88)
    print len(train)
    np.random.shuffle(train)

    net = ClockworkRNN((88, (4, 30), 88), update_fn=adam, learning_rate=0.001,
                       cost=quadratic_loss)

    batch_size = 700
    losses = []
    lrs = []
    norms, momentum_norms = [], []
    norms_thres, momentum_norms_thres = [], []
    # X, y, x_series = create_batch_func_params(500, 0.1, 2)

    best = np.inf
    last_best_index = 0
    decrement = float32(0.99)
    for i in range(2000):
        start = time()
        # Per-epoch accumulators for loss, gradient norms and momentum norms.
        closs, cnorm, cnorm_threshold = [], [], []
        cmomentum_norm, cmomentum_norm_threshold = [], []
        for g in xrange(0, len(train), batch_size):
            loss, norm, norm_threshold, momentum_norm, momentum_norm_threshold = net.bptt(
                train[g:g + batch_size, :-1], train[g:g + batch_size, 1:])
            closs.append(loss)
            cnorm.append(norm)
            cnorm_threshold.append(norm_threshold)
            cmomentum_norm.append(momentum_norm)
            cmomentum_norm_threshold.append(momentum_norm_threshold)

        losses.append(np.mean(closs))
        norms.append(np.mean(cnorm))
        momentum_norms.append(np.mean(cmomentum_norm))
        norms_thres.append(np.mean([float32(cnt) for cnt in cnorm_threshold]))
        momentum_norms_thres.append(
            np.mean([float32(cnmt) for cnmt in cmomentum_norm_threshold]))
        lrs.append(net.learning_rate.get_value())

        epoch_time = time() - start
        print i, ':', losses[-1], " took :", epoch_time

        # Decay the learning rate if the loss has not improved for 20 epochs.
        if best > losses[-1]:
            last_best_index = i
            best = losses[-1]
        elif i - last_best_index > 20:
            best = losses[-1]
            new_rate = net.learning_rate.get_value() * decrement
            net.learning_rate.set_value(new_rate)
            last_best_index = i
            print("New learning rate", new_rate)
def __call__(self, nn, train_history):
    if self.ls is None:
        self.ls = np.linspace(self.start, self.stop, nn.max_epochs)
    epoch = train_history[-1]['epoch']
    new_value = utils.float32(self.ls[epoch - 1])
    getattr(nn, self.name).set_value(new_value)
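# For context: a minimal sketch of the class around the __call__ above,
# following the common nolearn/Lasagne AdjustVariable pattern. The attribute
# names are inferred from their use above; the author's actual __init__ and
# default values may differ.
class AdjustVariable(object):
    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name                 # e.g. 'update_learning_rate'
        self.start, self.stop = start, stop
        self.ls = None                   # linear schedule, built lazily on first call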
def create_batch_func_params(input_length=300, freq_var=0.1, size=20):
    freqs = float32(np.abs(np.random.normal(scale=freq_var, size=size)) + 0.1)
    # freqs = np.ones(size, dtype=floatX) * float32(0.1)
    X = np.array([np.ones(input_length, dtype=floatX) * freq for freq in freqs],
                 dtype=floatX)[:, :, np.newaxis]
    x_series = np.array([np.linspace(0, input_length * freq, num=input_length, dtype=floatX)
                         for freq in freqs], dtype=floatX)
    y = func_to_learn(x_series).astype(floatX)[:, :, np.newaxis]
    return X, y, x_series
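# The float32/floatX helpers used throughout these snippets are not shown in
# the excerpts; a typical definition (an assumption, following the usual
# Theano/Lasagne convention) would be:
import numpy as np
import theano

floatX = theano.config.floatX          # usually 'float32' on GPU setups


def float32(k):
    # Cast scalars or arrays to float32 so Theano shared variables stay compact.
    return np.cast['float32'](k)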
def __getitem__(self, index, return_manip=False):
    x_link = self.X[index]
    x = None
    if self.lazy:
        x = self._load_and_transform(x_link)
    if not self.train:
        # At test time, attach a flag saying whether the image was manipulated
        # (inferred from the '_manip' marker in the file name).
        m = float32(1. if '_manip' in x_link else 0.)
        x = (x, m)
    return x, self.y[index]
def __call__(self, nn, train_history):
    epoch = train_history[-1]['epoch']
    if epoch in self.schedule:
        new_value = self.schedule[epoch]
        if new_value == 'stop':
            if self.weights_file is not None:
                nn.save_params_to(self.weights_file)
            raise StopIteration
        getattr(nn, self.name).set_value(utils.float32(new_value))
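# For context: a minimal sketch of the Schedule class this __call__ belongs to,
# with the constructor arguments inferred from the create_net() call further
# below (a parameter name, an {epoch: value} dict, and an optional weights
# file). This is an assumption about the surrounding code, not the author's
# exact definition.
class Schedule(object):
    def __init__(self, name, schedule, weights_file=None):
        self.name = name                  # e.g. 'update_learning_rate'
        self.schedule = schedule          # dict mapping epoch -> new value or 'stop'
        self.weights_file = weights_file  # where to save params when stopping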
def __init__(self, layer_specs=(1, (2, 20), (2, 10), 1), cost=quadratic_loss,
             update_fn=adam, learning_rate=0.001, alpha=1.):
    self.alpha = theano.shared(float32(alpha))
    self.layer_specs = layer_specs
    self.layers = []
    previous_size = layer_specs[0]
    self.params = []
    self.cost = cost
    self.update_fn = update_fn
    self.learning_rate = theano.shared(float32(learning_rate))
    self.training_step = theano.shared(float32(1))

    # Hidden clockwork layers: each spec is (number of groups, units per group),
    # so the layer's total output size is their product.
    for i, spec in enumerate(layer_specs[1:-1]):
        spec = [spec[0], spec[1], previous_size]
        self.layers.append(ClockworkLayer(*spec, activation_function=T.tanh))
        self.params.extend(self.layers[-1].params)
        previous_size = spec[0] * spec[1]

    # Output layer maps the last hidden layer to layer_specs[-1] units.
    self.layers.append(OutputLayer(layer_specs[-1], previous_size, T.tanh))
    self.params.extend(self.layers[-1].params)
def get_estimator(n_features, files, labels, eval_size=0.1):
    layers = [
        (InputLayer, {'shape': (None, n_features)}),
        (DenseLayer, {'num_units': N_HIDDEN_1, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': N_HIDDEN_2, 'nonlinearity': rectify,
                      'W': init.Orthogonal('relu'), 'b': init.Constant(0.01)}),
        (FeaturePoolLayer, {'pool_size': 2}),
        (DenseLayer, {'num_units': 1, 'nonlinearity': None}),
    ]
    args = dict(
        layers=layers,
        update=adam,
        update_learning_rate=theano.shared(utils.float32(START_LR)),
        batch_iterator_train=ResampleIterator(BATCH_SIZE),
        batch_iterator_test=BatchIterator(BATCH_SIZE),
        objective=nn.get_objective(l1=L1, l2=L2),
        # eval_size=eval_size,
        custom_score=('kappa', utils.kappa) if eval_size > 0.0 else None,
        on_epoch_finished=[
            nn.Schedule('update_learning_rate', SCHEDULE),
        ],
        regression=True,
        max_epochs=N_ITER,
        verbose=1,
    )
    net = BlendNet(eval_size=eval_size, **args)
    net.set_split(files, labels)
    return net
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True, batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file,
                            loss='kappa', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 1000,
        'verbose': 1,
        'update_learning_rate': theano.shared(
            utils.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('kappa', utils.kappa),
    }
    args.update(kwargs)
    net = Net(**args)
    return net
def _gen_predict_train_loaders(max_len=500):
    X_b = []
    y_b = []
    manip_b = []
    for c in xrange(10):
        for b in xrange(N_BLOCKS[c]):
            X_block = np.load(
                os.path.join(args.data_path,
                             'X_{0}_{1}.npy'.format(c, b % N_BLOCKS[c])))
            X_b += [X_block[i] for i in xrange(len(X_block))]
            y_b += np.repeat(c, len(X_block)).tolist()
            manip_b += [float32(0.)] * len(X_block)
            if len(y_b) >= max_len:
                yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
                X_b = []
                y_b = []
                manip_b = []
    for c in xrange(10):
        for b in xrange(N_PSEUDO_BLOCKS[c]):
            X_pseudo_block = np.load(
                os.path.join(args.data_path,
                             'X_pseudo_{0}_{1}.npy'.format(c, b % N_PSEUDO_BLOCKS[c])))
            X_b += [X_pseudo_block[i] for i in xrange(len(X_pseudo_block))]
            y_b += np.repeat(c, len(X_pseudo_block)).tolist()
            manip_block = np.load(
                os.path.join(args.data_path,
                             'manip_pseudo_{0}_{1}.npy'.format(c, b % N_PSEUDO_BLOCKS[c])))
            manip_b += [m for m in manip_block]
            if len(y_b) >= max_len:
                yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
                X_b = []
                y_b = []
                manip_b = []
    # Flush whatever is left over after the last full chunk.
    if len(y_b) > 0:
        yield _make_predict_train_loader(X_b, manip_b), y_b, manip_b
def _make_predict_train_loader(X_b, manip_b, manip_ratio=0.):
    assert len(X_b) == len(manip_b)
    # make dataset
    rng = RNG(1337)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m): (Image.fromarray(x), m)),
        # if `val` == False:
        #   972/1982 manip pseudo images
        #   images : pseudo = approx. 48 : 8 = 6 : 1
        #   to get unalt : manip = 70 : 30 (like in test metric),
        #   we manip ~24.7% of non-pseudo images
        # else:
        #   we simply use same ratio as in validation (0.18)
        transforms.Lambda(lambda (img, m): (make_random_manipulation(img, rng, crop_policy='center', crop_size=512), float32(1.)) if \
            m[0] < 0.5 and rng.rand() < manip_ratio else (center_crop(img, 512), m))
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.crop_size == 512:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): ([img, img.transpose(Image.ROTATE_90)], [m] * 2)),
            transforms.Lambda(lambda (crops, ms): (torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop))
                 for crop in crops]), torch.from_numpy(np.asarray(ms))))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.TenCrop(args.crop_size)(img), [m] * 10)),
            transforms.Lambda(lambda (imgs, ms): (list(imgs) +
                [img.transpose(Image.ROTATE_90) for img in imgs], ms + ms)),
            transforms.Lambda(lambda (crops, ms): (torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop))
                 for crop in crops]), torch.from_numpy(np.asarray(ms))))
        ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m) for x, m in zip(X_b, manip_b)],
                                 y=np.zeros(len(X_b), dtype=np.int64),
                                 transform=train_transform)
    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers)
    return loader
def train(optimizer, train_optimizer=train_optimizer):
    # load and crop validation data
    print "Loading data ..."
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy'))
    manip_val = np.zeros((len(y_val), 1), dtype=np.float32)
    # manip_val = np.load(os.path.join(args.data_path, 'manip_with_pseudo.npy'))  # 68/480 manipulated

    c = args.crop_size
    C = X_val.shape[1]
    if c < C:
        X_val = X_val[:, C / 2 - c / 2:C / 2 + c / 2, C / 2 - c / 2:C / 2 + c / 2, :]
    if args.kernel:
        X_val = [conv_K(x) for x in X_val]

    # make validation loader
    rng = RNG(args.random_seed + 42 if args.random_seed else None)
    val_transform = transforms.Compose([
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng, crop_policy='center'), float32(1.), y) if \
            m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO else (img, m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
            KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
        transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m)),
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_val, manip_val, y_val)],
                                     y=y_val,
                                     transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    n_runs = args.epochs / args.epochs_per_unique_data + 1
    for _ in xrange(n_runs):
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
def make_train_loaders(block_index):
    # assemble data
    X_train = []
    y_train = []
    manip_train = []
    for c in xrange(10):
        X_block = np.load(
            os.path.join(args.data_path,
                         'X_{0}_{1}.npy'.format(c, block_index % N_BLOCKS[c])))
        X_block = [X_block[i] for i in xrange(len(X_block))]
        if args.bootstrap:
            X_block = [X_block[i] for i in b_ind[c][block_index % N_BLOCKS[c]]]
        X_train += X_block
        y_train += np.repeat(c, len(X_block)).tolist()
        manip_train += [float32(0.)] * len(X_block)
    for c in xrange(10):
        X_pseudo_block = np.load(
            os.path.join(args.data_path,
                         'X_pseudo_{0}_{1}.npy'.format(c, block_index % N_PSEUDO_BLOCKS[c])))
        X_pseudo_block = [X_pseudo_block[i] for i in xrange(len(X_pseudo_block))]
        if args.bootstrap:
            X_pseudo_block = [X_pseudo_block[i]
                              for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]]
        X_train += X_pseudo_block
        y_train += np.repeat(c, len(X_pseudo_block)).tolist()
        manip_block = np.load(
            os.path.join(args.data_path,
                         'manip_pseudo_{0}_{1}.npy'.format(c, block_index % N_PSEUDO_BLOCKS[c])))
        manip_block = [m for m in manip_block]
        if args.bootstrap:
            manip_block = [manip_block[i]
                           for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]]
        manip_train += manip_block

    shuffle_ind = range(len(y_train))
    RNG(seed=block_index).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
            m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
            KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.kernel:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m): (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_train, manip_train, y_train)],
                                 y=y_train,
                                 transform=train_transform)
    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(class_vector=np.asarray(y_train),
                                                  batch_size=args.batch_size))
    return loader
def make_train_loaders(block_index, distill=True):
    # assemble data
    X_train = []
    y_train = []
    manip_train = []
    soft_logits = []
    if distill:
        soft_logits_ind = []
        for c in xrange(10):
            b = block_index % N_BLOCKS[c]
            X_block = np.load(os.path.join(args.data_path, 'X_{0}_{1}.npy'.format(c, b)))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            if args.bootstrap:
                X_block = [X_block[i] for i in b_ind[c][b]]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            soft_logits_ind_block = SOFT_LOGITS_IND[c][b]
            if args.bootstrap:
                soft_logits_ind_block = [soft_logits_ind_block[i] for i in b_ind[c][b]]
            soft_logits_ind += soft_logits_ind_block
        for c in xrange(10):
            b = N_PSEUDO_BLOCKS_FOR_VALIDATION + block_index % N_PSEUDO_BLOCKS[c]
            X_pseudo_block = np.load(
                os.path.join(args.data_path, 'X_pseudo_{0}_{1}.npy'.format(c, b)))
            X_pseudo_block = [X_pseudo_block[i] for i in xrange(len(X_pseudo_block))]
            if args.bootstrap:
                X_pseudo_block = [X_pseudo_block[i] for i in b_pseudo_ind[c][b]]
            X_train += X_pseudo_block
            y_train += np.repeat(c, len(X_pseudo_block)).tolist()
            manip_block = np.load(
                os.path.join(args.data_path, 'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_block = [m for m in manip_block]
            if args.bootstrap:
                manip_block = [manip_block[i] for i in b_pseudo_ind[c][b]]
            manip_train += manip_block
            soft_logits_ind_block = SOFT_LOGITS_IND[10 + c][b]
            if args.bootstrap:
                soft_logits_ind_block = [soft_logits_ind_block[i] for i in b_pseudo_ind[c][b]]
            soft_logits_ind += soft_logits_ind_block
        soft_logits = np.load(os.path.join(args.data_path, 'logits_train.npy')).astype(np.float32)
        soft_logits -= soft_logits.mean(axis=1)[:, np.newaxis]
        soft_logits = [soft_logits[i] / max(args.temperature, 1.) for i in soft_logits_ind]
    else:
        for c in xrange(10):
            X_block = np.load(os.path.join(args.data_path, G[c][block_index % len(G[c])]))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            soft_logits += [np.zeros((10), dtype=np.float32)] * len(X_block)

    shuffle_ind = range(len(y_train))
    RNG(seed=block_index * 41).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]
    soft_logits = [soft_logits[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
            m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
            True else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.kernel:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m): (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_train, manip_train, y_train)],
                                 y=y_train,
                                 transform=train_transform,
                                 soft_logits=soft_logits)
    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(class_vector=np.asarray(y_train),
                                                  batch_size=args.batch_size))
    return loader
def train(optimizer, train_optimizer=train_optimizer):
    # load and crop validation data
    print "Loading data ..."
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy')).tolist()
    manip_val = np.zeros((len(y_val), 1), dtype=np.float32)
    # manip_val = np.load(os.path.join(args.data_path, 'manip_with_pseudo.npy'))  # 68/480 manipulated

    d = args.crop_size * 2
    D = X_val.shape[1]
    if d < D:
        X_val = X_val[:, D / 2 - d / 2:D / 2 + d / 2, D / 2 - d / 2:D / 2 + d / 2, :]
    if args.kernel:
        X_val = [conv_K(x) for x in X_val]
    X_val = [X_val[i] for i in xrange(len(X_val))]
    manip_val = [manip_val[i] for i in xrange(len(manip_val))]

    for b in xrange(N_PSEUDO_BLOCKS_FOR_VALIDATION):
        for c in xrange(10):
            X_block = np.load(
                os.path.join(args.data_path, 'X_pseudo_{0}_{1}.npy'.format(c, b)))
            y_val += [c] * len(X_block)
            d = args.crop_size * 2
            D = X_block.shape[1]
            if d < D:
                X_block = X_block[:, D / 2 - d / 2:D / 2 + d / 2, D / 2 - d / 2:D / 2 + d / 2, :]
            X_val += [X_block[i] for i in xrange(len(X_block))]
            manip_block = np.load(
                os.path.join(args.data_path, 'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_val += [m for m in manip_block]

    # make validation loader
    rng = RNG(args.random_seed + 42 if args.random_seed else None)
    val_transform = transforms.Compose([
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng, crop_policy='center'), float32(1.), y) if \
            m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO else (center_crop(img, args.crop_size), m, y)),
        # transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
        #     True else (img, m)),
        transforms.Lambda(lambda (img, m, y): (transforms.ToTensor()(img), m)),
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_val, manip_val, y_val)],
                                     y=y_val,
                                     transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    # First train with the distillation loaders (soft logits included), ...
    for _ in xrange(args.distill_epochs / args.epochs_per_unique_data):
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data,
                                          distill=True)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)

    # ... then continue with the distillation cost switched off.
    n_runs = args.epochs / args.epochs_per_unique_data + 1
    for _ in xrange(n_runs):
        optimizer.distill_cost = 0.
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data,
                                          distill=False)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
def main():
    net = ClockworkRNN((1, (4, 30), 1), update_fn=adam, learning_rate=0.001)
    # ones = np.ones((2, 10, 1), dtype=floatX)
    # res = net.fprop(ones)

    losses = []
    lrs = []
    norms, momentum_norms = [], []
    norms_thres, momentum_norms_thres = [], []
    # X, y, x_series = create_batch_func_params(500, 0.1, 2)
    X, y, x_series = create_batch_func_params(410, 0.1, 2000)

    best = np.inf
    last_best_index = 0
    decrement = float32(0.99)
    for i in range(8000):
        start = time()
        loss, norm, norm_threshold, momentum_norm, momentum_norm_threshold = net.bptt(X, y)
        losses.append(loss)
        norms.append(norm)
        momentum_norms.append(momentum_norm)
        norms_thres.append(float32(norm_threshold))
        momentum_norms_thres.append(float32(momentum_norm_threshold))
        lrs.append(net.learning_rate.get_value())
        epoch_time = time() - start
        print i, ':', losses[-1], " took :", epoch_time

        # Decay the learning rate if the loss has not improved for 20 epochs.
        if best > losses[-1]:
            last_best_index = i
            best = losses[-1]
        elif i - last_best_index > 20:
            best = losses[-1]
            new_rate = net.learning_rate.get_value() * decrement
            net.learning_rate.set_value(new_rate)
            last_best_index = i
            print("New learning rate", new_rate)

    with open('rnn.pickle', 'wb') as f:
        pickle.dump(net, f)

    # Learning rate and cost over the whole run.
    # plt.figure(figsize=(40, 20), dpi=100)
    fig, ax1 = plt.subplots(figsize=(30, 10))
    ax2 = ax1.twinx()
    ax1.plot(losses, label='loss')
    ax1.set_ylabel('Loss')
    ax2.plot(lrs, color="red", label='learning rate')
    ax2.set_ylabel('Learning rate')
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1 + h2, l1 + l2, loc='best')
    fig.savefig('rnn_learning_rate_and_cost.jpg')
    plt.clf()

    # Same plot, zoomed on the last 1000 epochs.
    fig, ax1 = plt.subplots(figsize=(30, 10))
    ax2 = ax1.twinx()
    ax1.plot(losses[-1000:], label='loss')
    ax1.set_ylabel('Loss')
    ax2.plot(lrs[-1000:], color="red", label='learning rate')
    ax2.set_ylabel('Learning rate')
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1 + h2, l1 + l2, loc='best')
    fig.savefig('rnn_learning_rate_and_cost_last1000.jpg')
    plt.clf()

    # NORMS
    fig, ax1 = plt.subplots(figsize=(30, 10))
    ax2 = ax1.twinx()
    ax1.plot(norms, label="norm", color='red')
    ax1.plot(norms_thres, label="norm threshold", color='blue')
    ax1.set_ylabel('Gradient norm')
    ax2.plot(momentum_norms, label="momentum norms", color='green')
    ax2.plot(momentum_norms_thres, label="momentum norms threshold", color='purple')
    ax2.set_ylabel('Momentum norm')
    h1, l1 = ax1.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax1.legend(h1 + h2, l1 + l2, loc='best')
    fig.savefig('rnn_norms.jpg')
    plt.clf()

    # Predictions vs. targets on fresh sequences.
    # X, y, x_series = X[:10], y[:10], x_series[:10]
    X, y, x_series = create_batch_func_params(1500, 0.1, 10)
    prediction = net.fprop(X)
    # plt.close('all')
    fig, axarr = plt.subplots(len(X), sharex=True, figsize=(160, 5 * X.shape[0]))
    for i in range(len(X)):
        # axarr[i].figure.figure = plt.figure(figsize=(150, 12), dpi=100)
        axarr[i].set_title('freq:' + str(X[i][0]))  # every entry of X[i] equals the frequency
        axarr[i].plot(prediction[i], label='model', color='blue')
        axarr[i].plot(y[i], label='actual', color='green')
        axarr_twin = axarr[i].twinx()
        axarr_twin.plot(np.abs(y[i] - prediction[i]), label='error', color='red')
        h1, l1 = axarr[i].get_legend_handles_labels()
        h2, l2 = axarr_twin.get_legend_handles_labels()
        axarr[i].legend(h1 + h2, l1 + l2, loc=0)
    fig.savefig('rnn_prediction_vs_actual.jpg')
    print "done"