def __init__(self):
    self.n_visible = 12
    self.n_hidden = 8
    self.X = RNG(seed=1337).rand(16, self.n_visible)
    self.X_val = RNG(seed=42).rand(8, self.n_visible)
    self.rbm_config = dict(n_visible=self.n_visible,
                           n_hidden=self.n_hidden,
                           sample_v_states=True,
                           sample_h_states=True,
                           dropout=0.9,
                           verbose=False,
                           display_filters=False,
                           random_seed=1337)
def _make_predict_train_loader(X_b, manip_b, manip_ratio=0.):
    assert len(X_b) == len(manip_b)

    # make dataset
    rng = RNG(1337)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m): (Image.fromarray(x), m)),
        # if `val` == False:
        #   972/1982 manip pseudo images
        #   images : pseudo = approx. 48 : 8 = 6 : 1
        #   to get unalt : manip = 70 : 30 (like in test metric),
        #   we manip ~24.7% of non-pseudo images
        # else:
        #   we simply use same ratio as in validation (0.18)
        transforms.Lambda(lambda (img, m): (make_random_manipulation(img, rng, crop_policy='center', crop_size=512), float32(1.)) if \
                          m[0] < 0.5 and rng.rand() < manip_ratio else (center_crop(img, 512), m))
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.crop_size == 512:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): ([img, img.transpose(Image.ROTATE_90)], [m] * 2)),
            transforms.Lambda(lambda (crops, ms): (torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop)) for crop in crops]),
                torch.from_numpy(np.asarray(ms))))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.TenCrop(args.crop_size)(img), [m] * 10)),
            transforms.Lambda(lambda (imgs, ms): (list(imgs) + [img.transpose(Image.ROTATE_90) for img in imgs], ms + ms)),
            transforms.Lambda(lambda (crops, ms): (torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop)) for crop in crops]),
                torch.from_numpy(np.asarray(ms))))
        ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m) for x, m in zip(X_b, manip_b)],
                                 y=np.zeros(len(X_b), dtype=np.int64),
                                 transform=train_transform)

    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers)
    return loader
def train(optimizer, train_optimizer=train_optimizer):
    # load and crop validation data
    print "Loading data ..."
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy'))
    manip_val = np.zeros((len(y_val), 1), dtype=np.float32)
    # np.load(os.path.join(args.data_path, 'manip_with_pseudo.npy'))  # 68/480 manipulated
    c = args.crop_size
    C = X_val.shape[1]
    if c < C:
        X_val = X_val[:, C / 2 - c / 2:C / 2 + c / 2,
                      C / 2 - c / 2:C / 2 + c / 2, :]
    if args.kernel:
        X_val = [conv_K(x) for x in X_val]

    # make validation loader
    rng = RNG(args.random_seed + 42 if args.random_seed else None)
    val_transform = transforms.Compose([
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng, crop_policy='center'), float32(1.), y) if\
                          m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO else (img, m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                          KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
        transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m)),
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_val, manip_val, y_val)],
                                     y=y_val,
                                     transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    n_runs = args.epochs / args.epochs_per_unique_data + 1
    for _ in xrange(n_runs):
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
def make_test_loader():
    # TTA
    rng = RNG(args.random_seed)
    test_transforms_list = make_aug_transforms(rng, propagate_manip=False)
    if args.crop_size == 512:
        test_transforms_list += [
            transforms.Lambda(lambda img: [img, img.transpose(Image.ROTATE_90)]),
            transforms.Lambda(lambda crops: torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop)) for crop in crops]))
        ]
    else:
        test_transforms_list += [
            transforms.TenCrop(args.crop_size),
            transforms.Lambda(lambda imgs: list(imgs) +\
                              [img.transpose(Image.ROTATE_90) for img in imgs]),
            transforms.Lambda(lambda crops: torch.stack(
                [transforms.Normalize(args.means, args.stds)(transforms.ToTensor()(crop)) for crop in crops]))
        ]
    test_transform = transforms.Compose(test_transforms_list)
    test_dataset = KaggleCameraDataset(args.data_path,
                                       train=False,
                                       transform=test_transform)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.n_workers)
    return test_dataset, test_loader
def forward_pass(self, X):
    # assert self.p > 0
    if self.is_training:
        self._mask = RNG(self.random_seed).uniform(size=X.shape) > self.p
        Z = self._mask * X
    else:
        Z = (1.0 - self.p) * X  # to keep output of the same scale (on average)
    return Z
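# Illustration (not part of the class above): with this "standard" dropout
# convention, units are kept with probability 1 - p at train time and the whole
# activation is scaled by 1 - p at test time, so the expected output matches on
# average. A minimal self-contained check, assuming only numpy:
import numpy as np

p = 0.2
_rng = np.random.RandomState(1337)
X_demo = np.ones((10000, 4))
mask = _rng.uniform(size=X_demo.shape) > p    # keep each unit with prob. 1 - p
train_out = mask * X_demo                     # what forward_pass does when training
test_out = (1.0 - p) * X_demo                 # what forward_pass does at test time
print train_out.mean(), test_out.mean()       # both ~0.8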
def predict_proba(self, X):
    self._kernel = get_kernel(self.kernel, **self.kernel_params)
    K_star = self._kernel(X, self._X)
    self._rng = RNG(self.random_seed)
    predictions = [
        self._predict_k_star(k_star, x_star)
        for k_star, x_star in zip(K_star, X)
    ]
    return np.asarray(predictions)
def glorot_normal(shape, random_seed=None):
    fan_in, fan_out = _glorot_fan(shape)
    s = np.sqrt(2. / (fan_in + fan_out))
    return RNG(random_seed).normal(scale=s, size=shape)
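# Usage sketch (assumes `_glorot_fan` returns (fan_in, fan_out) == shape for a
# 2-D weight matrix, as is standard for Glorot/Xavier initialization): the
# sample standard deviation should be close to sqrt(2 / (fan_in + fan_out)).
W_demo = glorot_normal((256, 128), random_seed=42)
print W_demo.shape                                # (256, 128)
print W_demo.std(), np.sqrt(2. / (256 + 128))     # both ~0.072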
def make_train_loaders(block_index):
    # assemble data
    X_train = []
    y_train = []
    manip_train = []
    for c in xrange(10):
        X_block = np.load(
            os.path.join(args.data_path,
                         'X_{0}_{1}.npy'.format(c, block_index % N_BLOCKS[c])))
        X_block = [X_block[i] for i in xrange(len(X_block))]
        if args.bootstrap:
            X_block = [X_block[i] for i in b_ind[c][block_index % N_BLOCKS[c]]]
        X_train += X_block
        y_train += np.repeat(c, len(X_block)).tolist()
        manip_train += [float32(0.)] * len(X_block)
    for c in xrange(10):
        X_pseudo_block = np.load(
            os.path.join(args.data_path,
                         'X_pseudo_{0}_{1}.npy'.format(c, block_index % N_PSEUDO_BLOCKS[c])))
        X_pseudo_block = [X_pseudo_block[i] for i in xrange(len(X_pseudo_block))]
        if args.bootstrap:
            X_pseudo_block = [
                X_pseudo_block[i]
                for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]
            ]
        X_train += X_pseudo_block
        y_train += np.repeat(c, len(X_pseudo_block)).tolist()
        manip_block = np.load(
            os.path.join(args.data_path,
                         'manip_pseudo_{0}_{1}.npy'.format(c, block_index % N_PSEUDO_BLOCKS[c])))
        manip_block = [m for m in manip_block]
        if args.bootstrap:
            manip_block = [
                manip_block[i]
                for i in b_pseudo_ind[c][block_index % N_PSEUDO_BLOCKS[c]]
            ]
        manip_train += manip_block

    shuffle_ind = range(len(y_train))
    RNG(seed=block_index).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
                          m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                          KaggleCameraDataset.is_rotation_allowed()[y] else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.kernel:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m): (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_train, manip_train, y_train)],
                                 y=y_train,
                                 transform=train_transform)

    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(class_vector=np.asarray(y_train),
                                                  batch_size=args.batch_size))
    return loader
# [9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
# [9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
# [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7],
# [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7],
# [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7]
# ]

b_ind = []
b_pseudo_ind = []
if args.bootstrap:
    for c in xrange(10):
        b_ind.append([])
        for b in xrange(N_BLOCKS[c]):
            N = N_IMAGES_PER_BLOCK[c][b]
            seed = 42 * args.random_seed + 101 * c + b if args.random_seed else None
            b_ind[c] += [RNG(seed).choice(range(N), N).tolist()]
        b_pseudo_ind.append([])
        for b in xrange(N_PSEUDO_BLOCKS[c]):
            N = N_IMAGES_PER_PSEUDO_BLOCK[c][b]
            seed = 42 * args.random_seed + 1111 * c + b + 1337 if args.random_seed else None
            b_pseudo_ind[c] += [RNG(seed).choice(range(N), N).tolist()]

K = 1 / 12. * np.array([[-1,  2,  -2,  2, -1],
                        [ 2, -6,   8, -6,  2],
                        [-2,  8, -12,  8, -2],
                        [ 2, -6,   8, -6,  2],
                        [-1,  2,  -2,  2, -1]])


def center_crop(img, crop_size):
    w = img.size[0]
    h = img.size[1]
    return img.crop((w / 2 - crop_size / 2, h / 2 - crop_size / 2,
                     w / 2 + crop_size / 2, h / 2 + crop_size / 2))
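# `K` above is the 5x5 high-pass "noise residual" filter often used in camera
# identification. `conv_K` is defined elsewhere in the project; the sketch below
# is only a hypothetical stand-in showing one way such a kernel could be applied
# channel-wise with scipy (an assumption, not the project's actual implementation):
from scipy.ndimage import convolve

def conv_K_sketch(x):
    # x: (H, W, 3) uint8/float array -> (H, W, 3) float32 residual image
    x = x.astype(np.float32)
    out = np.empty_like(x)
    for ch in xrange(x.shape[2]):
        out[:, :, ch] = convolve(x[:, :, ch], K, mode='reflect')
    return out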
a = [''] * 2 + ['-a']
b = ['-b 16'] * 3 + ['-b 12'] * 3 + ['-b 8']
cs = ['-cs 256'] * 2 + ['-cs 224']
d = ['-d 0', '-d 0.05', '-d 0.1', '-d 0.1', '-d 0.15']  # TODO
eu = ['-eu 8'] * 3 + ['-eu 6'] * 2 + ['-eu 4']
fc = [''] * 3 + ['-fc 1024 256'] * 2
l_lr = ['-lr 1e-4 1e-3 -clr 1e-3 4e-3'] * 5 + ['-l hinge -lr 1e-5 1e-4 -clr 1e-4 4e-4']
opt = [''] * 5 + ['-opt adam']
w = ['-w'] * 4 + ['']
nclr = [50] * 2 + [80] + [100] * 2
T = ['-t 0'] + ['-t 4'] * 5 + ['-t 6'] * 3 + ['-t 8'] * 2
dc = ['-dc 0.04', '-dc 0.08', '-dc 0.1']

seed = 111
rng = RNG(1337)
for i in xrange(100):
    tmpl = 'nohup python run.py {a} {b} {cs} {d} {eu} {fc} {opt} {w} -e 4000 -bt -rs {seed}'
    tmpl += ' {l_lr} {nclr} {T} {dc}'
    tmpl += ' -md ../models/{path}/ > {seed}.out &'
    current = {}
    for v in ('a', 'b', 'cs', 'd', 'eu', 'fc', 'opt', 'w', 'l_lr', 'nclr', 'T', 'dc'):
        current[v] = rng.choice(globals()[v])
    path = 'd121-bagg-{i}'.format(i=i + 1)
    # path = 'b{b}_cs{cs}_d{d}_eu{eu}_{fc}_{l}_{opt}_{w}_nclr{nclr}_seed{seed}'
    # path = path.format(b=current['b'][3:], cs=current['cs'][4:], d=current['d'][3:],
    #                    eu=current['eu'][4:], fc='fc' if current['fc'] else '', l=current['l'][3:],
    #                    opt='adam' if current['opt'] else '', w='w' if current['w'] else '',
    #                    nclr=current['nclr'], seed=seed)
    print tmpl.format(seed=seed, path=path, **current)
    seed += 111
def _fit(self, X):
    if not self._initialized:
        layer = FullyConnected(self.n_hidden,
                               bias=0.,
                               random_seed=self.random_seed)
        layer.setup_weights(X.shape)
        self.W = layer.W
        self.vb = np.zeros(X.shape[1])
        self.hb = layer.b
        self._dW = np.zeros_like(self.W)
        self._dvb = np.zeros_like(self.vb)
        self._dhb = np.zeros_like(self.hb)
        self._rng = RNG(self.random_seed)
    self._rng.reseed()
    timer = Stopwatch(verbose=False).start()
    for _ in xrange(self.n_epochs):
        self.epoch += 1
        if self.verbose:
            print_inline('Epoch {0:>{1}}/{2} '.format(
                self.epoch, len(str(self.n_epochs)), self.n_epochs))

        if isinstance(self.learning_rate, str):
            S, F = map(float, self.learning_rate.split('->'))
            self._learning_rate = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1.) / 8.)) / (
                    1. - np.exp(-(self.n_epochs - 1.) / 8.))
        else:
            self._learning_rate = self.learning_rate

        if isinstance(self.momentum, str):
            S, F = map(float, self.momentum.split('->'))
            self._momentum = S + (F - S) * (
                1. - np.exp(-(self.epoch - 1) / 4.)) / (
                    1. - np.exp(-(self.n_epochs - 1) / 4.))
        else:
            self._momentum = self.momentum

        mean_recon = self.train_epoch(X)
        if mean_recon < self.best_recon:
            self.best_recon = mean_recon
            self.best_epoch = self.epoch
            self.best_W = self.W.copy()
            self.best_vb = self.vb.copy()
            self.best_hb = self.hb.copy()
            self._early_stopping = self.early_stopping

        msg = 'elapsed: {0} sec'.format(
            width_format(timer.elapsed(), default_width=5, max_precision=2))
        msg += ' - recon. mse: {0}'.format(
            width_format(mean_recon, default_width=6, max_precision=4))
        msg += ' - best r-mse: {0}'.format(
            width_format(self.best_recon, default_width=6, max_precision=4))
        if self.early_stopping:
            msg += ' {0}*'.format(self._early_stopping)
        if self.verbose:
            print msg

        if self._early_stopping == 0:
            return
        if self.early_stopping:
            self._early_stopping -= 1
def __call__(self, x):
    self.rng = RNG(self.random_seed)
    return self._call(x)
def train(optimizer, train_optimizer=train_optimizer):
    # load and crop validation data
    print "Loading data ..."
    X_val = np.load(os.path.join(args.data_path, 'X_val.npy'))
    y_val = np.load(os.path.join(args.data_path, 'y_val.npy')).tolist()
    manip_val = np.zeros((len(y_val), 1), dtype=np.float32)
    # np.load(os.path.join(args.data_path, 'manip_with_pseudo.npy'))  # 68/480 manipulated
    d = args.crop_size * 2
    D = X_val.shape[1]
    if d < D:
        X_val = X_val[:, D / 2 - d / 2:D / 2 + d / 2,
                      D / 2 - d / 2:D / 2 + d / 2, :]
    if args.kernel:
        X_val = [conv_K(x) for x in X_val]
    X_val = [X_val[i] for i in xrange(len(X_val))]
    manip_val = [manip_val[i] for i in xrange(len(manip_val))]

    for b in xrange(N_PSEUDO_BLOCKS_FOR_VALIDATION):
        for c in xrange(10):
            X_block = np.load(
                os.path.join(args.data_path, 'X_pseudo_{0}_{1}.npy'.format(c, b)))
            y_val += [c] * len(X_block)
            d = args.crop_size * 2
            D = X_block.shape[1]
            if d < D:
                X_block = X_block[:, D / 2 - d / 2:D / 2 + d / 2,
                                  D / 2 - d / 2:D / 2 + d / 2, :]
            X_val += [X_block[i] for i in xrange(len(X_block))]
            manip_block = np.load(
                os.path.join(args.data_path, 'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_val += [m for m in manip_block]

    # make validation loader
    rng = RNG(args.random_seed + 42 if args.random_seed else None)
    val_transform = transforms.Compose([
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ########
        # 1 - (480-68-0.3*480)/(480-68) ~ 0.18
        ########
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng, crop_policy='center'), float32(1.), y) if\
                          m[0] < 0.5 and rng.rand() < VAL_MANIP_RATIO else (center_crop(img, args.crop_size), m, y)),
        # transforms.Lambda(lambda (img, m, y): ([img,
        #                   img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
        #                   True else (img, m)),
        transforms.Lambda(lambda (img, m, y): (transforms.ToTensor()(img), m)),
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ])
    np.save(os.path.join(args.model_dirpath, 'y_val.npy'), np.vstack(y_val))
    val_dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_val, manip_val, y_val)],
                                     y=y_val,
                                     transform=val_transform)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.n_workers)

    for _ in xrange(args.distill_epochs / args.epochs_per_unique_data):
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data,
                                          distill=True)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)

    n_runs = args.epochs / args.epochs_per_unique_data + 1
    for _ in xrange(n_runs):
        optimizer.distill_cost = 0.
        train_loader = make_train_loaders(block_index=optimizer.epoch / args.epochs_per_unique_data,
                                          distill=False)
        optimizer.max_epoch = optimizer.epoch + args.epochs_per_unique_data
        train_optimizer(optimizer, train_loader, val_loader)
def glorot_uniform(shape, random_seed=None):
    fan_in, fan_out = _glorot_fan(shape)
    s = np.sqrt(6. / (fan_in + fan_out))
    return RNG(random_seed).uniform(low=-s, high=s, size=shape)
def predict(optimizer, **kwargs):
    # load data
    X_test = np.load(os.path.join(kwargs['data_path'], 'X_test.npy'))
    y_test = np.zeros((len(X_test), ), dtype=np.int64)

    test_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    # TTA
    rng = RNG(seed=1337)
    base_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Lambda(
            lambda img: [img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)]),
        transforms.Lambda(
            lambda img: adjust_gamma(img, gamma=rng.uniform(0.8, 1.25))),
        transforms.Lambda(
            lambda img: jpg_compress(img, quality=rng.randint(70, 100 + 1))),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    tta_n = 10

    def tta_f(img, n=tta_n - 1):
        out = [test_transform(img)]
        for _ in xrange(n):
            out.append(base_transform(img))
        return torch.stack(out, 0)

    tta_transform = transforms.Compose([
        transforms.Lambda(lambda img: tta_f(img)),
    ])

    test_loader = DataLoader(dataset=make_numpy_dataset(X_test, y_test, tta_transform),
                             batch_size=kwargs['batch_size'],
                             shuffle=False,
                             num_workers=4)
    test_dataset = KaggleCameraDataset(kwargs['data_path'],
                                       train=False,
                                       lazy=not kwargs['not_lazy'])

    # compute predictions
    logits, _ = optimizer.test(test_loader)

    # compute and save raw probs
    logits = np.vstack(logits)
    proba = softmax(logits)

    # group and average predictions
    K = 16 * tta_n
    proba = proba.reshape(len(proba) / K, K, -1).mean(axis=1)

    fnames = [os.path.split(fname)[-1] for fname in test_dataset.X]
    df = pd.DataFrame(proba)
    df['fname'] = fnames
    df = df[['fname'] + range(10)]
    dirpath = os.path.split(kwargs['predict_from'])[0]
    df.to_csv(os.path.join(dirpath, 'proba.csv'), index=False)

    # compute predictions and save in submission format
    index_pred = unhot(one_hot_decision_function(proba))
    data = {
        'fname': fnames,
        'camera': [KaggleCameraDataset.target_labels()[int(c)] for c in index_pred]
    }
    df2 = pd.DataFrame(data, columns=['fname', 'camera'])
    df2.to_csv(os.path.join(dirpath, 'submission.csv'), index=False)
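# The "group and average" step above relies on the loader emitting K = 16 * tta_n
# augmented crops per test image in contiguous order, so per-crop probabilities
# can be folded back into one row per image with a reshape. A tiny self-contained
# illustration of that pattern (toy numbers, not project data):
toy_proba = np.arange(12, dtype=np.float32).reshape(6, 2)  # 3 images x K=2 crops each
K_toy = 2
print toy_proba.reshape(len(toy_proba) / K_toy, K_toy, -1).mean(axis=1).shape  # (3, 2)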
def train(optimizer, **kwargs):
    # load training data
    print 'Loading and splitting data ...'
    if os.path.isfile(os.path.join(kwargs['data_path'], 'X_train.npy')):
        X_train = np.load(os.path.join(kwargs['data_path'], 'X_train.npy'))
        y_train = np.load(os.path.join(kwargs['data_path'], 'y_train.npy'))
        X_val = np.load(os.path.join(kwargs['data_path'], 'X_val.npy'))
        y_val = np.load(os.path.join(kwargs['data_path'], 'y_val.npy'))
    else:
        X = np.load(os.path.join(kwargs['data_path'], 'X_patches.npy'))
        y = np.load(os.path.join(kwargs['data_path'], 'y_patches.npy'))
        # split into train, val in stratified fashion
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=kwargs['n_val'],
                                     random_state=kwargs['random_seed'])
        train_ind, val_ind = list(sss.split(np.zeros_like(y), y))[0]
        X_train = X[train_ind]
        y_train = y[train_ind]
        X_val = X[val_ind]
        y_val = y[val_ind]
        np.save(os.path.join(kwargs['data_path'], 'X_train.npy'), X_train)
        np.save(os.path.join(kwargs['data_path'], 'y_train.npy'), y_train)
        np.save(os.path.join(kwargs['data_path'], 'X_val.npy'), X_val)
        np.save(os.path.join(kwargs['data_path'], 'y_val.npy'), y_val)

    rng = RNG()
    # noinspection PyTypeChecker
    train_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Lambda(
            lambda img: [img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)]),
        transforms.Lambda(
            lambda img: adjust_gamma(img, gamma=rng.uniform(0.8, 1.25))),
        transforms.Lambda(
            lambda img: jpg_compress(img, quality=rng.randint(70, 100 + 1))),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    val_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    train_dataset = make_numpy_dataset(X_train, y_train, train_transform)
    val_dataset = make_numpy_dataset(X_val, y_val, val_transform)

    # define loaders
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=kwargs['batch_size'],
                              shuffle=False,
                              num_workers=4,
                              sampler=StratifiedSampler(class_vector=y_train,
                                                        batch_size=kwargs['batch_size']))
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=kwargs['batch_size'],
                            shuffle=False,
                            num_workers=4)

    print 'Starting training ...'
    optimizer.train(train_loader, val_loader)
class TrainTestSplitter(object):
    """
    A generic class for splitting data into (random) subsets.

    Parameters
    ----------
    shuffle : bool, optional
        Whether to shuffle the data.
    random_seed : None or int, optional
        Pseudo-random number generator seed used for random sampling.

    Examples
    --------
    >>> import numpy as np
    >>> y = np.array([1, 1, 2, 2, 3, 3, 3])

    >>> tts1 = TrainTestSplitter(shuffle=False)
    >>> train, test = tts1.split(y, train_ratio=0.5)
    >>> print y[train], y[test]
    [1 1 2] [2 3 3 3]
    >>> train, test = tts1.split(y, train_ratio=0.5, stratify=True)
    >>> print y[train], y[test]
    [1 2 3] [1 2 3 3]
    >>> for fold in tts1.make_k_folds(y, n_folds=3):
    ...     print y[fold]
    [1 1 2]
    [2 3]
    [3 3]
    >>> for fold in tts1.make_k_folds(y, n_folds=3, stratify=True):
    ...     print y[fold]
    [1 2 3]
    [1 2 3]
    [3]
    >>> for train, test in tts1.k_fold_split(y, n_splits=3):
    ...     print y[train], y[test]
    [2 3 3 3] [1 1 2]
    [1 1 2 3 3] [2 3]
    [1 1 2 2 3] [3 3]
    >>> for train, test in tts1.k_fold_split(y, n_splits=3, stratify=True):
    ...     print y[train], y[test]
    [1 2 3 3] [1 2 3]
    [1 2 3 3] [1 2 3]
    [1 2 3 1 2 3] [3]

    >>> tts2 = TrainTestSplitter(shuffle=True, random_seed=1337)
    >>> train, test = tts2.split(y, train_ratio=0.5)
    >>> print y[train], y[test]
    [3 2 1] [2 1 3 3]
    >>> train, test = tts2.split(y, train_ratio=0.5, stratify=True)
    >>> print y[train], y[test]
    [3 1 2] [3 3 2 1]
    >>> for fold in tts2.make_k_folds(y, n_folds=3):
    ...     print y[fold]
    [3 2 1]
    [2 1]
    [3 3]
    >>> for fold in tts2.make_k_folds(y, n_folds=3, stratify=True):
    ...     print y[fold]
    [3 1 2]
    [3 2 1]
    [3]
    """
    def __init__(self, shuffle=False, random_seed=None):
        self.shuffle = shuffle
        self.random_seed = random_seed
        self.rng = RNG(self.random_seed)

    def split(self, y, train_ratio=0.8, stratify=False):
        """
        Split data into train and test subsets.

        Parameters
        ----------
        y : (n_samples,) array-like
            The target variable for supervised learning problems.
        train_ratio : float, 0 < `train_ratio` < 1, optional
            The proportion of the dataset to include in the train split.
        stratify : bool, optional
            If True, the folds are made by preserving the percentage of samples
            for each class. Stratification is done based upon the `y` labels.

        Returns
        -------
        train : (n_train,) np.ndarray
            The training set indices for that split.
        test : (n_samples - n_train,) np.ndarray
            The testing set indices for that split.
        """
        self.rng.reseed()
        n = len(y)

        if not stratify:
            indices = self.rng.permutation(n) if self.shuffle else np.arange(n, dtype=np.int)
            train_size = int(train_ratio * n)
            return np.split(indices, (train_size, ))

        # group indices by label
        labels_indices = {}
        for index, label in enumerate(y):
            if not label in labels_indices:
                labels_indices[label] = []
            labels_indices[label].append(index)

        train, test = np.array([], dtype=np.int), np.array([], dtype=np.int)
        for label, indices in sorted(labels_indices.items()):
            size = int(train_ratio * len(indices))
            train = np.concatenate((train, indices[:size]))
            test = np.concatenate((test, indices[size:]))

        if self.shuffle:
            self.rng.shuffle(train)
            self.rng.shuffle(test)
        return train, test

    def make_k_folds(self, y, n_folds=3, stratify=False):
        """
        Split data into folds of (approximately) equal size.

        Parameters
        ----------
        y : (n_samples,) array-like
            The target variable for supervised learning problems.
            Stratification is done based upon the `y` labels.
        n_folds : int, `n_folds` > 1, optional
            Number of folds.
        stratify : bool, optional
            If True, the folds are made by preserving the percentage of samples
            for each class. Stratification is done based upon the `y` labels.

        Yields
        ------
        fold : np.ndarray
            Indices for current fold.
        """
        self.rng.reseed()
        n = len(y)

        if not stratify:
            indices = self.rng.permutation(n) if self.shuffle else np.arange(n, dtype=np.int)
            for fold in np.array_split(indices, n_folds):
                yield fold
            return

        # group indices
        labels_indices = {}
        for index, label in enumerate(y):
            if isinstance(label, np.ndarray):
                label = tuple(label.tolist())
            if not label in labels_indices:
                labels_indices[label] = []
            labels_indices[label].append(index)

        # split all indices label-wisely
        for label, indices in sorted(labels_indices.items()):
            labels_indices[label] = np.array_split(indices, n_folds)

        # collect respective splits into folds and shuffle if needed
        for k in xrange(n_folds):
            fold = np.concatenate(
                [indices[k] for _, indices in sorted(labels_indices.items())])
            if self.shuffle:
                self.rng.shuffle(fold)
            yield fold

    def k_fold_split(self, y, n_splits=3, stratify=False):
        """
        Split data into train and test subsets for K-fold CV.

        Parameters
        ----------
        y : (n_samples,) array-like
            The target variable for supervised learning problems.
            Stratification is done based upon the `y` labels.
        n_splits : int, `n_splits` > 1, optional
            Number of folds.
        stratify : bool, optional
            If True, the folds are made by preserving the percentage of samples
            for each class. Stratification is done based upon the `y` labels.

        Yields
        ------
        train : (n_train,) np.ndarray
            The training set indices for current split.
        test : (n_samples - n_train,) np.ndarray
            The testing set indices for current split.
        """
        folds = list(self.make_k_folds(y, n_folds=n_splits, stratify=stratify))
        for i in xrange(n_splits):
            yield np.concatenate(folds[:i] + folds[(i + 1):]), folds[i]
def __init__(self, shuffle=False, random_seed=None):
    self.shuffle = shuffle
    self.random_seed = random_seed
    self.rng = RNG(self.random_seed)
class RBM(BaseEstimator):
    """
    Examples
    --------
    >>> X = RNG(seed=1337).rand(32, 256)
    >>> rbm = RBM(n_hidden=100,
    ...           k=4,
    ...           batch_size=2,
    ...           n_epochs=50,
    ...           learning_rate='0.05->0.005',
    ...           momentum='0.5->0.9',
    ...           verbose=True,
    ...           early_stopping=5,
    ...           random_seed=1337)
    >>> rbm
    RBM(W=None, batch_size=2, best_W=None, best_epoch=None, best_hb=None,
      best_recon=inf, best_vb=None, early_stopping=5, epoch=0, hb=None, k=4,
      learning_rate='0.05->0.005', momentum='0.5->0.9', n_epochs=50,
      n_hidden=100, persistent=True, random_seed=1337, vb=None, verbose=True)
    """
    def __init__(self,
                 n_hidden=256,
                 persistent=True,
                 k=1,
                 batch_size=10,
                 n_epochs=10,
                 learning_rate=0.1,
                 momentum=0.9,
                 early_stopping=None,
                 verbose=False,
                 random_seed=None):
        self.n_hidden = n_hidden
        self.persistent = persistent
        self.k = k  # k in CD-k / PCD-k
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.learning_rate = learning_rate
        self._learning_rate = None
        self.momentum = momentum
        self._momentum = None
        self.early_stopping = early_stopping
        self._early_stopping = self.early_stopping
        self.verbose = verbose
        self.random_seed = random_seed
        self.W = None
        self.vb = None  # visible units bias
        self.hb = None  # hidden units bias
        self.epoch = 0
        self.best_W = None
        self.best_vb = None
        self.best_hb = None
        self.best_epoch = None
        self.best_recon = np.inf
        self._dW = None
        self._dvb = None
        self._dhb = None
        self._rng = None
        self._persistent = None
        self._initialized = False
        super(RBM, self).__init__(_y_required=False)

    def propup(self, v):
        """Propagate visible units activation upwards to the hidden units."""
        z = np.dot(v, self.W) + self.hb
        return sigmoid(z)

    def sample_h_given_v(self, v0_sample):
        """Infer state of hidden units given visible units."""
        h1_mean = self.propup(v0_sample)
        h1_sample = self._rng.binomial(size=h1_mean.shape, n=1, p=h1_mean)
        return h1_mean, h1_sample

    def propdown(self, h):
        """Propagate hidden units activation downwards to the visible units."""
        z = np.dot(h, self.W.T) + self.vb
        return sigmoid(z)

    def sample_v_given_h(self, h0_sample):
        """Infer state of visible units given hidden units."""
        v1_mean = self.propdown(h0_sample)
        v1_sample = self._rng.binomial(size=v1_mean.shape, n=1, p=v1_mean)
        return v1_mean, v1_sample

    def gibbs_hvh(self, h0_sample):
        """Performs a step of Gibbs sampling starting from the hidden units."""
        v1_mean, v1_sample = self.sample_v_given_h(h0_sample)
        h1_mean, h1_sample = self.sample_h_given_v(v1_sample)
        return v1_mean, v1_sample, h1_mean, h1_sample

    def gibbs_vhv(self, v0_sample):
        """Performs a step of Gibbs sampling starting from the visible units."""
        raise NotImplementedError()

    def free_energy(self, v_sample):
        """Function to compute the free energy."""
        raise NotImplementedError()

    def update(self, X_batch):
        # compute positive phase
        ph_mean, ph_sample = self.sample_h_given_v(X_batch)

        # decide how to initialize chain
        if self._persistent is not None:
            chain_start = self._persistent
        else:
            chain_start = ph_sample

        # gibbs sampling
        for step in xrange(self.k):
            nv_means, nv_samples, \
            nh_means, nh_samples = self.gibbs_hvh(chain_start if step == 0 else nh_samples)

        # update weights
        self._dW = self._momentum * self._dW + \
                   np.dot(X_batch.T, ph_mean) - np.dot(nv_samples.T, nh_means)
        self._dvb = self._momentum * self._dvb +\
                    np.mean(X_batch - nv_samples, axis=0)
        self._dhb = self._momentum * self._dhb +\
                    np.mean(ph_mean - nh_means, axis=0)
        self.W += self._learning_rate * self._dW
        self.vb += self._learning_rate * self._dvb
        self.hb += self._learning_rate * self._dhb

        # remember state if needed
        if self.persistent:
            self._persistent = nh_samples

        return np.mean(np.square(X_batch - nv_means))

    def batch_iter(self, X):
        n_batches = len(X) / self.batch_size
        for i in xrange(n_batches):
            start = i * self.batch_size
            end = start + self.batch_size
            X_batch = X[start:end]
            yield X_batch
        if n_batches * self.batch_size < len(X):
            yield X[end:]

    def train_epoch(self, X):
        mean_recons = []
        for i, X_batch in enumerate(self.batch_iter(X)):
            mean_recons.append(self.update(X_batch))
            if self.verbose and i % (len(X) / (self.batch_size * 16)) == 0:
                print_inline('.')
        if self.verbose:
            print_inline(' ')
        return np.mean(mean_recons)

    def _fit(self, X):
        if not self._initialized:
            layer = FullyConnected(self.n_hidden,
                                   bias=0.,
                                   random_seed=self.random_seed)
            layer.setup_weights(X.shape)
            self.W = layer.W
            self.vb = np.zeros(X.shape[1])
            self.hb = layer.b
            self._dW = np.zeros_like(self.W)
            self._dvb = np.zeros_like(self.vb)
            self._dhb = np.zeros_like(self.hb)
            self._rng = RNG(self.random_seed)
        self._rng.reseed()
        timer = Stopwatch(verbose=False).start()
        for _ in xrange(self.n_epochs):
            self.epoch += 1
            if self.verbose:
                print_inline('Epoch {0:>{1}}/{2} '.format(
                    self.epoch, len(str(self.n_epochs)), self.n_epochs))

            if isinstance(self.learning_rate, str):
                S, F = map(float, self.learning_rate.split('->'))
                self._learning_rate = S + (F - S) * (
                    1. - np.exp(-(self.epoch - 1.) / 8.)) / (
                        1. - np.exp(-(self.n_epochs - 1.) / 8.))
            else:
                self._learning_rate = self.learning_rate

            if isinstance(self.momentum, str):
                S, F = map(float, self.momentum.split('->'))
                self._momentum = S + (F - S) * (
                    1. - np.exp(-(self.epoch - 1) / 4.)) / (
                        1. - np.exp(-(self.n_epochs - 1) / 4.))
            else:
                self._momentum = self.momentum

            mean_recon = self.train_epoch(X)
            if mean_recon < self.best_recon:
                self.best_recon = mean_recon
                self.best_epoch = self.epoch
                self.best_W = self.W.copy()
                self.best_vb = self.vb.copy()
                self.best_hb = self.hb.copy()
                self._early_stopping = self.early_stopping

            msg = 'elapsed: {0} sec'.format(
                width_format(timer.elapsed(), default_width=5, max_precision=2))
            msg += ' - recon. mse: {0}'.format(
                width_format(mean_recon, default_width=6, max_precision=4))
            msg += ' - best r-mse: {0}'.format(
                width_format(self.best_recon, default_width=6, max_precision=4))
            if self.early_stopping:
                msg += ' {0}*'.format(self._early_stopping)
            if self.verbose:
                print msg

            if self._early_stopping == 0:
                return
            if self.early_stopping:
                self._early_stopping -= 1

    def _serialize(self, params):
        for attr in ('W', 'best_W', 'vb', 'best_vb', 'hb', 'best_hb'):
            if attr in params and params[attr] is not None:
                params[attr] = params[attr].tolist()
        return params

    def _deserialize(self, params):
        for attr in ('W', 'best_W', 'vb', 'best_vb', 'hb', 'best_hb'):
            if attr in params and params[attr] is not None:
                params[attr] = np.asarray(params[attr])
        return params
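# Minimal usage sketch for the RBM above (assumes `BaseEstimator.fit` validates
# the input and dispatches to `_fit`, as the rest of this codebase suggests);
# trains CD-1 on random binary data and reads back the best reconstruction stats:
X_demo = (RNG(seed=1337).rand(64, 32) > 0.5).astype(np.float64)
rbm_demo = RBM(n_hidden=16, k=1, batch_size=8, n_epochs=5,
               learning_rate=0.05, momentum=0.5, random_seed=1337)
rbm_demo.fit(X_demo)
print rbm_demo.best_epoch, rbm_demo.best_recon  # epoch/value of the lowest recon. MSE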
def __init__(self, random_seed=None):
    self.random_seed = random_seed
    self.rng = RNG(self.random_seed)
def make_train_loaders(block_index, distill=True):
    # assemble data
    X_train = []
    y_train = []
    manip_train = []
    soft_logits = []
    if distill:
        soft_logits_ind = []
        for c in xrange(10):
            b = block_index % N_BLOCKS[c]
            X_block = np.load(
                os.path.join(args.data_path, 'X_{0}_{1}.npy'.format(c, b)))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            if args.bootstrap:
                X_block = [X_block[i] for i in b_ind[c][b]]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            soft_logits_ind_block = SOFT_LOGITS_IND[c][b]
            if args.bootstrap:
                soft_logits_ind_block = [
                    soft_logits_ind_block[i] for i in b_ind[c][b]
                ]
            soft_logits_ind += soft_logits_ind_block
        for c in xrange(10):
            b = N_PSEUDO_BLOCKS_FOR_VALIDATION + block_index % N_PSEUDO_BLOCKS[c]
            X_pseudo_block = np.load(
                os.path.join(args.data_path, 'X_pseudo_{0}_{1}.npy'.format(c, b)))
            X_pseudo_block = [
                X_pseudo_block[i] for i in xrange(len(X_pseudo_block))
            ]
            if args.bootstrap:
                X_pseudo_block = [
                    X_pseudo_block[i] for i in b_pseudo_ind[c][b]
                ]
            X_train += X_pseudo_block
            y_train += np.repeat(c, len(X_pseudo_block)).tolist()
            manip_block = np.load(
                os.path.join(args.data_path, 'manip_pseudo_{0}_{1}.npy'.format(c, b)))
            manip_block = [m for m in manip_block]
            if args.bootstrap:
                manip_block = [manip_block[i] for i in b_pseudo_ind[c][b]]
            manip_train += manip_block
            soft_logits_ind_block = SOFT_LOGITS_IND[10 + c][b]
            if args.bootstrap:
                soft_logits_ind_block = [
                    soft_logits_ind_block[i] for i in b_pseudo_ind[c][b]
                ]
            soft_logits_ind += soft_logits_ind_block
        soft_logits = np.load(os.path.join(
            args.data_path, 'logits_train.npy')).astype(np.float32)
        soft_logits -= soft_logits.mean(axis=1)[:, np.newaxis]
        soft_logits = [
            soft_logits[i] / max(args.temperature, 1.) for i in soft_logits_ind
        ]
    else:
        for c in xrange(10):
            X_block = np.load(
                os.path.join(args.data_path, G[c][block_index % len(G[c])]))
            X_block = [X_block[i] for i in xrange(len(X_block))]
            X_train += X_block
            y_train += np.repeat(c, len(X_block)).tolist()
            manip_train += [float32(0.)] * len(X_block)
            soft_logits += [np.zeros((10), dtype=np.float32)] * len(X_block)

    shuffle_ind = range(len(y_train))
    RNG(seed=block_index * 41).shuffle(shuffle_ind)
    X_train = [X_train[i] for i in shuffle_ind]
    y_train = [y_train[i] for i in shuffle_ind]
    manip_train = [manip_train[i] for i in shuffle_ind]
    soft_logits = [soft_logits[i] for i in shuffle_ind]

    # make dataset
    rng = RNG(args.random_seed)
    train_transforms_list = [
        transforms.Lambda(lambda (x, m, y): (Image.fromarray(x), m, y)),
        ######
        # 972/1982 manip pseudo images
        # images : pseudo = approx. 48 : 8 = 6 : 1
        # thus to get 50 : 50 manip : unalt we manip 11965/25874 ~ 46% of non-pseudo images
        ######
        transforms.Lambda(lambda (img, m, y): (make_random_manipulation(img, rng), float32(1.), y) if \
                          m[0] < 0.5 and rng.rand() < TRAIN_MANIP_RATIO else (make_crop(img, args.crop_size, rng), m, y)),
        transforms.Lambda(lambda (img, m, y): ([img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)], m) if \
                          True else (img, m)),
    ]
    train_transforms_list += make_aug_transforms(rng)
    if args.kernel:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (conv_K(np.asarray(img, dtype=np.uint8)), m)),
            transforms.Lambda(lambda (x, m): (torch.from_numpy(x.transpose(2, 0, 1)), m))
        ]
    else:
        train_transforms_list += [
            transforms.Lambda(lambda (img, m): (transforms.ToTensor()(img), m))
        ]
    train_transforms_list += [
        transforms.Lambda(lambda (img, m): (transforms.Normalize(args.means, args.stds)(img), m))
    ]
    train_transform = transforms.Compose(train_transforms_list)
    dataset = make_numpy_dataset(X=[(x, m, y) for x, m, y in zip(X_train, manip_train, y_train)],
                                 y=y_train,
                                 transform=train_transform,
                                 soft_logits=soft_logits)

    # make loader
    loader = DataLoader(dataset=dataset,
                        batch_size=args.batch_size,
                        shuffle=False,
                        num_workers=args.n_workers,
                        sampler=StratifiedSampler(class_vector=np.asarray(y_train),
                                                  batch_size=args.batch_size))
    return loader
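# The distillation branch above centers the teacher logits and divides them by
# `args.temperature`; the soft targets the student would match are the softmax of
# those scaled logits. A minimal sketch of that relationship (illustration only;
# `softmax_sketch` is a local helper, not project code):
def softmax_sketch(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

teacher_logits = np.array([[4., 1., 0.5]], dtype=np.float32)
for T_demo in (1., 4., 8.):
    print T_demo, softmax_sketch(teacher_logits / T_demo)  # higher T -> softer target distribution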