def test_cifar10(): train = CIFAR10(('train', ), load_in_memory=False) assert train.num_examples == 50000 handle = train.open() features, targets = train.get_data(handle, slice(49990, 50000)) assert features.shape == (10, 3, 32, 32) assert targets.shape == (10, 1) train.close(handle) test = CIFAR10(('test', ), load_in_memory=False) handle = test.open() features, targets = test.get_data(handle, slice(0, 10)) assert features.shape == (10, 3, 32, 32) assert targets.shape == (10, 1) assert features.dtype == numpy.uint8 assert targets.dtype == numpy.uint8 test.close(handle) stream = DataStream.default_stream(test, iteration_scheme=SequentialScheme( 10, 10)) data = next(stream.get_epoch_iterator())[0] assert data.min() >= 0.0 and data.max() <= 1.0 assert data.dtype == config.floatX assert_raises(ValueError, CIFAR10, ('valid', ))
def test_cifar10():
    cifar10_train = CIFAR10('train', start=20000)
    assert len(cifar10_train.features) == 30000
    assert len(cifar10_train.targets) == 30000
    assert cifar10_train.num_examples == 30000

    cifar10_test = CIFAR10('test', sources=('targets',))
    assert len(cifar10_test.targets) == 10000
    assert cifar10_test.num_examples == 10000

    first_feature, first_target = cifar10_train.get_data(request=[0])
    assert first_feature.shape == (1, 3072)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = cifar10_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    assert_raises(ValueError, CIFAR10, 'valid')

    cifar10_test = cPickle.loads(cPickle.dumps(cifar10_test))
    assert len(cifar10_test.targets) == 10000

    cifar_10_test_unflattened = CIFAR10('test', flatten=False)
    assert cifar_10_test_unflattened.features.shape == (10000, 3, 32, 32)
def create_cifar10_streams(training_batch_size, monitoring_batch_size):
    """Creates CIFAR10 data streams.

    Parameters
    ----------
    training_batch_size : int
        Batch size for training.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    train_set = CIFAR10(('train',), sources=('features',),
                        subset=slice(0, 45000))
    valid_set = CIFAR10(('train',), sources=('features',),
                        subset=slice(45000, 50000))
    test_set = CIFAR10(('test',), sources=('features',))
    return create_streams(train_set, valid_set, test_set,
                          training_batch_size, monitoring_batch_size)
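# `create_streams` is referenced above but not shown in this snippet. A
# minimal sketch of what it could look like, following the stream-building
# pattern used elsewhere in this file (shuffled main-loop stream, sequential
# monitoring streams); the body is an assumption, only the return order is
# taken from the docstring above.
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme, SequentialScheme


def create_streams(train_set, valid_set, test_set,
                   training_batch_size, monitoring_batch_size):
    """Hypothetical helper: build main-loop and monitoring streams."""
    main_loop_stream = DataStream.default_stream(
        train_set, iteration_scheme=ShuffledScheme(
            train_set.num_examples, training_batch_size))
    train_monitor_stream = DataStream.default_stream(
        train_set, iteration_scheme=SequentialScheme(
            train_set.num_examples, monitoring_batch_size))
    valid_monitor_stream = DataStream.default_stream(
        valid_set, iteration_scheme=SequentialScheme(
            valid_set.num_examples, monitoring_batch_size))
    test_monitor_stream = DataStream.default_stream(
        test_set, iteration_scheme=SequentialScheme(
            test_set.num_examples, monitoring_batch_size))
    return (main_loop_stream, train_monitor_stream,
            valid_monitor_stream, test_monitor_stream)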
def create_cifar10_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(0, 45000))
    valid_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(45000, 50000))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
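# Usage sketch (hypothetical batch sizes): wire the three streams returned
# above into a training loop. The [0, 1] floatX scaling of the features is
# what DataStream.default_stream produces for CIFAR10, as asserted in the
# test at the top of this file.
import numpy

rng = numpy.random.RandomState(2023)
main_loop_stream, train_monitor_stream, valid_monitor_stream = \
    create_cifar10_data_streams(128, 500, rng=rng)
for batch in main_loop_stream.get_epoch_iterator():
    features = batch[0]  # shape (128, 3, 32, 32), floatX in [0, 1]
    break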
def get_cifar10(split, sources, load_in_memory):
    from fuel.datasets import CIFAR10
    if 'test' not in split:
        subset = slice(0, 45000) if 'train' in split else slice(45000, 50000)
        split = ('train',)
    else:
        subset = None
    return CIFAR10(split, sources=sources, subset=subset,
                   load_in_memory=load_in_memory)
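# Usage sketch: any non-train, non-test split name (e.g. 'valid') is mapped
# by get_cifar10 onto the last 5000 training examples. The expected sizes
# below follow from the subset slices inside the function.
train_set = get_cifar10(('train',), ('features', 'targets'), True)
valid_set = get_cifar10(('valid',), ('features', 'targets'), True)
test_set = get_cifar10(('test',), ('features', 'targets'), True)
assert train_set.num_examples == 45000
assert valid_set.num_examples == 5000
assert test_set.num_examples == 10000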
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten( DataStream.default_stream( dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples % args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams ''' x = T.matrix('x', dtype='float32') temp = T.scalar('temp', dtype='float32') f=transition_operator(tparams, model_options, x, temp) for data in train_stream.get_epoch_iterator(): print data[0] a = f([data[0], 1.0, 1]) #ipdb.set_trace() ''' x, cost, start_temperature = build_model(tparams, model_options) inps = [x, start_temperature] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) #print 'Building f_cost...', #f_cost = theano.function(inps, cost) #print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' for param in tparams: print param print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): if eidx % 20 == 0: params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(eidx) + '.npz') n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): if args.dataset == 'CIFAR10': if data[0].shape[0] == args.batch_size: data_use = (data[0].reshape(args.batch_size, 3 * 32 * 32), ) else: continue t0 = time.time() batch_index += 1 n_samples += len(data_use[0]) uidx += 1 if data_use[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() t1 = time.time() data_run = data_use[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): meta_cost.append(f_grad_shared(data_run, temperature_forward)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion( [data_run, temperature_forward, 1]) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) ud = time.time() - ud_start #gradient_updates_ = get_grads(data_use[0],args.temperature) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
t1 = time.time() #print time.time() - t1, "time to get grads" t1 = time.time() logger.log({ 'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost }) #'Norm_1': np.linalg.norm(gradient_updates_[0]), #'Norm_2': np.linalg.norm(gradient_updates_[1]), #'Norm_3': np.linalg.norm(gradient_updates_[2]), #'Norm_4': np.linalg.norm(gradient_updates_[3])}) #print time.time() - t1, "time to log" #print time.time() - t0, "total time in batch" t5 = time.time() if batch_index % 20 == 0: print batch_index, "cost", cost if batch_index % 200 == 0: count_sample += 1 temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) temperature_forward = args.temperature for num_step in range(args.num_steps * args.meta_steps): print "Forward temperature", temperature_forward if num_step == 0: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [data_use[0], temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step)) else: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [x_data, temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step)) temperature_forward = temperature_forward * args.temperature_factor x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' + str(batch_index)) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On backward step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/' + "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor if args.noise == "gaussian": x_sampled = np.random.normal( 0.5, 2.0, size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ipdb.set_trace()
def train(args, model_args): model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=20000, classes=1, cycles=1., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) elif args.dataset == 'Circle': print 'loading Circle' train_set = Circle(num_examples=20000, classes=1, cycles=1., noise=0.0, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) iter_per_epoch = train_set.num_examples else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() train_stream = dataset_train shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams x, cost, start_temperature = build_model(tparams, model_options) inps = [x, start_temperature] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) #print 'Building f_cost...', #f_cost = theano.function(inps, cost) #print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 0 print 'Number of steps....', args.num_steps print 'Done' count_sample = 1 batch_index = 0 for eidx in xrange(max_epochs): if eidx % 20 == 0: params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(eidx) + '.npz') if eidx == 30: ipdb.set_trace() n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): batch_index += 1 n_samples += len(data[0]) uidx += 1 if data[0] is None: print 'No data ' uidx -= 1 continue data_run = data[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): meta_cost.append(f_grad_shared(data_run, temperature_forward)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion( data_run, temperature_forward) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
logger.log({ 'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost }) empty = [] spiral_x = [empty for i in range(args.num_steps)] spiral_corrupted = [] spiral_sampled = [] grad_forward = [] grad_back = [] x_data_time = [] x_tilt_time = [] if batch_index % 8 == 0: count_sample += 1 temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) temperature_forward = args.temperature for num_step in range(args.num_steps): if num_step == 0: x_data_time.append(data[0]) plot_images( data[0], model_dir + '/' + 'orig_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) x_data, mu_data, _, _ = forward_diffusion( data[0], temperature_forward) plot_images( x_data, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(num_step)) x_data_time.append(x_data) temp_grad = np.concatenate( (x_data_time[-2], x_data_time[-1]), axis=1) grad_forward.append(temp_grad) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) spiral_corrupted.append(x_data) mu_data = np.asarray(mu_data).astype( 'float32').reshape(args.batch_size, INPUT_SIZE) mu_data = mu_data.reshape(args.batch_size, 2) else: x_data_time.append(x_data) x_data, mu_data, _, _ = forward_diffusion( x_data, temperature_forward) plot_images( x_data, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(num_step)) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) spiral_corrupted.append(x_data) mu_data = np.asarray(mu_data).astype( 'float32').reshape(args.batch_size, INPUT_SIZE) mu_data = mu_data.reshape(args.batch_size, 2) x_data_time.append(x_data) temp_grad = np.concatenate( (x_data_time[-2], x_data_time[-1]), axis=1) grad_forward.append(temp_grad) temperature_forward = temperature_forward * args.temperature_factor mean_sampled = x_data.mean() var_sampled = x_data.var() x_temp2 = data[0].reshape(args.batch_size, 2) plot_2D( spiral_corrupted, args.num_steps, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) plot_2D( x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index)) plot_grad( grad_forward, model_dir + '/' + 'grad_forward_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) for i in range(args.num_steps + args.extra_steps): x_tilt_time.append(x_data) x_data, sampled_mean = f_sample(x_data, temperature) plot_images( x_data, model_dir + '/' + 'sampled_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(i)) x_tilt_time.append(x_data) temp_grad = np.concatenate( (x_tilt_time[-2], x_tilt_time[-1]), axis=1) grad_back.append(temp_grad) ###print 'Recons, On step number, using temperature', i, temperature x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor plot_grad( grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) plot_2D( x_tilt_time, args.num_steps, model_dir + '/' + 'sampled_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) s = np.random.normal(mean_sampled, var_sampled, [args.batch_size, 2]) x_sampled = s temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in 
range(args.num_steps + args.extra_steps): x_data, sampled_mean = f_sample(x_data, temperature) spiral_sampled.append(x_data) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor plot_2D( spiral_sampled, args.num_steps, model_dir + '/' + 'inference_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) ipdb.set_trace()
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten( DataStream.default_stream( dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples % args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams x, cost, start_temperature, step_chain = build_model( tparams, model_options) inps = [x.astype('float32'), start_temperature, step_chain] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') step_chain_part = T.scalar('step_chain_part', dtype='int32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature, step_chain_part) print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' #for param in tparams: # print param # print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 save_data = [] for eidx in xrange(1): print 'Starting Next Epoch ', eidx for data_ in range(500): #train_stream.get_epoch_iterator(): if args.noise == "gaussian": x_sampled = np.random.normal(0.5, 2.0, size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( x_data.astype('float32'), temperature, args.num_steps * args.meta_steps - i - 1) print 'On step number, using temperature', i, temperature #reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor count_sample = count_sample + 1 save_data.append(x_data) fname = model_dir + '/batch_index_' + str( batch_index) + '_inference_' + 'epoch_' + str( count_sample) + '_step_' + str(i) np.savez(fname + '.npz', x_data) save2_data = np.asarray(save_data).astype('float32') fname = model_dir + '/generted_images_50000' #+ args.saveto_filename np.savez(fname + '.npz', save_data) ipdb.set_trace()
    def transform_batch(self, batch):
        targets = batch[1]
        for i in range(len(targets)):
            targets[i] = self.relabel[targets[i]]
        # Fuel batches are tuples, so build a new tuple rather than assigning
        # back into the batch.
        return (batch[0], targets)


if __name__ == '__main__':
    from fuel.datasets import CIFAR10
    from fuel.schemes import ShuffledScheme
    from fuel.streams import DataStream

    dataset = CIFAR10(('train',), sources=('features', 'targets'),
                      subset=slice(0, 45000))
    stream = FilterLabelsTransformer(
        10, 10,
        DataStream(dataset=dataset,
                   iteration_scheme=ShuffledScheme(dataset.num_examples, 200)),
        produces_examples=False)
    epitr = stream.get_epoch_iterator()
    out = next(epitr)
    print out[0].shape, out[1].shape
    print out[1][:10]
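# The per-element loop in transform_batch above can also be done with one
# vectorized lookup. A minimal standalone sketch, assuming the label mapping
# covers classes 0-9 and targets is a uint8 numpy array; the `relabel` dict
# below is only an illustrative stand-in for the transformer's own mapping.
import numpy

relabel = {k: k for k in range(10)}  # identity mapping, for illustration only
relabel_table = numpy.asarray([relabel[k] for k in range(10)], dtype='uint8')


def relabel_targets(targets):
    # Fancy indexing remaps every target in one shot instead of a Python loop.
    return relabel_table[targets]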
if args.resume_file is not None:
    print "Resuming training from " + args.resume_file
    from blocks.scripts import continue_training
    continue_training(args.resume_file)

## load the training data
if args.dataset == 'MNIST':
    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features',))
    dataset_test = MNIST(['test'], sources=('features',))
    n_colors = 1
    spatial_width = 28
elif args.dataset == 'CIFAR10':
    from fuel.datasets import CIFAR10
    dataset_train = CIFAR10(['train'], sources=('features',))
    dataset_test = CIFAR10(['test'], sources=('features',))
    n_colors = 3
    spatial_width = 32
elif args.dataset == 'IMAGENET':
    from imagenet_data import IMAGENET
    spatial_width = 128
    dataset_train = IMAGENET(['train'], width=spatial_width)
    dataset_test = IMAGENET(['test'], width=spatial_width)
    n_colors = 3
else:
    raise ValueError("Unknown dataset %s." % args.dataset)

train_stream = Flatten(DataStream.default_stream(
    dataset_train,
    iteration_scheme=ShuffledScheme(
        examples=dataset_train.num_examples,
        batch_size=args.batch_size)))
def train(args, model_args, lrate): model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir logger = mimir.Logger(filename=model_dir2 + '/' + model_id2 + 'log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() train_stream = Flatten( DataStream.default_stream(dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples, batch_size=args.batch_size))) shp = next(train_stream.get_epoch_iterator())[0].shape # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_ and os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) ''' x = T.matrix('x', dtype='float32') f=transition_operator(tparams, model_options, x, 1) for data in train_stream.get_epoch_iterator(): print data[0] a = f(data[0]) print a ipdb.set_trace() ''' x, cost = build_model(tparams, model_options) inps = [x] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) print 'Building f_cost...', f_cost = theano.function(inps, cost) print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 0 print 'Number of steps....' 
print args.num_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): batch_index += 1 n_samples += len(data[0]) uidx += 1 if data[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() cost = f_grad_shared(data[0]) f_update(lrate) ud = time.time() - ud_start if batch_index % 1 == 0: print 'Cost is this', cost count_sample += 1 from impainting import change_image, inpainting train_temp = data[0] print data[0].shape change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3) train_temp = train_temp.reshape(args.batch_size, 784) output = inpainting(train_temp) change_image(output.reshape(args.batch_size, 1, 28, 28), 1) reverse_time( scl, shft, output, model_dir + '/' + 'impainting_orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index)) x_data = np.asarray(output).astype('float32') temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) temperature = args.temperature #* (args.temperature_factor ** (args.num_steps -1 )) orig_impainted_data = np.asarray(data[0]).astype('float32') for i in range(args.num_steps + args.extra_steps + 5): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( x_data, temperature) print 'Impainting using temperature', i, temperature x_data = do_half_image(x_data, orig_impainted_data) reverse_time( scl, shft, x_data, model_dir + '/' + 'impainting_orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index) + 'step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature = temperature #temperature /= args.temperature_factor ipdb.set_trace()
def _cifir():
    from fuel.datasets import CIFAR10
    import os
    os.environ["FUEL_DATA_PATH"] = os.getcwd() + "/data/"
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }
    batch_size = 64
    data_train = CIFAR10(which_sets=['train'], sources=['features'])
    streams = DataStream(
        data_train, iteration_scheme=SequentialScheme(
            data_train.num_examples, batch_size))
    train_stream = Flatten(streams)
    # print train_stream.get_epoch_iterator(as_dict=True).next()
    # raise
    features_size = 32 * 32 * 3

    inputs = T.matrix('features')
    inputs = ((inputs / 255.) * 2. - 1.)
    rng = MRG_RandomStreams(123)
    prior = Z_prior(dim=512)
    gen = Generator(input_dim=512, dims=[512, 512, 512, 512, features_size],
                    alpha=0.1, **inits)
    dis = Discriminator(dims=[features_size, 512, 512, 512, 512],
                        alpha=0.1, **inits)
    gan = GAN(dis=dis, gen=gen, prior=prior)
    gan.initialize()

    # gradient penalty
    fake_samples, _ = gan.sampling(inputs.shape[0])
    e = rng.uniform(size=(inputs.shape[0], 1))
    mixed_input = (e * fake_samples) + (1 - e) * inputs
    output_d_mixed = gan._dis.apply(mixed_input)
    grad_mixed = T.grad(T.sum(output_d_mixed), mixed_input)
    norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=1))
    grad_penalty = T.mean(T.square(norm_grad_mixed - 1))

    y_hat1, y_hat0, z = gan.apply(inputs)
    d_loss_real = y_hat1.mean()
    d_loss_fake = y_hat0.mean()
    d_loss = -d_loss_real + d_loss_fake + 10 * grad_penalty
    g_loss = -d_loss_fake

    dis_obj = d_loss
    gen_obj = g_loss

    model = Model([y_hat0, y_hat1])

    em_loss = -d_loss_real + d_loss_fake
    em_loss.name = "Earth Move loss"
    dis_obj.name = 'Discriminator loss'
    gen_obj.name = 'Generator loss'

    cg = ComputationGraph([gen_obj, dis_obj])
    gen_filter = VariableFilter(roles=[PARAMETER],
                                bricks=gen.linear_transformations)
    dis_filter = VariableFilter(roles=[PARAMETER],
                                bricks=dis.linear_transformations)
    gen_params = gen_filter(cg.variables)
    dis_params = dis_filter(cg.variables)

    # Prepare the dropout
    _inputs = []
    for brick_ in [gen]:
        _inputs.extend(VariableFilter(
            roles=[INPUT],
            bricks=brick_.linear_transformations)(cg.variables))
    cg_dropout = apply_dropout(cg, _inputs, 0.02)
    gen_obj = cg_dropout.outputs[0]
    dis_obj = cg_dropout.outputs[1]

    gan.dis_params = dis_params
    gan.gen_params = gen_params

    # gradient penalty
    algo = AdverserialTraning(gen_obj=gen_obj, dis_obj=dis_obj,
                              model=gan, dis_iter=5,
                              gradient_clip=None,
                              step_rule=RMSProp(learning_rate=1e-4),
                              gen_consider_constant=z)
    neg_sample = gan.sampling(size=25)

    from blocks.monitoring.aggregation import mean
    monitor = TrainingDataMonitoring(variables=[mean(gen_obj),
                                                mean(dis_obj),
                                                mean(em_loss)],
                                     prefix="train", after_batch=True)
    subdir = './exp/' + 'CIFAR10' + "-" + time.strftime("%Y%m%d-%H%M%S")
    check_point = Checkpoint("{}/{}".format(subdir, 'CIFAR10'),
                             every_n_epochs=100,
                             save_separately=['log', 'model'])
    neg_sampling = GenerateNegtiveSample(inputs, neg_sample,
                                         img_size=(25, 32, 32, 3),
                                         every_n_epochs=10)
    if not os.path.exists(subdir):
        os.makedirs(subdir)
    main_loop = MainLoop(algorithm=algo, model=model,
                         data_stream=train_stream,
                         extensions=[Printing(), ProgressBar(), monitor,
                                     check_point, neg_sampling])
    main_loop.run()
def main(num_epochs=100): x = tensor.tensor4('features') y = tensor.lmatrix('targets') num_filters = 64 layers = [ConvolutionalLayer(filter_size=(8, 8), num_filters=num_filters, num_channels=3, step=(1, 1), border_mode='valid'), MaxPooling(pooling_size=(2, 2)), ConvolutionalLayer(filter_size=(5, 5), num_filters=num_filters, num_channels=num_filters, step=(1, 1), border_mode='valid')] convnet = ConvolutionalNetwork(layers=layers) output_shape = convnet.get_output_shape((32, 32)) convnet.set_input_shape((32, 32)) fc_net = NeuralSoftmax(input_dim=numpy.prod(output_shape) * num_filters, n_classes=10, n_hidden=[500]) convnet_features = convnet.apply(x) probs = fc_net.get_probs(features=convnet_features.flatten(2)) params = convnet.get_params() + fc_net.get_params() weights = convnet.get_weights() cost = fc_net.get_cost(probs=probs, targets=y).mean() cost.name = 'cost' misclassification = fc_net.get_misclassification( probs=probs, targets=y ).mean() misclassification.name = 'misclassification' train_dataset = CIFAR10('train', flatten=False) test_dataset = CIFAR10('test', flatten=False) algorithm = GradientDescent( cost=cost, params=params, step_rule=Momentum(learning_rate=.01, momentum=0.1)) train_data_stream = Mapping( data_stream=ForceFloatX( data_stream=DataStream( dataset=train_dataset, iteration_scheme=ShuffledScheme( examples=range(50000), batch_size=100, ) ) ), mapping=Rescale(scale=1/127.5, shift=-127.5) ) valid_data_stream = Mapping( data_stream=ForceFloatX( data_stream=DataStream( dataset=train_dataset, iteration_scheme=SequentialScheme( examples=range(40000, 50000), batch_size=1000, ) ) ), mapping=Rescale(scale=1/127.5, shift=-127.5) ) test_data_stream = Mapping( data_stream=ForceFloatX( data_stream=DataStream( dataset=test_dataset, iteration_scheme=SequentialScheme( examples=test_dataset.num_examples, batch_size=100, ) ) ), mapping=Rescale(scale=1/127.5, shift=-127.5) ) model = Model(cost) extensions = [] extensions.append(Timing()) extensions.append(FinishAfter(after_n_epochs=num_epochs)) extensions.append(DataStreamMonitoring( [cost, misclassification], test_data_stream, prefix='test')) extensions.append(DataStreamMonitoring( [cost, misclassification], valid_data_stream, prefix='valid')) extensions.append(TrainingDataMonitoring( [cost, misclassification], prefix='train', after_epoch=True)) plotters = [] plotters.append(Plotter( channels=[['test_cost', 'test_misclassification', 'train_cost', 'train_misclassification']], titles=['Costs'])) display_train = ImageDataStreamDisplay( data_stream=copy.deepcopy(train_data_stream), image_shape=(3, 32, 32), axes=('c', 0, 1), shift=0, rescale=1., ) weight_display = WeightDisplay( weights=weights, transpose=(0, 1, 2, 3), image_shape=(3, 8, 8), axes=('c', 0, 1), shift=-0.5, rescale=2., ) # Feature maps one_example_train_data_stream = Mapping( data_stream=ForceFloatX( data_stream=DataStream( dataset=train_dataset, iteration_scheme=ShuffledScheme( examples=train_dataset.num_examples, batch_size=1, ) ) ), mapping=Rescale(scale=1/127.5, shift=-127.5) ) displayable_convnet_features = convnet_features.dimshuffle((1, 0, 2, 3)) convnet_features_normalizer = abs( displayable_convnet_features ).max(axis=(1, 2, 3)) displayable_convnet_features = displayable_convnet_features \ / convnet_features_normalizer.dimshuffle((0, 'x', 'x', 'x')) get_displayable_convnet_features = theano.function( [x], displayable_convnet_features ) display_feature_maps_data_stream = Mapping( data_stream=one_example_train_data_stream, 
mapping=TupleMapping(get_displayable_convnet_features, same_len_out=True) ) display_feature_maps = ImageDataStreamDisplay( data_stream=display_feature_maps_data_stream, image_shape=(1, ) + output_shape, axes=('c', 0, 1), shift=0, rescale=1., ) # Saliency map displayable_saliency_map = tensor.grad(cost, x) saliency_map_normalizer = abs( displayable_saliency_map ).max(axis=(1, 2, 3)) displayable_saliency_map = displayable_saliency_map \ / saliency_map_normalizer.dimshuffle((0, 'x', 'x', 'x')) get_displayable_saliency_map = theano.function( [x, y], displayable_saliency_map ) display_saliency_map_data_stream = Mapping( data_stream=copy.deepcopy(train_data_stream), mapping=TupleMapping(get_displayable_saliency_map, same_len_out=True, same_len_in=True) ) display_saliency_map = ImageDataStreamDisplay( data_stream=display_saliency_map_data_stream, image_shape=(3, 32, 32), axes=('c', 0, 1), shift=0, rescale=1., ) # Deconvolution x_repeated = x.repeat(num_filters, axis=0) convnet_features_repeated = convnet.apply(x_repeated) convnet_features_selected = convnet_features_repeated \ * tensor.eye(num_filters).repeat( x.shape[0], axis=0 ).dimshuffle((0, 1, 'x', 'x')) displayable_deconvolution = tensor.grad(misclassification, x_repeated, known_grads={ convnet_features_selected: convnet_features_selected }) deconvolution_normalizer = abs( displayable_deconvolution ).max(axis=(1, 2, 3)) displayable_deconvolution = displayable_deconvolution \ / deconvolution_normalizer.dimshuffle((0, 'x', 'x', 'x')) get_displayable_deconvolution = theano.function( [x], displayable_deconvolution ) display_deconvolution_data_stream = Mapping( data_stream=one_example_train_data_stream, mapping=TupleMapping(get_displayable_deconvolution, same_len_out=True) ) display_deconvolution = ImageDataStreamDisplay( data_stream=display_deconvolution_data_stream, image_shape=(3, 32, 32), axes=('c', 0, 1), shift=0, rescale=1., ) images_displayer = DisplayImage( image_getters=[display_train, weight_display, display_feature_maps, display_saliency_map, display_deconvolution], titles=['Training examples', 'Convolutional weights', 'Feature maps', 'Sensitivity', 'Deconvolution'] ) plotters.append(images_displayer) extensions.append(PlotManager('CIFAR-10 convnet examples', plotters=plotters, after_epoch=False, every_n_epochs=10, after_training=True)) extensions.append(Printing()) main_loop = MainLoop(model=model, data_stream=train_data_stream, algorithm=algorithm, extensions=extensions) main_loop.run()
import numpy

from transformers import ZCA, ContrastNorm, Whitening
from fuel.transformers import ForceFloatX
from fuel.datasets import CIFAR10
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme, SequentialScheme

train_dataset = CIFAR10(['train'], sources=('features', 'targets'))
test_dataset = CIFAR10(['test'], sources=('features', 'targets'))

batch_size = 400  # Batch size for training
batch_size_mon = 4000  # Batch size for monitoring and batch normalization
n_batches = int(numpy.ceil(float(train_dataset.num_examples) / batch_size_mon))
num_protos = 10

train_data = train_dataset.get_data(
    request=range(train_dataset.num_examples))[0]
print train_data.shape

#cnorm = ContrastNorm()
#train_data = cnorm.apply(train_data)
#whiten = ZCA()
#whiten.fit(3072, train_data)


def preprocessing(data_stream, num_examples, batch_size):
    return data_stream
    #return ForceFloatX(Whitening(data_stream, ShuffledScheme(num_examples, batch_size), whiten, cnorm), which_sources=('features',))


train_stream_mon = preprocessing(DataStream(train_dataset),
def main(save_to, num_epochs, weight_decay=0.0001, noise_pressure=0, subset=None, num_batches=None, batch_size=None, histogram=None, resume=False): output_size = 10 prior_noise_level = -10 noise_step_rule = Scale(1e-6) noise_rate = theano.shared(numpy.asarray(1e-5, dtype=theano.config.floatX)) convnet = create_res_net(out_noise=True, tied_noise=True, tied_sigma=True, noise_rate=noise_rate, prior_noise_level=prior_noise_level) x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet test_probs = convnet.apply(x) test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs) .copy(name='cost')) test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs) .copy(name='error_rate')) test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs) .copy(name='confusion')) test_confusion.tag.aggregation_scheme = Sum(test_confusion) test_cg = ComputationGraph([test_cost, test_error_rate]) # Apply dropout to all layer outputs except final softmax # dropout_vars = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables) # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5) # Apply 0.2 dropout to the pre-averaging layer # dropout_vars_2 = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_8_apply_output$")(test_cg.variables) # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2) # Apply 0.2 dropout to the input, as in the paper # train_cg = apply_dropout(test_cg, [x], 0.2) # train_cg = drop_cg # train_cg = apply_batch_normalization(test_cg) # train_cost, train_error_rate, train_components = train_cg.outputs with batch_normalization(convnet): with training_noise(convnet): train_probs = convnet.apply(x) train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs) .copy(name='cost')) train_components = (ComponentwiseCrossEntropy().apply(y.flatten(), train_probs).copy(name='components')) train_error_rate = (MisclassificationRate().apply(y.flatten(), train_probs).copy(name='error_rate')) train_cg = ComputationGraph([train_cost, train_error_rate, train_components]) population_updates = get_batch_normalization_updates(train_cg) bn_alpha = 0.9 extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha)) for p, m in population_updates] # for annealing nit_penalty = theano.shared(numpy.asarray(noise_pressure, dtype=theano.config.floatX)) nit_penalty.name = 'nit_penalty' # Compute noise rates for training graph train_logsigma = VariableFilter(roles=[LOG_SIGMA])(train_cg.variables) train_mean_log_sigma = tensor.concatenate([n.flatten() for n in train_logsigma]).mean() train_mean_log_sigma.name = 'mean_log_sigma' train_nits = VariableFilter(roles=[NITS])(train_cg.auxiliary_variables) train_nit_rate = tensor.concatenate([n.flatten() for n in train_nits]).mean() train_nit_rate.name = 'nit_rate' train_nit_regularization = nit_penalty * train_nit_rate train_nit_regularization.name = 'nit_regularization' # Apply regularization to the cost trainable_parameters = VariableFilter(roles=[WEIGHT, BIAS])( train_cg.parameters) mask_parameters = [p for p in trainable_parameters if get_brick(p).name == 'mask'] noise_parameters = VariableFilter(roles=[NOISE])(train_cg.parameters) biases = VariableFilter(roles=[BIAS])(train_cg.parameters) weights = VariableFilter(roles=[WEIGHT])(train_cg.variables) nonmask_weights = [p for p in weights if get_brick(p).name != 'mask'] l2_norm = sum([(W ** 2).sum() for W in nonmask_weights]) l2_norm.name = 'l2_norm' 
l2_regularization = weight_decay * l2_norm l2_regularization.name = 'l2_regularization' # testversion test_cost = test_cost + l2_regularization test_cost.name = 'cost_with_regularization' # Training version of cost train_cost_without_regularization = train_cost train_cost_without_regularization.name = 'cost_without_regularization' train_cost = train_cost + l2_regularization + train_nit_regularization train_cost.name = 'cost_with_regularization' cifar10_train = CIFAR10(("train",)) cifar10_train_stream = RandomPadCropFlip( NormalizeBatchLevels(DataStream.default_stream( cifar10_train, iteration_scheme=ShuffledScheme( cifar10_train.num_examples, batch_size)), which_sources=('features',)), (32, 32), pad=4, which_sources=('features',)) test_batch_size = 128 cifar10_test = CIFAR10(("test",)) cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_test, iteration_scheme=ShuffledScheme( cifar10_test.num_examples, test_batch_size)), which_sources=('features',)) momentum = Momentum(0.01, 0.9) # Create a step rule that doubles the learning rate of biases, like Caffe. # scale_bias = Restrict(Scale(2), biases) # step_rule = CompositeRule([scale_bias, momentum]) # Create a step rule that reduces the learning rate of noise scale_mask = Restrict(noise_step_rule, mask_parameters) step_rule = CompositeRule([scale_mask, momentum]) # from theano.compile.nanguardmode import NanGuardMode # Train with simple SGD algorithm = GradientDescent( cost=train_cost, parameters=trainable_parameters, step_rule=step_rule) algorithm.add_updates(extra_updates) #, # theano_func_kwargs={ # 'mode': NanGuardMode( # nan_is_error=True, inf_is_error=True, big_is_error=True)}) exp_name = save_to.replace('.%d', '') # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. 
extensions = [Timing(), FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches), EpochSchedule(momentum.learning_rate, [ (0, 0.01), # Warm up with 0.01 learning rate (50, 0.1), # Then go back to 0.1 (100, 0.01), (150, 0.001) # (83, 0.01), # Follow the schedule in the paper # (125, 0.001) ]), EpochSchedule(noise_step_rule.learning_rate, [ (0, 1e-2), (2, 1e-1), (4, 1) # (0, 1e-6), # (2, 1e-5), # (4, 1e-4) ]), EpochSchedule(noise_rate, [ (0, 1e-2), (2, 1e-1), (4, 1) # (0, 1e-6), # (2, 1e-5), # (4, 1e-4), # (6, 3e-4), # (8, 1e-3), # Causes nit rate to jump # (10, 3e-3), # (12, 1e-2), # (15, 3e-2), # (19, 1e-1), # (24, 3e-1), # (30, 1) ]), NoiseExtension( noise_parameters=noise_parameters), NoisyDataStreamMonitoring( [test_cost, test_error_rate, test_confusion], cifar10_test_stream, noise_parameters=noise_parameters, prefix="test"), TrainingDataMonitoring( [train_cost, train_error_rate, train_nit_rate, train_cost_without_regularization, l2_regularization, train_nit_regularization, momentum.learning_rate, train_mean_log_sigma, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", every_n_batches=17), # after_epoch=True), Plot('Training performance for ' + exp_name, channels=[ ['train_cost_with_regularization', 'train_cost_without_regularization', 'train_nit_regularization', 'train_l2_regularization'], ['train_error_rate'], ['train_total_gradient_norm'], ['train_mean_log_sigma'], ], every_n_batches=17), Plot('Test performance for ' + exp_name, channels=[[ 'train_error_rate', 'test_error_rate', ]], after_epoch=True), EpochCheckpoint(save_to, use_cpickle=True, after_epoch=True), ProgressBar(), Printing()] if histogram: attribution = AttributionExtension( components=train_components, parameters=cg.parameters, components_size=output_size, after_batch=True) extensions.insert(0, attribution) if resume: extensions.append(Load(exp_name, True, True)) model = Model(train_cost) main_loop = MainLoop( algorithm, cifar10_train_stream, model=model, extensions=extensions) main_loop.run() if histogram: save_attributions(attribution, filename=histogram) with open('execution-log.json', 'w') as outfile: json.dump(main_loop.log, outfile, cls=NumpyEncoder)
def main(save_to, num_epochs, regularization=0.001, subset=None, num_batches=None, batch_size=None, histogram=None, resume=False): output_size = 10 convnet = create_all_conv_net() x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) test_cost = (CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')) test_components = (ComponentwiseCrossEntropy().apply( y.flatten(), probs).copy(name='components')) test_error_rate = (MisclassificationRate().apply( y.flatten(), probs).copy(name='error_rate')) test_confusion = (ConfusionMatrix().apply(y.flatten(), probs).copy(name='confusion')) test_confusion.tag.aggregation_scheme = Sum(test_confusion) test_cg = ComputationGraph([test_cost, test_error_rate, test_components]) # Apply dropout to all layer outputs except final softmax dropout_vars = VariableFilter( roles=[OUTPUT], bricks=[Convolutional], theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables) drop_cg = apply_dropout(test_cg, dropout_vars, 0.5) # Apply 0.2 dropout to the pre-averaging layer # dropout_vars_2 = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_8_apply_output$")(drop_cg.variables) # train_cg = apply_dropout(drop_cg, dropout_vars_2, 0.2) # Apply 0.2 dropout to the input, as in the paper # train_cg = apply_dropout(drop_cg, [x], 0.2) train_cg = drop_cg # train_cg = test_cg train_cost, train_error_rate, train_components = train_cg.outputs # Apply regularization to the cost biases = VariableFilter(roles=[BIAS])(train_cg.parameters) weights = VariableFilter(roles=[WEIGHT])(train_cg.variables) l2_norm = sum([(W**2).sum() for W in weights]) l2_norm.name = 'l2_norm' l2_regularization = regularization * l2_norm l2_regularization.name = 'l2_regularization' test_cost = test_cost + l2_regularization test_cost.name = 'cost_with_regularization' # Training version of cost train_cost_without_regularization = train_cost train_cost_without_regularization.name = 'cost_without_regularization' train_cost = train_cost + regularization * l2_norm train_cost.name = 'cost_with_regularization' cifar10_train = CIFAR10(("train", )) #cifar10_train_stream = RandomPadCropFlip( # NormalizeBatchLevels(DataStream.default_stream( # cifar10_train, iteration_scheme=ShuffledScheme( # cifar10_train.num_examples, batch_size)), # which_sources=('features',)), # (32, 32), pad=5, which_sources=('features',)) cifar10_train_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_train, iteration_scheme=ShuffledScheme(cifar10_train.num_examples, batch_size)), which_sources=('features', )) test_batch_size = 1000 cifar10_test = CIFAR10(("test", )) cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_test, iteration_scheme=ShuffledScheme(cifar10_test.num_examples, test_batch_size)), which_sources=('features', )) momentum = Momentum(0.002, 0.9) # Create a step rule that doubles the learning rate of biases, like Caffe. # scale_bias = Restrict(Scale(2), biases) # step_rule = CompositeRule([scale_bias, momentum]) # step_rule = CompositeRule([StepClipping(100), momentum]) step_rule = momentum # Train with simple SGD algorithm = GradientDescent(cost=train_cost, parameters=train_cg.parameters, step_rule=step_rule) # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. 
extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches), EpochSchedule(momentum.learning_rate, [(1, 0.005), (3, 0.01), (5, 0.02), (200, 0.002), (250, 0.0002), (300, 0.00002)]), DataStreamMonitoring([test_cost, test_error_rate, test_confusion], cifar10_test_stream, prefix="test"), TrainingDataMonitoring([ train_cost, train_error_rate, train_cost_without_regularization, l2_regularization, momentum.learning_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", every_n_batches=10), # after_epoch=True), Plot('Training performance for ' + save_to, channels=[ [ 'train_cost_with_regularization', 'train_cost_without_regularization', 'train_l2_regularization' ], ['train_error_rate'], ['train_total_gradient_norm'], ], every_n_batches=10), # after_batch=True), Plot('Test performance for ' + save_to, channels=[[ 'train_error_rate', 'test_error_rate', ]], after_epoch=True), Checkpoint(save_to), ProgressBar(), Printing() ] if histogram: attribution = AttributionExtension(components=train_components, parameters=cg.parameters, components_size=output_size, after_batch=True) extensions.insert(0, attribution) if resume: extensions.append(Load(save_to, True, True)) model = Model(train_cost) main_loop = MainLoop(algorithm, cifar10_train_stream, model=model, extensions=extensions) main_loop.run() if histogram: save_attributions(attribution, filename=histogram) with open('execution-log.json', 'w') as outfile: json.dump(main_loop.log, outfile, cls=NumpyEncoder)
    slice_100 = slice(0, 32 * 10)
else:
    host_plot = 'http://hades.calculquebec.ca:5042'
    slice_train = slice(0, n_ex)
    slice_test = slice(45000, 50000 - 8)
    slice_valid = slice(40000, 45000 - 8)
    slice_100 = slice(0, 50000)

## Load cifar10 stream
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start
num_train_cifar100 = slice_100.stop - slice_100.start

train_dataset = CIFAR10(('train', ), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset,
                                         iteration_scheme=SequentialScheme(
                                             train_dataset.num_examples,
                                             batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets', ))
train_stream = RandomHorizontalFlip(train_stream,
                                    which_sources=('features', ))

valid_dataset = CIFAR10(('train', ), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset,
                                         iteration_scheme=SequentialScheme(
                                             valid_dataset.num_examples,
                                             batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
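# OneHotEncode10 and RandomHorizontalFlip above are project-specific
# transformers that are not shown here. A minimal sketch of what a
# batch-level one-hot transformer could look like, following the same
# Transformer pattern as FilterLabelsTransformer earlier in this file;
# the class name and the fixed 10 classes are assumptions.
import numpy
from fuel.transformers import Transformer


class OneHotEncode10Sketch(Transformer):
    """Replace integer CIFAR-10 targets with 10-dimensional one-hot rows."""

    def __init__(self, data_stream, which_sources=('targets',), **kwargs):
        kwargs.setdefault('produces_examples', False)
        super(OneHotEncode10Sketch, self).__init__(data_stream, **kwargs)
        self.which_sources = which_sources

    def transform_batch(self, batch):
        batch = list(batch)
        for i, source in enumerate(self.sources):
            if source in self.which_sources:
                targets = batch[i].flatten()
                one_hot = numpy.zeros((len(targets), 10), dtype='float32')
                one_hot[numpy.arange(len(targets)), targets] = 1.
                batch[i] = one_hot
        return tuple(batch)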
# action.add_argument('-t', '--train', help="Start training the model")
# action.add_argument('-s', '--sample',
#                     help='Sample images from the trained model')
#
# parser.add_argument('--experiment', nargs=1, type=str,
#                     help="Change default location to run experiment")
# parser.add_argument('--path', nargs=1, type=str,
#                     help="Change default location to save model")

if dataset == 'mnist':
    data = MNIST(("train", ), sources=('features', ))
    data_test = MNIST(("test", ), sources=('features', ))
elif dataset == 'binarized_mnist':
    data = BinarizedMNIST(("train", ), sources=('features', ))
    data_test = BinarizedMNIST(("test", ), sources=('features', ))
elif dataset == "cifar10":
    data = CIFAR10(("train", ))
    data_test = CIFAR10(("test", ))
else:
    pass  # Add CIFAR 10

training_stream = DataStream(data,
                             iteration_scheme=ShuffledScheme(
                                 data.num_examples, batch_size))
test_stream = DataStream(data_test,
                         iteration_scheme=ShuffledScheme(
                             data_test.num_examples, batch_size))

logger.info("Dataset: {} loaded".format(dataset))

if train:
    cost, cost_bits_dim = create_network()
    model, algorithm, extensions = prepare_opti(cost, test_stream,
                                                cost_bits_dim)