def test_flatten_batches(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
                  (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
def test_flatten_examples(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
def test_flatten():
    stream = DataStream(
        IndexableDataset(OrderedDict([('features', numpy.ones((4, 2, 2))),
                                      ('targets', numpy.array([0, 1, 0, 1]))])),
        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([0, 1])),
                  (numpy.ones((2, 4)), numpy.array([0, 1]))])
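# The tests above reference self.data from their test-class setup, which is
# not shown here. A minimal self-contained sketch of the same behaviour,
# assuming the 4x2x2 features and integer targets that test_flatten expects:
from collections import OrderedDict

import numpy
from fuel.datasets import IndexableDataset
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream
from fuel.transformers import Flatten

# Four 2x2 examples; each batch of two is flattened to shape (2, 4).
data = OrderedDict([('features', numpy.ones((4, 2, 2))),
                    ('targets', numpy.array([0, 1, 0, 1]))])
wrapper = Flatten(
    DataStream(IndexableDataset(data),
               iteration_scheme=SequentialScheme(4, 2)),
    which_sources=('features',))
for features, targets in wrapper.get_epoch_iterator():
    print features.shape, targets  # (2, 4) [0 1], twice per epoch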
    dataset_test = IMAGENET(['test'], width=spatial_width)
    n_colors = 3
else:
    raise ValueError("Unknown dataset %s." % args.dataset)

train_stream = Flatten(DataStream.default_stream(
    dataset_train,
    iteration_scheme=ShuffledScheme(examples=dataset_train.num_examples,
                                    batch_size=args.batch_size)))
test_stream = Flatten(DataStream.default_stream(
    dataset_test,
    iteration_scheme=ShuffledScheme(examples=dataset_test.num_examples,
                                    batch_size=args.batch_size)))

shp = next(train_stream.get_epoch_iterator())[0].shape

# make the training data 0 mean and variance 1
# TODO compute mean and variance on full dataset, not minibatch
Xbatch = next(train_stream.get_epoch_iterator())[0]
scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
shft = -np.mean(Xbatch * scl)
# scale is applied before shift
train_stream = ScaleAndShift(train_stream, scl, shft)
test_stream = ScaleAndShift(test_stream, scl, shft)

baseline_uniform_noise = 1. / 255.  # appropriate for MNIST and CIFAR10 Fuel datasets, which are scaled [0, 1]
uniform_noise = baseline_uniform_noise / scl

## initialize the model
dpm = model.DiffusionModel(spatial_width, n_colors,
                           uniform_noise=uniform_noise, **model_args)
dpm.initialize()
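# The normalization arithmetic above is easy to sanity-check in isolation.
# A minimal sketch with stand-in random data, assuming ScaleAndShift applies
# scale * x + shift, as the "scale is applied before shift" comment implies:
import numpy as np

rng = np.random.RandomState(0)
Xbatch = rng.rand(128, 784).astype('float32')  # stand-in minibatch

scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch)) ** 2))  # 1 / std
shft = -np.mean(Xbatch * scl)                                 # -mean / std

Xnorm = Xbatch * scl + shft          # what ScaleAndShift applies per batch
print np.mean(Xnorm), np.std(Xnorm)  # ~0.0 and ~1.0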
def train(args, model_args, lrate):
    model_id = '/data/lisatmp4/anirudhg/minst_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    logger = mimir.Logger(
        filename=model_dir2 + '/' + model_id2 + 'log.jsonl.gz',
        formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28
    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1,
                           cycles=2., noise=0.01, sources=('features',))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    train_stream = Flatten(
        DataStream.default_stream(
            dataset_train,
            iteration_scheme=ShuffledScheme(
                examples=dataset_train.num_examples,
                batch_size=args.batch_size)))
    shp = next(train_stream.get_epoch_iterator())[0].shape

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_ and os.path.exists(args.saveto_filename):
        print 'Reloading Parameters'
        print args.saveto_filename
        params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    '''
    x = T.matrix('x', dtype='float32')
    f = transition_operator(tparams, model_options, x, 1)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f(data[0])
        print a
        ipdb.set_trace()
    '''
    x, cost = build_model(tparams, model_options)
    inps = [x]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    print 'Building f_cost...',
    f_cost = theano.function(inps, cost)
    print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))
    get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]),
                            T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....'
    print args.num_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()
            cost = f_grad_shared(data[0])
            f_update(lrate)
            ud = time.time() - ud_start
            if batch_index % 1 == 0:
                print 'Cost is this', cost
                count_sample += 1
                from impainting import change_image, inpainting
                train_temp = data[0]
                print data[0].shape
                change_image(train_temp.reshape(args.batch_size, 1, 28, 28), 3)
                train_temp = train_temp.reshape(args.batch_size, 784)
                output = inpainting(train_temp)
                change_image(output.reshape(args.batch_size, 1, 28, 28), 1)
                reverse_time(
                    scl, shft, output,
                    model_dir + '/' + 'impainting_orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                x_data = np.asarray(output).astype('float32')
                temperature = args.temperature * (
                    args.temperature_factor ** (args.num_steps - 1))
                temperature = args.temperature  #* (args.temperature_factor ** (args.num_steps - 1))
                orig_impainted_data = np.asarray(data[0]).astype('float32')
                for i in range(args.num_steps + args.extra_steps + 5):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        x_data, temperature)
                    print 'Impainting using temperature', i, temperature
                    x_data = do_half_image(x_data, orig_impainted_data)
                    reverse_time(
                        scl, shft, x_data,
                        model_dir + '/' + 'impainting_orig_' + 'epoch_' +
                        str(count_sample) + '_batch_index_' +
                        str(batch_index) + 'step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature = temperature
                        #temperature /= args.temperature_factor
    ipdb.set_trace()
def main(name, model, epochs, batch_size, learning_rate, bokeh, layers,
         gamma, rectifier, predict, dropout, qlinear, sparse):
    runname = "vae%s-L%s%s%s%s-l%s-g%s-b%d" % (
        name, layers,
        'r' if rectifier else '',
        'd' if dropout else '',
        'l' if qlinear else '',
        shnum(learning_rate), shnum(gamma), batch_size // 100)
    if rectifier:
        activation = Rectifier()
        full_weights_init = Orthogonal()
    else:
        activation = Tanh()
        full_weights_init = Orthogonal()

    if sparse:
        runname += '-s%d' % sparse
        weights_init = Sparse(num_init=sparse, weights_init=full_weights_init)
    else:
        weights_init = full_weights_init

    layers = map(int, layers.split(','))

    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers) - 1),
                      encoder_layers,
                      name="MLP_enc", biases_init=Constant(0.),
                      weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    if qlinear:
        sampler = Qlinear(input_dim=enc_dim, output_dim=z_dim,
                          biases_init=Constant(0.),
                          weights_init=full_weights_init)
    else:
        sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim,
                           biases_init=Constant(0.),
                           weights_init=full_weights_init)

    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers) - 2) + [Logistic()],
                      decoder_layers,
                      name="MLP_dec", biases_init=Constant(0.),
                      weights_init=weights_init)

    vae = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae.initialize()

    x = tensor.matrix('features') / 256.
    x.tag.test_value = np.random.random(
        (batch_size, layers[0])).astype(np.float32)

    if predict:
        mean_z, enc = vae.mean_z(x)
        # cg = ComputationGraph([mean_z, enc])
        newmodel = Model([mean_z, enc])
    else:
        x_recons, kl_terms = vae.reconstruct(x)
        recons_term = BinaryCrossEntropy().apply(x, x_recons)
        recons_term.name = "recons_term"

        cost = recons_term + kl_terms.mean()
        cg = ComputationGraph([cost])

        if gamma > 0:
            weights = VariableFilter(roles=[WEIGHT])(cg.variables)
            cost += gamma * blocks.theano_expressions.l2_norm(weights)

        cost.name = "nll_bound"
        newmodel = Model(cost)

        if dropout:
            from blocks.roles import INPUT
            inputs = VariableFilter(roles=[INPUT])(cg.variables)
            # dropout_target = [v for k, v in newmodel.get_params().iteritems()
            #                   if k.find('MLP') >= 0 and k.endswith('.W')
            #                   and not k.endswith('MLP_enc/linear_0.W')]
            dropout_target = filter(lambda x: x.name.startswith('linear_'),
                                    inputs)
            cg = apply_dropout(cg, dropout_target, 0.5)
            target_cost = cg.outputs[0]
        else:
            target_cost = cost

    if name == 'mnist':
        if predict:
            train_ds = MNIST("train")
        else:
            train_ds = MNIST("train", sources=['features'])
        test_ds = MNIST("test")
    else:
        datasource_dir = os.path.join(fuel.config.data_path, name)
        datasource_fname = os.path.join(datasource_dir, name + '.hdf5')
        if predict:
            train_ds = H5PYDataset(datasource_fname, which_set='train')
        else:
            train_ds = H5PYDataset(datasource_fname, which_set='train',
                                   sources=['features'])
        test_ds = H5PYDataset(datasource_fname, which_set='test')
    train_s = Flatten(DataStream(
        train_ds,
        iteration_scheme=ShuffledScheme(train_ds.num_examples, batch_size)))
    test_s = Flatten(DataStream(
        test_ds,
        iteration_scheme=ShuffledScheme(test_ds.num_examples, batch_size)))

    if predict:
        from itertools import chain
        fprop = newmodel.get_theano_function()
        allpdata = None
        alledata = None
        f = train_s.sources.index('features')
        assert f == test_s.sources.index('features')
        sources = test_s.sources
        alllabels = dict((s, []) for s in sources if s != 'features')
        for data in chain(train_s.get_epoch_iterator(),
                          test_s.get_epoch_iterator()):
            for s, d in zip(sources, data):
                if s != 'features':
                    alllabels[s].extend(list(d))
            pdata, edata = fprop(data[f])
            if allpdata is None:
                allpdata = pdata
            else:
                allpdata = np.vstack((allpdata, pdata))
            if alledata is None:
                alledata = edata
            else:
                alledata = np.vstack((alledata, edata))
        print 'Saving', allpdata.shape, 'intermediate layer, for all training and test examples, to', name + '_z.npy'
        np.save(name + '_z', allpdata)
        print 'Saving', alledata.shape, 'last encoder layer to', name + '_e.npy'
        np.save(name + '_e', alledata)
        print 'Saving additional labels/targets:', ','.join(alllabels.keys()),
        print ' of size', ','.join(map(lambda x: str(len(x)), alllabels.values())),
        print 'to', name + '_labels.pkl'
        with open(name + '_labels.pkl', 'wb') as fp:
            pickle.dump(alllabels, fp, -1)
    else:
        cg = ComputationGraph([target_cost])
        algorithm = GradientDescent(
            cost=target_cost, params=cg.parameters,
            step_rule=Adam(learning_rate)  # Scale(learning_rate=learning_rate)
        )

        extensions = []
        if model:
            extensions.append(Load(model))

        extensions += [Timing(),
                       FinishAfter(after_n_epochs=epochs),
                       DataStreamMonitoring([cost, recons_term], test_s,
                                            prefix="test"),
                       TrainingDataMonitoring(
                           [cost,
                            aggregation.mean(algorithm.total_gradient_norm)],
                           prefix="train", after_epoch=True),
                       Checkpoint(runname, every_n_epochs=10),
                       Printing()]

        if bokeh:
            extensions.append(Plot(
                'Auto',
                channels=[['test_recons_term', 'test_nll_bound',
                           'train_nll_bound'],
                          ['train_total_gradient_norm']]))

        main_loop = MainLoop(algorithm, train_s, model=newmodel,
                             extensions=extensions)
        main_loop.run()
import SVRT_analysis_helper_functions

# Initialize vars and theano function
num_estimates = 20
num_training = 6  # per evaluation
num_testing = 1  # per evaluation
# get a bunch just to be sure
batch_size = num_estimates * num_training + num_estimates * num_testing
image_size, channels, data_train, data_valid, data_test = \
    datasets.get_data('sketch')
rows = 10
cols = 20
N_iter = 64

# Load images
train_stream = Flatten(DataStream.default_stream(
    data_train,
    iteration_scheme=SequentialScheme(data_train.num_examples, batch_size)))
train_batch = train_stream.get_epoch_iterator()
train_image = train_batch.data_stream.get_data()[0][:].reshape(batch_size, 32, 32)
train_labels = train_batch.data_stream.get_data()[1]
test_stream = Flatten(DataStream.default_stream(
    data_test,
    iteration_scheme=SequentialScheme(data_test.num_examples, batch_size)))
test_batch = test_stream.get_epoch_iterator()
test_image = test_batch.data_stream.get_data()[0][:].reshape(batch_size, 32, 32)
test_labels = test_batch.data_stream.get_data()[1]

# Load old model
#model_file = 'new_test-20160313-125114/new_test_model'
model_file = 'all_params-20160315-221022/all_params_model'
with open(model_file, "rb") as f:
    model = pickle.load(f)
draw = model.get_top_bricks()[0]
    n_iter=n_iter,
    sources=("features", "bbox_lefts", "bbox_tops",
             "bbox_widths", "bbox_heights"),
)

batch_size = 1000
num_examples = int(svhn.num_examples / batch_size) + 1
evaluation = True
# num_examples = 100
# batch_size = 1
# evaluation = False
svhn_stream = Flatten(
    DataStream.default_stream(
        svhn,
        iteration_scheme=SequentialScheme(svhn.num_examples, batch_size)))
svhn_stream.get_epoch_iterator()

x = T.fmatrix("features")
batch_size = T.iscalar("batch_size")
center_y, center_x, deltaY, deltaX = locator.find(x, batch_size)
do_sample = theano.function(
    [x, batch_size],
    outputs=[center_y, center_x, deltaY, deltaX],
    allow_input_downcast=True)

overlap = 0.0
distance = 0.0
for i in range(0, num_examples):
    image = svhn_stream.get_data()
def train(args, model_args):
    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'
    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28
    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == "lsun" or args.dataset == "lsunsmall":
        print "loading lsun class!"
        from load_lsun import load_lsun
        print "loading lsun data!"
        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64
        n_colors = 3
    elif args.dataset == "celeba":
        print "loading celeba data"
        from fuel.datasets.celeba import CelebA
        dataset_train = CelebA(which_sets=['train'], which_format="64",
                               sources=('features',), load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'], which_format="64",
                              sources=('features',), load_in_memory=False)
        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream
        #epoch_it = train_stream.get_epoch_iterator()
    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1,
                           cycles=2., noise=0.01, sources=('features',))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=(dataset_train.num_examples -
                              (dataset_train.num_examples % args.batch_size)),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape
    print "got epoch iterator"

    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams

    x, cost, start_temperature, step_chain = build_model(tparams,
                                                         model_options)
    inps = [x.astype('float32'), start_temperature, step_chain]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    step_chain_part = T.scalar('step_chain_part', dtype='int32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature, step_chain_part)

    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))
    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]),
                            T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    #for param in tparams:
    #    print param
    #    print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    save_data = []
    for eidx in xrange(1):
        print 'Starting Next Epoch ', eidx
        for data_ in range(500):  #train_stream.get_epoch_iterator():
            if args.noise == "gaussian":
                x_sampled = np.random.normal(
                    0.5, 2.0,
                    size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
            else:
                s = np.random.binomial(1, 0.5, INPUT_SIZE)
            temperature = args.temperature * (args.temperature_factor**(
                args.num_steps * args.meta_steps - 1))
            x_data = np.asarray(x_sampled).astype('float32')
            for i in range(args.num_steps * args.meta_steps +
                           args.extra_steps):
                x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                    x_data.astype('float32'), temperature,
                    args.num_steps * args.meta_steps - i - 1)
                print 'On step number, using temperature', i, temperature
                #reverse_time(scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i))
                x_data = np.asarray(x_data).astype('float32')
                x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                if temperature == args.temperature:
                    temperature = temperature
                else:
                    temperature /= args.temperature_factor
            count_sample = count_sample + 1
            save_data.append(x_data)
            fname = (model_dir + '/batch_index_' + str(batch_index) +
                     '_inference_' + 'epoch_' + str(count_sample) +
                     '_step_' + str(i))
            np.savez(fname + '.npz', x_data)

    save2_data = np.asarray(save_data).astype('float32')
    fname = model_dir + '/generted_images_50000'  #+ args.saveto_filename
    np.savez(fname + '.npz', save_data)
    ipdb.set_trace()
'''
'''
qw = np.load('/data/lisatmp3/anirudhg/mnist_walk_back/walkback_-170217T154923/batch_index_1_inference_means_epoch_1_step_39.npz')
qw = qw['X']
qw = qw.reshape((100, 784))
for i in range(99):
    qw2 = np.load('/data/lisatmp3/anirudhg/mnist_walk_back/walkback_-170217T154923/batch_index_' + str(i + 2) + '_inference_means_epoch_1_step_39.npz')
    qw2 = qw2['X']
    qw2 = qw2.reshape((100, 784))
    qw = np.concatenate([qw, qw2])
np.savez('gen_samples_means_170217T154923_model_3000.npz', qw)
qw = np.load('gen_samples_means_170217T154923_model_3000.npz')
qw = qw['arr_0']
qw = qw.reshape((10000, 784))
'''
for data in train_stream.get_epoch_iterator():
    test_data = data[0]
    ipdb.set_trace()
'''
var = 0.01
for i in range(98):
    pwin = ParzenWindows(qw, var)
    ll = pwin.get_ll(test_data)
    print ll, var
    var += 0.01
'''
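# ParzenWindows is not defined in this snippet. A minimal sketch of what such
# a helper typically computes: an isotropic Gaussian kernel density estimate
# over generated samples. The class and its get_ll signature are hypothetical
# stand-ins matching the calls in the commented block above, and whether the
# loop's `var` is the kernel's standard deviation or its variance is not
# recoverable from the snippet (std is assumed here).
import numpy as np

class ParzenWindows(object):
    """Hypothetical stand-in: Gaussian-kernel density estimate."""

    def __init__(self, samples, sigma):
        self.mu = np.asarray(samples)  # (N, D) generated samples
        self.sigma = sigma             # kernel width, assumed to be a std dev

    def get_ll(self, x):
        # log p(x_j) = logsumexp_i(-||x_j - mu_i||^2 / (2 sigma^2))
        #              - log N - (D / 2) log(2 pi sigma^2)
        x = np.asarray(x)
        d = (x[:, None, :] - self.mu[None, :, :]) / self.sigma  # (M, N, D)
        e = -0.5 * (d ** 2).sum(axis=2)                         # (M, N)
        m = e.max(axis=1, keepdims=True)                        # stabilize
        log_p = (m[:, 0] + np.log(np.exp(e - m).sum(axis=1)) -
                 np.log(len(self.mu)))
        log_p -= 0.5 * x.shape[1] * np.log(2 * np.pi * self.sigma ** 2)
        return np.mean(log_p)

# Note the (M, N, D) broadcast is memory-hungry for 10000 samples; a real
# implementation would chunk over the test points.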
def train(args, model_args):
    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'
    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28
    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == "lsun" or args.dataset == "lsunsmall":
        print "loading lsun class!"
        from load_lsun import load_lsun
        print "loading lsun data!"
        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64
        n_colors = 3
    elif args.dataset == "celeba":
        print "loading celeba data"
        from fuel.datasets.celeba import CelebA
        dataset_train = CelebA(which_sets=['train'], which_format="64",
                               sources=('features',), load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'], which_format="64",
                              sources=('features',), load_in_memory=False)
        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        dataset_train = train_stream
        dataset_test = test_stream
        #epoch_it = train_stream.get_epoch_iterator()
    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1,
                           cycles=2., noise=0.01, sources=('features',))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=(dataset_train.num_examples -
                              (dataset_train.num_examples % args.batch_size)),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape
    print "got epoch iterator"

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    '''
    x = T.matrix('x', dtype='float32')
    temp = T.scalar('temp', dtype='float32')
    f = transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))
    #get_grads = theano.function(inps, grads)

    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]),
                            T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Building Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32),)
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()
            t1 = time.time()
            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run,
                                               temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            ud = time.time() - ud_start
            #gradient_updates_ = get_grads(data_use[0], args.temperature)
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({'epoch': eidx,
                        'batch_index': batch_index,
                        'uidx': uidx,
                        'training_error': cost})
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"
            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 200 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp,
                            model_dir + '/' + "batch_" + str(batch_index) +
                            '_corrupted' + 'epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp,
                            model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))
                    temperature_forward = (temperature_forward *
                                           args.temperature_factor)

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors,
                                              WIDTH, WIDTH)
                plot_images(
                    x_temp2,
                    model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data,
                        model_dir + '/' + "batch_" + str(batch_index) +
                        '_samples_backward_' + 'epoch_' + str(count_sample) +
                        '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    s = np.random.binomial(1, 0.5, INPUT_SIZE)

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data,
                        model_dir + '/batch_index_' + str(batch_index) +
                        '_inference_' + 'epoch_' + str(count_sample) +
                        '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
    ipdb.set_trace()
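# The annealing logic in the sampling loops above starts the reverse chain at
# the hottest temperature and divides by args.temperature_factor each step
# until it bottoms out at args.temperature. A standalone trace of that
# schedule, with hypothetical stand-ins for args.temperature,
# args.temperature_factor, and args.num_steps * args.meta_steps (factor 2.0
# keeps the == comparison exact in floating point, which the original relies
# on as well):
base_temperature, factor, steps = 1.0, 2.0, 10

temperature = base_temperature * factor ** (steps - 1)  # hottest start
for i in range(steps + 2):                              # + extra steps
    print i, temperature
    if temperature != base_temperature:
        temperature /= factor  # anneal until back at the base temperature
# the final iterations stay at base_temperature, as in the loops above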
mnist_train = MNIST(('train',))  #, sources=('features', 'targets'))
num_examples = 10  #mnist_train.num_examples
train_data_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))
train_monitor_stream = Flatten(DataStream.default_stream(
    mnist_train,
    iteration_scheme=ShuffledScheme(num_examples, batch_size=batch_size)))

x = T.matrix('features')
y = T.matrix('targets')

epoch = train_data_stream.get_epoch_iterator()
for j, batch in enumerate(epoch):
    if j > 0:
        break
theano.config.compute_test_value = 'warn'
x.tag.test_value = batch[0]  #numpy.random.rand(100, 28**2).astype('float32')
y.tag.test_value = batch[1]  #numpy.random.randint(0, 10, size=(100, 1)).astype('float32')

gradients = T.grad(cost, RNN.params)
gradients.name = 'gradients'
    return net['conv1_1']


if __name__ == '__main__':
    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features',))
    dataset_test = MNIST(['test'], sources=('features',))
    n_colors = 1
    spatial_width = 28
    train_stream = Flatten(
        DataStream.default_stream(
            dataset_train,
            iteration_scheme=ShuffledScheme(
                examples=(dataset_train.num_examples -
                          (dataset_train.num_examples % 32)),
                batch_size=32)))
    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')
    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
    #test_prediction_dimshuffle = test_prediction.dimshuffle((0, 2, 3, 1))
    pred_fcn_fn = theano.function([input_], test_prediction)
    for data in train_stream.get_epoch_iterator():
        data_use = (data[0].reshape(32, 1, 28, 28),)
        out_put = pred_fcn_fn(data_use[0])
        import ipdb
        ipdb.set_trace()
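# Flatten hands the loop (32, 784) vectors, which are reshaped back into
# image tensors before being fed to the U-Net. A quick standalone check of
# that round trip in pure NumPy, assuming 28x28 single-channel features:
import numpy as np

batch = np.arange(32 * 784, dtype='float32').reshape(32, 1, 28, 28)
flat = batch.reshape(32, -1)            # the (32, 784) layout Flatten yields
restored = flat.reshape(32, 1, 28, 28)  # what the loop feeds pred_fcn_fn
assert np.array_equal(batch, restored)  # flattening is lossless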
num_examples = data_test.num_examples

n_samples = (int(s) for s in args.nsamples.split(","))
dict_p = {}
dict_ps = {}
for K in n_samples:
    batch_size = max(args.max_batch // K, 1)
    stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=ShuffledScheme(num_examples, batch_size)),
        which_sources='features')

    log_p = np.asarray([])
    log_ps = np.asarray([])
    for batch in stream.get_epoch_iterator(as_dict=True):
        log_p_, log_ps_ = do_nll(batch['features'], K)
        log_p = np.concatenate((log_p, log_p_))
        log_ps = np.concatenate((log_ps, log_ps_))

    log_p_ = stats.sem(log_p)
    log_p = np.mean(log_p)
    log_ps_ = stats.sem(log_ps)
    log_ps = np.mean(log_ps)

    dict_p[K] = log_p
    dict_ps[K] = log_ps

    if estimate_z:
        print("log p / log p~ / log p* [%6d spls]: %5.2f+-%4.2f / %5.2f+-%4.2f / %5.2f" %
# ## Transformers

# In[13]:

from fuel.transformers import Flatten

# In[14]:

data_stream = Flatten(data_stream)

# In[15]:

epoch = data_stream.get_epoch_iterator()
batch = next(epoch)  # (ndarray, ndarray)

# In[16]:

batch[0].shape  # e.g. (batch_size, 784) once 28x28 features are flattened

# In[17]:

batch[1].shape

# ## Model / Bricks