def load_datastream(train_batch_size=100):
    from fuel.datasets.mnist import MNIST
    from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
    from fuel.streams import DataStream
    from fuel.schemes import SequentialScheme, ShuffledScheme

    MNIST.default_transformers = (
        (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
        (Cast, [np.float32], {'which_sources': 'features'}),
    )

    mnist_train = MNIST(('train',), subset=slice(None, 50000))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, train_batch_size)
    )

    mnist_validation = MNIST(('train',), subset=slice(50000, None))
    mnist_validation_stream = DataStream.default_stream(
        mnist_validation,
        iteration_scheme=SequentialScheme(mnist_validation.num_examples, 250)
    )

    mnist_test = MNIST(('test',))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(mnist_test.num_examples, 250)
    )

    return {
        'train': mnist_train_stream,
        'validation': mnist_validation_stream,
        'test': mnist_test_stream
    }
def get_streams(num_train_examples, batch_size, use_test=True):
    dataset = MNIST(("train",))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(
            dataset, iteration_scheme=ShuffledScheme(indices_train, batch_size)))

    valid_stream = None
    if len(indices_valid) != 0:
        valid_stream = Flatten(
            DataStream.default_stream(
                dataset, iteration_scheme=ShuffledScheme(indices_valid, batch_size)))

    test_stream = None
    if use_test:
        dataset = MNIST(("test",))
        ind = numpy.arange(dataset.num_examples)
        rng = numpy.random.RandomState(seed=1)
        rng.shuffle(ind)  # shuffle the test indices, not the training indices

        test_stream = Flatten(
            DataStream.default_stream(
                dataset, iteration_scheme=ShuffledScheme(ind, batch_size)))

    return train_stream, valid_stream, test_stream
def maxout_vae_mnist_test(path_vae_mnist):
    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128

    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for i, t in enumerate(temp):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    # step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))
    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")

    extensions = [monitor_train,
                  monitor_valid,
                  FinishAfter(after_n_epochs=50),
                  Printing(every_n_epochs=1)]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'w')) as f:
        dump(maxout, f)
def prepare_cifar10():
    class Dataset:
        pass

    result = Dataset()

    CIFAR10.default_transformers = (
        (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
        (Cast, [np.float32], {'which_sources': 'features'}))

    mean = cifar10_mean()

    def patch_get_epoch_iterator(stream):
        def get_epoch_iterator(self):
            for X, Y in self._get_epoch_iterator():
                # 0 degrees
                X -= mean[numpy.newaxis, :, :, :]
                yield augument(X, 25), Y
        stream._get_epoch_iterator = stream.get_epoch_iterator
        stream.get_epoch_iterator = types.MethodType(get_epoch_iterator, stream)

    def patch_get_epoch_iterator_test(stream):
        def get_epoch_iterator(self):
            for X, Y in self._get_epoch_iterator():
                # 0 degrees
                X -= mean[numpy.newaxis, :, :, :]
                yield X, Y
        stream._get_epoch_iterator = stream.get_epoch_iterator
        stream.get_epoch_iterator = types.MethodType(get_epoch_iterator, stream)

    result.train = train = CIFAR10(("train",), subset=slice(None, 40000))
    result.train_stream = DataStream.default_stream(
        result.train,
        iteration_scheme=ShuffledScheme(result.train.num_examples, 25))
    patch_get_epoch_iterator(result.train_stream)

    result.validation = CIFAR10(("train",), subset=slice(40000, None))
    result.validation_stream = DataStream.default_stream(
        result.validation,
        iteration_scheme=SequentialScheme(result.validation.num_examples, 100))
    patch_get_epoch_iterator(result.validation_stream)

    result.test = CIFAR10(("test",))
    result.test_stream = DataStream.default_stream(
        result.test,
        iteration_scheme=SequentialScheme(result.test.num_examples, 100))
    patch_get_epoch_iterator_test(result.test_stream)

    return result
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
def get_stream(batch_size, input_size, test=False):
    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme, SequentialScheme, SequentialExampleScheme
    from fuel.transformers.image import RandomFixedSizeCrop
    from fuel.transformers import Flatten  # , ForceFloatX
    from ScikitResize import ScikitResize
    from fuel.transformers import Cast

    # Load the training set
    if test:
        train = DogsVsCats(('train',), subset=slice(0, 30))
        valid = DogsVsCats(('train',), subset=slice(19980, 20000))
        test = DogsVsCats(('test',), subset=slice(0, 4))
    else:
        train = DogsVsCats(('train',), subset=slice(0, 22000))
        valid = DogsVsCats(('train',), subset=slice(22000, 25000))
        test = DogsVsCats(('test',))

    # Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size)
    )
    valid_stream = DataStream.default_stream(
        valid,
        iteration_scheme=ShuffledScheme(valid.num_examples, batch_size)
    )
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=SequentialScheme(test.num_examples, 1)
        # iteration_scheme=SequentialExampleScheme(test.num_examples)
    )

    # Reshaping procedure
    # Apply crop and resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size, which_sources=('image_features',))
    valid_stream = ScikitResize(valid_stream, input_size, which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size, which_sources=('image_features',))

    # ForceFloatX, to spare you from possible bugs
    # train_stream = ForceFloatX(train_stream)
    # valid_stream = ForceFloatX(valid_stream)
    # test_stream = ForceFloatX(test_stream)

    # Cast instead of ForceFloatX
    train_stream = Cast(train_stream, dtype='float32', which_sources=('image_features',))
    valid_stream = Cast(valid_stream, dtype='float32', which_sources=('image_features',))
    test_stream = Cast(test_stream, dtype='float32', which_sources=('image_features',))

    return train_stream, valid_stream, test_stream
def test_cifar10():
    train = CIFAR10(('train',), load_in_memory=False)
    assert train.num_examples == 50000
    handle = train.open()
    features, targets = train.get_data(handle, slice(49990, 50000))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    train.close(handle)

    test = CIFAR10(('test',), load_in_memory=False)
    handle = test.open()
    features, targets = test.get_data(handle, slice(0, 10))
    assert features.shape == (10, 3, 32, 32)
    assert targets.shape == (10, 1)
    assert features.dtype == numpy.uint8
    assert targets.dtype == numpy.uint8
    test.close(handle)

    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

    assert_raises(ValueError, CIFAR10, ('valid',))

    dummy = CIFAR10(('train',), subset=slice(50000, 60000))
    handle = dummy.open()
    assert_raises(ValueError, dummy.get_data, handle, slice(0, 10000))
    dummy.close(handle)
def create_dataset(dataset):
    # `trainning` (sic) and `image_size` are expected to be defined in the
    # enclosing scope.
    if trainning:
        scheme = ShuffledScheme(dataset.num_examples, 32)
    else:
        scheme = SequentialScheme(dataset.num_examples, 32)
    stream = DataStream.default_stream(dataset, iteration_scheme=scheme)
    return ResizeTransformer(stream, image_size)
def get_mnist_video_streams(batch_size):
    train_dataset = ClutteredMNISTVideo(which_sets=["train"])
    valid_dataset = ClutteredMNISTVideo(which_sets=["valid"])
    train_ind = numpy.arange(train_dataset.num_examples)
    valid_ind = numpy.arange(valid_dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(train_ind)
    rng.shuffle(valid_ind)

    train_datastream = DataStream.default_stream(
        train_dataset,
        iteration_scheme=ShuffledScheme(train_ind, batch_size))
    train_datastream = PreprocessTransformer(train_datastream)

    valid_datastream = DataStream.default_stream(
        valid_dataset,
        iteration_scheme=ShuffledScheme(valid_ind, batch_size))
    valid_datastream = PreprocessTransformer(valid_datastream)

    return train_datastream, valid_datastream
def monk_music_stream(which_sets=('train',), batch_size=64, seq_size=128,
                      frame_size=160, num_examples=None,
                      which_sources=('features',)):
    """Generate the stream for the monk_music dataset.

    It doesn't compute incremental windows and instead simply separates the
    dataset into sequences.
    """
    dataset = MonkMusic(which_sets=which_sets, filename="dataset.hdf5",
                        load_in_memory=True)

    large_batch_size = batch_size * frame_size * seq_size
    if not num_examples:
        # truncate to a whole number of large batches (integer division)
        num_examples = large_batch_size * (dataset.num_examples // large_batch_size)

    # If there are memory problems revert to SequentialScheme
    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, large_batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1. / data_stats["std"],
                                shift=-data_stats["mean"] / data_stats["std"])
    data_stream = Mapping(
        data_stream,
        lambda data: _get_subsequences(data, batch_size, seq_size, frame_size))
    data_stream = ForceFloatX(data_stream)

    return data_stream
def get_cmv_v1_streams(batch_size):
    train_dataset = CMVv1(which_sets=["train"])
    valid_dataset = CMVv1(which_sets=["valid"])
    train_ind = numpy.arange(train_dataset.num_examples)
    valid_ind = numpy.arange(valid_dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(train_ind)
    rng.shuffle(valid_ind)

    train_datastream = DataStream.default_stream(
        train_dataset,
        iteration_scheme=ShuffledScheme(train_ind, batch_size))
    train_datastream = Preprocessor_CMV_v1(train_datastream)

    valid_datastream = DataStream.default_stream(
        valid_dataset,
        iteration_scheme=ShuffledScheme(valid_ind, batch_size))
    valid_datastream = Preprocessor_CMV_v1(valid_datastream)

    return train_datastream, valid_datastream
def DStream(datatype, config):
    if datatype == 'train':
        filename = config['train_file']
    elif datatype == 'valid':
        filename = config['valid_file']
    elif datatype == 'test':
        filename = config['test_file']
    else:
        logger.error('wrong datatype, train, valid, or test')

    data = TextFile(files=[filename],
                    dictionary=pickle.load(open(config['train_dic'], 'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])

    data_stream = DataStream.default_stream(data)
    data_stream.sources = ('sentence',)

    # organize data in batches and pad shorter sequences with zeros
    batch_size = config['batch_size']
    data_stream = Batch(data_stream, iteration_scheme=ConstantScheme(batch_size))
    data_stream = Padding(data_stream)

    return data_stream
def test_cifar100():
    train = CIFAR100('train', load_in_memory=False)
    assert train.num_examples == 50000
    handle = train.open()
    coarse_labels, features, fine_labels = train.get_data(
        handle, slice(49990, 50000))
    assert features.shape == (10, 3, 32, 32)
    assert coarse_labels.shape == (10, 1)
    assert fine_labels.shape == (10, 1)
    train.close(handle)

    test = CIFAR100('test', load_in_memory=False)
    handle = test.open()
    coarse_labels, features, fine_labels = test.get_data(handle, slice(0, 10))
    assert features.shape == (10, 3, 32, 32)
    assert coarse_labels.shape == (10, 1)
    assert fine_labels.shape == (10, 1)
    assert features.dtype == numpy.uint8
    assert coarse_labels.dtype == numpy.uint8
    assert fine_labels.dtype == numpy.uint8
    test.close(handle)

    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[1]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

    assert_raises(ValueError, CIFAR100, 'valid')
def open_stream(which_sets=('train',), port=5557, num_examples=None):
    dataset = Blizzard(which_sets=which_sets)
    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))
    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)

    start_server(data_stream, port=port)
def fuel_data_to_list(fuel_data, shuffle):
    if shuffle:
        scheme = ShuffledScheme(fuel_data.num_examples, fuel_data.num_examples)
    else:
        scheme = SequentialScheme(fuel_data.num_examples, fuel_data.num_examples)
    fuel_data_stream = DataStream.default_stream(fuel_data, iteration_scheme=scheme)
    return next(fuel_data_stream.get_epoch_iterator())
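# A brief usage sketch (added for illustration, not part of the original code;
# MNIST here is only an example dataset and assumes `fuel_data_to_list` above is
# importable in scope). Because the scheme's batch size equals `num_examples`,
# the whole split comes back as a single batch.
if __name__ == '__main__':
    from fuel.datasets import MNIST

    features, targets = fuel_data_to_list(MNIST(('test',)), shuffle=False)
    print(features.shape, targets.shape)  # the entire test split in one batch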
def create_svhn_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = SVHN(2, ('extra',), sources=('features',))
    valid_set = SVHN(2, ('train',), sources=('features',))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
def cifar10_mean():
    train = CIFAR10(("train",), subset=slice(None, 40000))
    train_stream = DataStream.default_stream(
        train, iteration_scheme=SequentialScheme(train.num_examples, 100))
    X = numpy.array([numpy.mean(X, 0)
                     for X, _ in train_stream.get_epoch_iterator()])
    X = numpy.mean(X, 0)
    return X
def create_celeba_data_streams(batch_size, monitoring_batch_size,
                               sources=('features', ), rng=None):
    train_set = CelebA('64', ('train',), sources=sources)
    valid_set = CelebA('64', ('valid',), sources=sources)
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
def create_tiny_imagenet_data_streams(batch_size, monitoring_batch_size,
                                      rng=None):
    train_set = TinyILSVRC2012(('train',), sources=('features',))
    valid_set = TinyILSVRC2012(('valid',), sources=('features',))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(4096, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(4096, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
def set_datastream(data_path, batch_size):
    dataset = H5PYDataset(file_or_path=data_path,
                          which_sets=('train',),
                          sources=('input_feature', 'target_feature'))
    data_stream = DataStream.default_stream(
        dataset=dataset,
        iteration_scheme=ShuffledScheme(examples=dataset.num_examples,
                                        batch_size=batch_size))
    return data_stream
def create_data(data):
    stream = DataStream.default_stream(
        data,
        iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, image_size, which_sources=('image_features',))
    # stream_rotate = Random2DRotation(stream_downscale, which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale, image_size,
                              which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0,
                                 which_sources=('image_features',))
    stream_cast = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    # stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
def create_act_table(self, save_to, act_table): batch_size = 500 image_size = (28, 28) output_size = 10 convnet = create_lenet_5() layers = convnet.layers x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) cg = ComputationGraph([probs]) def full_brick_name(brick): return '/'.join([''] + [b.name for b in brick.get_unique_path()]) # Find layer outputs to probe outmap = OrderedDict((full_brick_name(get_brick(out)), out) for out in VariableFilter( roles=[OUTPUT], bricks=[Convolutional, Linear])( cg.variables)) # Generate pics for biases biases = VariableFilter(roles=[BIAS])(cg.parameters) # Generate parallel array, in the same order, for outputs outs = [outmap[full_brick_name(get_brick(b))] for b in biases] # Figure work count error_rate = (MisclassificationRate().apply(y.flatten(), probs) .copy(name='error_rate')) max_activation_table = (MaxActivationTable().apply( outs).copy(name='max_activation_table')) max_activation_table.tag.aggregation_scheme = ( Concatenate(max_activation_table)) model = Model([ error_rate, max_activation_table]) # Load it with trained parameters params = load_parameters(open(save_to, 'rb')) model.set_parameter_values(params) mnist_test_stream = DataStream.default_stream( self.mnist_test, iteration_scheme=SequentialScheme( self.mnist_test.num_examples, batch_size)) evaluator = DatasetEvaluator([ error_rate, max_activation_table ]) results = evaluator.evaluate(mnist_test_stream) table = results['max_activation_table'] pickle.dump(table, open(act_table, 'wb')) return table
def get_stream(self, which_set, scheme=None):
    if not scheme:
        # integer division so the example count stays an int
        scheme = ShuffledScheme(
            self.datasets[which_set].num_examples // self.shrink_dataset_by,
            self.batch_size)
    return DataStream.default_stream(
        dataset=self.datasets[which_set],
        iteration_scheme=scheme)
def create_cifar10_data_streams(batch_size, monitoring_batch_size, rng=None):
    train_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(0, 45000))
    valid_set = CIFAR10(
        ('train',), sources=('features',), subset=slice(45000, 50000))
    main_loop_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, batch_size, rng=rng))
    train_monitor_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    valid_monitor_stream = DataStream.default_stream(
        valid_set,
        iteration_scheme=ShuffledScheme(5000, monitoring_batch_size, rng=rng))
    return main_loop_stream, train_monitor_stream, valid_monitor_stream
def create_streams(train_set, valid_set, test_set, training_batch_size,
                   monitoring_batch_size):
    """Creates data streams for training and monitoring.

    Parameters
    ----------
    train_set : :class:`fuel.datasets.Dataset`
        Training set.
    valid_set : :class:`fuel.datasets.Dataset`
        Validation set.
    test_set : :class:`fuel.datasets.Dataset`
        Test set.
    training_batch_size : int
        Batch size for training.
    monitoring_batch_size : int
        Batch size for monitoring.

    Returns
    -------
    rval : tuple of data streams
        Data streams for the main loop, the training set monitor,
        the validation set monitor and the test set monitor.

    """
    main_loop_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, training_batch_size))
    train_monitor_stream = DataStream.default_stream(
        dataset=train_set,
        iteration_scheme=ShuffledScheme(
            train_set.num_examples, monitoring_batch_size))
    valid_monitor_stream = DataStream.default_stream(
        dataset=valid_set,
        iteration_scheme=ShuffledScheme(
            valid_set.num_examples, monitoring_batch_size))
    test_monitor_stream = DataStream.default_stream(
        dataset=test_set,
        iteration_scheme=ShuffledScheme(
            test_set.num_examples, monitoring_batch_size))

    return (main_loop_stream, train_monitor_stream, valid_monitor_stream,
            test_monitor_stream)
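# A minimal usage sketch for `create_streams` (added as an example, not part of
# the original code). The MNIST split sizes below are illustrative assumptions;
# any three fuel Datasets work.
if __name__ == '__main__':
    from fuel.datasets import MNIST

    train_set = MNIST(('train',), subset=slice(None, 50000))
    valid_set = MNIST(('train',), subset=slice(50000, None))
    test_set = MNIST(('test',))

    streams = create_streams(train_set, valid_set, test_set,
                             training_batch_size=128,
                             monitoring_batch_size=500)
    main_loop_stream = streams[0]

    # Each epoch iterator yields one (features, targets) minibatch at a time.
    features, targets = next(main_loop_stream.get_epoch_iterator())
    print(features.shape, targets.shape)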
def getTextFile(filename, dic_path, config):
    data = TextFile(files=[filename],
                    dictionary=pickle.load(open(dic_path, 'rb')),
                    unk_token=config['unk_token'],
                    level='word',
                    bos_token=config['bos_token'],
                    eos_token=config['eos_token'])
    data_stream = DataStream.default_stream(data)
    data_stream.sources = ('sentence',)
    return data_stream
def _test_dataset():
    train = DogsVsCats(('train',))
    assert train.num_examples == 25000
    assert_raises(ValueError, DogsVsCats, ('valid',))

    test = DogsVsCats(('test',))
    stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0][0]
    assert data.dtype.kind == 'f'
def create_main_loop(dataset, nvis, nhid, num_epochs, debug_level=0, lrate=1e-3): seed = 188229 n_inference_steps = 6 num_examples = dataset.num_examples batch_size = num_examples train_loop_stream = Flatten( DataStream.default_stream( dataset=dataset, iteration_scheme=SequentialScheme(dataset.num_examples, batch_size) # Repeat( # , n_inference_steps) # ShuffledScheme(dataset.num_examples, batch_size), n_inference_steps)) ), which_sources=("features",), ) model_brick = FivEM( nvis=nvis, nhid=nhid, epsilon=0.01, batch_size=batch_size, weights_init=IsotropicGaussian(0.1), biases_init=Constant(0), noise_scaling=1, debug=debug_level, lateral_x=False, lateral_h=False, n_inference_steps=n_inference_steps, ) model_brick.initialize() x = tensor.matrix("features") cost = model_brick.cost(x) computation_graph = ComputationGraph([cost]) model = Model(cost) # step_rule = Adam(learning_rate=2e-5, beta1=0.1, beta2=0.001, epsilon=1e-8, # decay_factor=(1 - 1e-8)) step_rule = Momentum(learning_rate=lrate, momentum=0.95) # step_rule = AdaDelta() # step_rule = RMSProp(learning_rate=0.01) # step_rule = AdaGrad(learning_rate=1e-4) algorithm = GradientDescent(cost=cost, params=computation_graph.parameters, step_rule=step_rule) algorithm.add_updates(computation_graph.updates) extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs), TrainingDataMonitoring([cost] + computation_graph.auxiliary_variables, after_batch=False, after_epoch=True), # every_n_epochs=1), Printing(after_epoch=True, after_batch=False), # every_n_epochs=1, # Checkpoint(path="./fivem.zip",every_n_epochs=10,after_training=True) ] main_loop = MainLoop(model=model, data_stream=train_loop_stream, algorithm=algorithm, extensions=extensions) return main_loop
def getDataStream(dataset, batch_size):
    stream = Flatten(DataStream.default_stream(
        dataset=dataset,
        iteration_scheme=ShuffledScheme(dataset.num_examples,
                                        batch_size=batch_size)))
    stream = Digit2String(stream, which_sources=('targets',))
    stream = Words2Indices(stream, which_sources=('targets',))
    stream = Padding(stream)
    stream = FilterSources(stream, sources=("features", "targets"))
    return stream
def get_datastream(self, kind, indices):
    split = {
        'trn': self.trn,
        'val': self.val,
        'tst': self.tst,
    }[kind]

    indices = indices if indices is not None else split.ind
    assert len(set(indices) - set(split.ind)) == 0, \
        'requested indices outside of split'

    ds = DataStream.default_stream(
        split.set,
        iteration_scheme=ShuffledScheme(indices, split.batch_size))
    return ds
def evaluate_lenet5(train, test, valid, learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :param dataset train: Fuel dataset to use for training. :param dataset test: Fuel dataset to use for testing. :param dataset valid: Fuel dataset to use for validation. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) train_stream = DataStream.default_stream(train, iteration_scheme=SequentialScheme( train.num_examples, batch_size)) valid_stream = DataStream.default_stream(valid, iteration_scheme=SequentialScheme( train.num_examples, batch_size)) test_stream = DataStream.default_stream(test, iteration_scheme=SequentialScheme( train.num_examples, batch_size)) x = T.tensor4('x') yi = T.imatrix('y') y = yi.reshape((yi.shape[0], )) ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24) # maxpooling reduces this further to (24/2, 24/2) = (12, 12) # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12) layer0 = LeNetConvPoolLayer(rng, input=x, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8) # maxpooling reduces this further to (8/2, 8/2) = (4, 4) # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function([x, yi], layer3.errors(y)) validate_model = theano.function([x, yi], layer3.errors(y)) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function([x, yi], cost, updates=updates) ############### # TRAIN MODEL # ############### print '... 
training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is found # a relative improvement of this much is considered significant improvement_threshold = 0.995 n_train_batches = (train.num_examples + batch_size - 1) // batch_size # go through this many minibatches before checking the network on # the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience / 2) best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 iter = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 minibatch_index = 0 for minibatch in train_stream.get_epoch_iterator(): iter += 1 minibatch_index += 1 if iter % 100 == 0: print 'training @ iter = ', iter error = train_model(minibatch[0], minibatch[1]) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(vb[0], vb[1]) for vb in valid_stream.get_epoch_iterator() ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(tb[0], tb[1]) for tb in test_stream.get_epoch_iterator() ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code ran for %.2fm' % ((end_time - start_time) / 60.))
def main(save_to): batch_size = 365 feature_maps = [6, 16] mlp_hiddens = [120, 84] conv_sizes = [5, 5] pool_sizes = [2, 2] image_size = (28, 28) output_size = 10 # The above are from LeCun's paper. The blocks example had: # feature_maps = [20, 50] # mlp_hiddens = [500] # Use ReLUs everywhere and softmax for the final prediction conv_activations = [Rectifier() for _ in feature_maps] mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()] convnet = LeNet(conv_activations, 1, image_size, filter_sizes=zip(conv_sizes, conv_sizes), feature_maps=feature_maps, pooling_sizes=zip(pool_sizes, pool_sizes), top_mlp_activations=mlp_activations, top_mlp_dims=mlp_hiddens + [output_size], border_mode='valid', weights_init=Uniform(width=.2), biases_init=Constant(0)) # We push initialization config to set different initialization schemes # for convolutional layers. convnet.push_initialization_config() convnet.layers[0].weights_init = Uniform(width=.2) convnet.layers[1].weights_init = Uniform(width=.09) convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08) convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11) convnet.initialize() logging.info("Input dim: {} {} {}".format( *convnet.children[0].get_dim('input_'))) for i, layer in enumerate(convnet.layers): if isinstance(layer, Activation): logging.info("Layer {} ({})".format( i, layer.__class__.__name__)) else: logging.info("Layer {} ({}) dim: {} {} {}".format( i, layer.__class__.__name__, *layer.get_dim('output'))) x = tensor.tensor4('features') # Normalize input and apply the convnet probs = convnet.apply(x) cg = ComputationGraph([probs]) outs = VariableFilter( roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables) # Create an interior activation model model = Model([probs] + outs) # Load it with trained parameters params = load_parameters(open(save_to, 'rb')) model.set_parameter_values(params) algorithm = MaximumActivationSearch(outputs=outs) # Use the mnist test set, unshuffled mnist_test = MNIST(("test",), sources=['features']) mnist_test_stream = DataStream.default_stream( mnist_test, iteration_scheme=SequentialScheme( mnist_test.num_examples, batch_size)) extensions = [Timing(), FinishAfter(after_n_epochs=1), DataStreamMonitoring( [], mnist_test_stream, prefix="test"), Checkpoint("maxact.tar"), ProgressBar(), Printing()] main_loop = MainLoop( algorithm, mnist_test_stream, model=model, extensions=extensions) main_loop.run() examples = mnist_test.get_example_stream() example = examples.get_data(0)[0] layers = convnet.layers for output, record in algorithm.maximum_activations.items(): layer = get_brick(output) activations, indices, snapshots = ( r.get_value() if r else None for r in record[1:]) filmstrip = Filmstrip( example.shape[-2:], (indices.shape[1], indices.shape[0]), background='blue') if layer in layers: fieldmap = layerarray_fieldmap(layers[0:layers.index(layer) + 1]) for unit in range(indices.shape[1]): for index in range(100): mask = make_mask(example.shape[-2:], fieldmap, numpy.clip( snapshots[index, unit, :, :], 0, numpy.inf)) imagenum = indices[index, unit, 0] filmstrip.set_image((unit, index), examples.get_data(imagenum)[0], mask) else: for unit in range(indices.shape[1]): for index in range(100): imagenum = indices[index, unit] filmstrip.set_image((unit, index), examples.get_data(imagenum)[0]) filmstrip.save(layer.name + '_maxact.jpg')
from fuel.datasets.hdf5 import H5PYDataset

train_set = H5PYDataset(
    './data/data.hdf5',
    which_sets=('train', ),
    subset=slice(0, 290000),
    # load_in_memory=True,
)
valid_set = H5PYDataset(
    './data/data.hdf5',
    which_sets=('train', ),
    subset=slice(290000, 300000),
    # load_in_memory=True,
)

train_stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=1000))
valid_stream = DataStream.default_stream(
    valid_set,
    iteration_scheme=ShuffledScheme(valid_set.num_examples, batch_size=1000))

# compute mean target values
print('Computing mean target values...')
cps = []
deps = []
primes = []
hascar = []
cp_index = train_set.provides_sources.index('codepostal')
prime_index = train_set.provides_sources.index('labels')
def create_main_loop(save_to, num_epochs, unit_order=None, batch_size=500, num_batches=None): image_size = (28, 28) output_size = 10 convnet = create_lenet_5() x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) case_costs = CasewiseCrossEntropy().apply(y.flatten(), probs) cost = case_costs.mean().copy(name='cost') # cost = (CategoricalCrossEntropy().apply(y.flatten(), probs) # .copy(name='cost')) error_rate = (MisclassificationRate().apply(y.flatten(), probs).copy(name='error_rate')) cg = ComputationGraph([cost, error_rate]) # Apply regularization to the cost weights = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + sum([0.0003 * (W**2).sum() for W in weights]) cost.name = 'cost_with_regularization' mnist_train = MNIST(("train", )) mnist_train_stream = DataStream.default_stream( mnist_train, iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size)) mnist_test = MNIST(("test", )) mnist_test_stream = DataStream.default_stream( mnist_test, iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size)) # Generate pics for biases biases = VariableFilter(roles=[BIAS])(cg.parameters) # Train with simple SGD algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=AdaDelta()) # Find layer outputs to probe outs = OrderedDict( reversed( list((get_brick(out).name, out) for out in VariableFilter(roles=[OUTPUT], bricks=[Convolutional, Linear])( cg.variables)))) actpic_extension = ActpicExtension(actpic_variables=outs, case_labels=y, pics=x, label_count=output_size, rectify=-1, data_stream=mnist_test_stream, after_batch=True) synpic_extension = SynpicExtension(synpic_parameters=biases, case_costs=case_costs, case_labels=y, pics=x, batch_size=batch_size, pic_size=image_size, label_count=output_size, after_batch=True) # Impose an orderint for the SaveImages extension if unit_order is not None: with open(unit_order, 'rb') as handle: histograms = pickle.load(handle) unit_order = compute_unit_order(histograms) # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches), actpic_extension, synpic_extension, SaveImages(picsources=[synpic_extension, actpic_extension], title="LeNet-5: batch {i}, " + "cost {cost_with_regularization:.2f}, " + "trainerr {error_rate:.3f}", data=[cost, error_rate], graph='error_rate', graph_len=500, unit_order=unit_order, after_batch=True), DataStreamMonitoring([cost, error_rate], mnist_test_stream, prefix="test"), TrainingDataMonitoring([ cost, error_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", after_epoch=True), Checkpoint(save_to), ProgressBar(), Printing() ] model = Model(cost) main_loop = MainLoop(algorithm, mnist_train_stream, model=model, extensions=extensions) return main_loop
data_train = H5PYDataset('/home/xuehongyang/TGIF_open_161217.hdf5',
                         which_sets=('train', ),
                         subset=slice(0, 230689 // bs * bs))
data_test = H5PYDataset('/home/xuehongyang/TGIF_open_161217.hdf5',
                        which_sets=('test', ),
                        sources=(
                            'question_features',
                            'question_features_reverse',
                            'mask_matrix',
                            'visual_features',
                        ),
                        subset=slice(0, 32378 // bs * bs))

data_stream_train = DataStream.default_stream(
    data_train,
    iteration_scheme=ShuffledScheme(data_train.num_examples, batch_size=bs))
data_stream_test = DataStream.default_stream(
    data_test,
    iteration_scheme=SequentialScheme(data_test.num_examples, batch_size=bs))

learning_rate = 0.0002
n_epochs = 100

algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            on_unused_sources='ignore',
                            step_rule=CompositeRule([
                                StepClipping(10.),
                                Adam(learning_rate),
def s(s):
    return Flatten(
        DataStream.default_stream(
            s, iteration_scheme=ShuffledScheme(s.num_examples, batch_size=256)))
    '''
    return net['conv1_1']


if __name__ == '__main__':
    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features', ))
    dataset_test = MNIST(['test'], sources=('features', ))
    n_colors = 1
    spatial_width = 28

    train_stream = Flatten(
        DataStream.default_stream(
            dataset_train,
            iteration_scheme=ShuffledScheme(
                examples=dataset_train.num_examples -
                (dataset_train.num_examples % 32),
                batch_size=32)))

    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')
    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
    # test_prediction_dimshuffle = test_prediction.dimshuffle((0, 2, 3, 1))
    pred_fcn_fn = theano.function([input_], test_prediction)

    for data in train_stream.get_epoch_iterator():
        data_use = (data[0].reshape(32, 1, 28, 28), )
        out_put = pred_fcn_fn(data_use[0])

    import ipdb
def train(args, model_args): #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_' model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=100000, classes=1, cycles=2., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() if args.dataset != 'lsun' and args.dataset != 'celeba': train_stream = Flatten( DataStream.default_stream( dataset_train, iteration_scheme=ShuffledScheme( examples=dataset_train.num_examples - (dataset_train.num_examples % args.batch_size), batch_size=args.batch_size))) else: train_stream = dataset_train test_stream = dataset_test print "Width", WIDTH, spatial_width shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams ''' x = T.matrix('x', dtype='float32') temp = T.scalar('temp', dtype='float32') f=transition_operator(tparams, model_options, x, temp) for data in train_stream.get_epoch_iterator(): print data[0] a = f([data[0], 1.0, 1]) #ipdb.set_trace() ''' x, cost, start_temperature = build_model(tparams, model_options) inps = [x, start_temperature] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) #print 'Building f_cost...', #f_cost = theano.function(inps, cost) #print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' for param in tparams: print param print tparams[param].get_value().shape print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 1 print 'Number of steps....' print args.num_steps print "Number of metasteps...." print args.meta_steps print 'Done' count_sample = 1 for eidx in xrange(max_epochs): if eidx % 20 == 0: params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(eidx) + '.npz') n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): if args.dataset == 'CIFAR10': if data[0].shape[0] == args.batch_size: data_use = (data[0].reshape(args.batch_size, 3 * 32 * 32), ) else: continue t0 = time.time() batch_index += 1 n_samples += len(data_use[0]) uidx += 1 if data_use[0] is None: print 'No data ' uidx -= 1 continue ud_start = time.time() t1 = time.time() data_run = data_use[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): meta_cost.append(f_grad_shared(data_run, temperature_forward)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion( [data_run, temperature_forward, 1]) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) ud = time.time() - ud_start #gradient_updates_ = get_grads(data_use[0],args.temperature) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
t1 = time.time() #print time.time() - t1, "time to get grads" t1 = time.time() logger.log({ 'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost }) #'Norm_1': np.linalg.norm(gradient_updates_[0]), #'Norm_2': np.linalg.norm(gradient_updates_[1]), #'Norm_3': np.linalg.norm(gradient_updates_[2]), #'Norm_4': np.linalg.norm(gradient_updates_[3])}) #print time.time() - t1, "time to log" #print time.time() - t0, "total time in batch" t5 = time.time() if batch_index % 20 == 0: print batch_index, "cost", cost if batch_index % 200 == 0: count_sample += 1 temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) temperature_forward = args.temperature for num_step in range(args.num_steps * args.meta_steps): print "Forward temperature", temperature_forward if num_step == 0: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [data_use[0], temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/' + "batch_" + str(batch_index) + '_corrupted' + 'epoch_' + str(count_sample) + '_time_step_' + str(num_step)) else: x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion( [x_data, temperature_forward, 1]) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) x_temp = x_data.reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp, model_dir + '/batch_' + str(batch_index) + '_corrupted' + '_epoch_' + str(count_sample) + '_time_step_' + str(num_step)) temperature_forward = temperature_forward * args.temperature_factor x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH, WIDTH) plot_images( x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) + '_batch_index_' + str(batch_index)) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On backward step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/' + "batch_" + str(batch_index) + '_samples_backward_' + 'epoch_' + str(count_sample) + '_time_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor if args.noise == "gaussian": x_sampled = np.random.normal( 0.5, 2.0, size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0) else: s = np.random.binomial(1, 0.5, INPUT_SIZE) temperature = args.temperature * (args.temperature_factor**( args.num_steps * args.meta_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in range(args.num_steps * args.meta_steps + args.extra_steps): x_data, sampled, sampled_activation, sampled_preactivation = f_sample( [x_data, temperature, 0]) print 'On step number, using temperature', i, temperature reverse_time( scl, shft, x_data, model_dir + '/batch_index_' + str(batch_index) + '_inference_' + 'epoch_' + str(count_sample) + '_step_' + str(i)) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor ipdb.set_trace()
def main(job_id, params): config = ConfigParser.ConfigParser() config.readfp(open('./params')) max_epoch = int(config.get('hyperparams', 'max_iter', 100)) base_lr = float(config.get('hyperparams', 'base_lr', 0.01)) train_batch = int(config.get('hyperparams', 'train_batch', 256)) valid_batch = int(config.get('hyperparams', 'valid_batch', 512)) test_batch = int(config.get('hyperparams', 'valid_batch', 512)) W_sd = float(config.get('hyperparams', 'W_sd', 0.01)) W_mu = float(config.get('hyperparams', 'W_mu', 0.0)) b_sd = float(config.get('hyperparams', 'b_sd', 0.01)) b_mu = float(config.get('hyperparams', 'b_mu', 0.0)) hidden_units = int(config.get('hyperparams', 'hidden_units', 32)) input_dropout_ratio = float( config.get('hyperparams', 'input_dropout_ratio', 0.2)) dropout_ratio = float(config.get('hyperparams', 'dropout_ratio', 0.2)) weight_decay = float(config.get('hyperparams', 'weight_decay', 0.001)) max_norm = float(config.get('hyperparams', 'max_norm', 100.0)) solver = config.get('hyperparams', 'solver_type', 'rmsprop') data_file = config.get('hyperparams', 'data_file') side = config.get('hyperparams', 'side', 'b') # Spearmint optimization parameters: if params: base_lr = float(params['base_lr'][0]) dropout_ratio = float(params['dropout_ratio'][0]) hidden_units = params['hidden_units'][0] weight_decay = params['weight_decay'][0] if 'adagrad' in solver: solver_type = CompositeRule([ AdaGrad(learning_rate=base_lr), VariableClipping(threshold=max_norm) ]) else: solver_type = CompositeRule([ RMSProp(learning_rate=base_lr), VariableClipping(threshold=max_norm) ]) input_dim = {'l': 11427, 'r': 10519, 'b': 10519 + 11427} data_file = config.get('hyperparams', 'data_file') if 'b' in side: train = H5PYDataset(data_file, which_set='train') valid = H5PYDataset(data_file, which_set='valid') test = H5PYDataset(data_file, which_set='test') x_l = tensor.matrix('l_features') x_r = tensor.matrix('r_features') x = tensor.concatenate([x_l, x_r], axis=1) else: train = H5PYDataset(data_file, which_set='train', sources=['{}_features'.format(side), 'targets']) valid = H5PYDataset(data_file, which_set='valid', sources=['{}_features'.format(side), 'targets']) test = H5PYDataset(data_file, which_set='test', sources=['{}_features'.format(side), 'targets']) x = tensor.matrix('{}_features'.format(side)) y = tensor.lmatrix('targets') # Define a feed-forward net with an input, two hidden layers, and a softmax output: model = MLP(activations=[ Rectifier(name='h1'), Rectifier(name='h2'), Softmax(name='output'), ], dims=[input_dim[side], hidden_units, hidden_units, 2], weights_init=IsotropicGaussian(std=W_sd, mean=W_mu), biases_init=IsotropicGaussian(b_sd, b_mu)) # Don't forget to initialize params: model.initialize() # y_hat is the output of the neural net with x as its inputs y_hat = model.apply(x) # Define a cost function to optimize, and a classification error rate. 
# Also apply the outputs from the net and corresponding targets: cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat) error = MisclassificationRate().apply(y.flatten(), y_hat) error.name = 'error' # This is the model: before applying dropout model = Model(cost) # Need to define the computation graph for the cost func: cost_graph = ComputationGraph([cost]) # This returns a list of weight vectors for each layer W = VariableFilter(roles=[WEIGHT])(cost_graph.variables) # Add some regularization to this model: cost += weight_decay * l2_norm(W) cost.name = 'entropy' # computational graph with l2 reg cost_graph = ComputationGraph([cost]) # Apply dropout to inputs: inputs = VariableFilter([INPUT])(cost_graph.variables) dropout_inputs = [ input for input in inputs if input.name.startswith('linear_') ] dropout_graph = apply_dropout(cost_graph, [dropout_inputs[0]], input_dropout_ratio) dropout_graph = apply_dropout(dropout_graph, dropout_inputs[1:], dropout_ratio) dropout_cost = dropout_graph.outputs[0] dropout_cost.name = 'dropout_entropy' # Learning Algorithm (notice: we use the dropout cost for learning): algo = GradientDescent(step_rule=solver_type, params=dropout_graph.parameters, cost=dropout_cost) # algo.step_rule.learning_rate.name = 'learning_rate' # Data stream used for training model: training_stream = Flatten( DataStream.default_stream(dataset=train, iteration_scheme=ShuffledScheme( train.num_examples, batch_size=train_batch))) training_monitor = TrainingDataMonitoring([ dropout_cost, aggregation.mean(error), aggregation.mean(algo.total_gradient_norm) ], after_batch=True) # Use the 'valid' set for validation during training: validation_stream = Flatten( DataStream.default_stream(dataset=valid, iteration_scheme=ShuffledScheme( valid.num_examples, batch_size=valid_batch))) validation_monitor = DataStreamMonitoring(variables=[cost, error], data_stream=validation_stream, prefix='validation', after_epoch=True) test_stream = Flatten( DataStream.default_stream( dataset=test, iteration_scheme=ShuffledScheme(test.num_examples, batch_size=test_batch))) test_monitor = DataStreamMonitoring(variables=[error], data_stream=test_stream, prefix='test', after_training=True) plotting = Plot('AdniNet_{}'.format(side), channels=[ ['dropout_entropy', 'validation_entropy'], ['error', 'validation_error'], ], after_batch=False) # Checkpoint class used to save model and log: stamp = datetime.datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d-%H:%M') checkpoint = Checkpoint('./models/{}net/{}'.format(side, stamp), save_separately=['model', 'log'], every_n_epochs=1) # Home-brewed class for early stopping when we detect we have started to overfit early_stopper = FinishIfOverfitting(error_name='error', validation_name='validation_error', threshold=0.1, epochs=5, burn_in=100) # The main loop will train the network and output reports, etc main_loop = MainLoop(data_stream=training_stream, model=model, algorithm=algo, extensions=[ validation_monitor, training_monitor, plotting, FinishAfter(after_n_epochs=max_epoch), early_stopper, Printing(), ProgressBar(), checkpoint, test_monitor, ]) main_loop.run() ve = float(main_loop.log.last_epoch_row['validation_error']) te = float(main_loop.log.last_epoch_row['error']) spearmint_loss = ve + abs(te - ve) print 'Spearmint Loss: {}'.format(spearmint_loss) return spearmint_loss
def run(epochs=1, corpus="data/", HIDDEN_DIMS=100, path="./"): brown = BrownDataset(corpus) INPUT_DIMS = brown.get_vocabulary_size() OUTPUT_DIMS = brown.get_vocabulary_size() # These are theano variables x = tensor.lmatrix('context') y = tensor.ivector('output') # Construct the graph input_to_hidden = LookupTable(name='input_to_hidden', length=INPUT_DIMS, dim=HIDDEN_DIMS) # Compute the weight matrix for every word in the context and then compute # the average. h = tensor.mean(input_to_hidden.apply(x), axis=1) hidden_to_output = Linear(name='hidden_to_output', input_dim=HIDDEN_DIMS, output_dim=OUTPUT_DIMS) y_hat = Softmax().apply(hidden_to_output.apply(h)) # And initialize with random varibales and set the bias vector to 0 weights = IsotropicGaussian(0.01) input_to_hidden.weights_init = hidden_to_output.weights_init = weights input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0) input_to_hidden.initialize() hidden_to_output.initialize() # And now the cost function cost = CategoricalCrossEntropy().apply(y, y_hat) cg = ComputationGraph(cost) W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables) cost = cost + 0.01 * (W1 ** 2).sum() + 0.01 * (W2 ** 2).sum() cost.name = 'cost_with_regularization' mini_batch = SequentialScheme(brown.num_instances(), 512) data_stream = DataStream.default_stream(brown, iteration_scheme=mini_batch) # Now we tie up lose ends and construct the algorithm for the training # and define what happens in the main loop. algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.1)) extensions = [ ProgressBar(), FinishAfter(after_n_epochs=epochs), Printing(), # TrainingDataMonitoring(variables=[cost]), SaveWeights(layers=[input_to_hidden, hidden_to_output], prefixes=['%sfirst' % path, '%ssecond' % path]), # Plot( # 'Word Embeddings', # channels=[ # [ # 'cost_with_regularization' # ] # ]) ] logger.info("Starting main loop...") main = MainLoop(data_stream=data_stream, algorithm=algorithm, extensions=extensions) main.run() pickle.dump(cg, open('%scg.pickle' % path, 'wb'))
def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x,)

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')
data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, 10 * batch_size))
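# Note: `_transpose` is used in the pipeline above but is not defined in this
# snippet. A minimal sketch of what such a Mapping function might look like
# (an assumption, not taken from the original source): it swaps the batch and
# time axes so downstream recurrent bricks see (time, batch, features).
def _transpose(data):
    return tuple(numpy.swapaxes(array, 0, 1) for array in data)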
def run(): name = 'colored-mnist' epochs = 200 subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S") if not os.path.isdir(subdir): os.mkdir(subdir) bs = 150 data_train = CaptionedMNIST(banned=[np.random.randint(0,10) for i in xrange(12)], dataset='train', num=50000, bs=bs) data_valid = CaptionedMNIST(banned=[np.random.randint(0,10) for i in xrange(12)], dataset='valid', num=10000, bs=bs) train_stream = DataStream.default_stream(data_train, iteration_scheme=SequentialScheme(data_train.num_examples, bs)) valid_stream = DataStream.default_stream(data_valid, iteration_scheme=SequentialScheme(data_valid.num_examples, bs)) img_height, img_width = (60,60) x = T.matrix('features') #x.tag.test_value = np.random.rand(bs, 60*60).astype('float32') y = T.lmatrix('captions') #y.tag.test_value = np.random.rand(bs, 12).astype(int) mask = T.lmatrix('mask') #mask.tag.test_value = np.ones((bs,12)).astype(int) K = 29 lang_N = 14 N = 32 read_size = 8 write_size = 8 m = 256 gen_dim = 300 infer_dim = 300 z_dim = 150 l = 512 model = ImageModel(bs, K, lang_N, N, read_size, write_size, m, gen_dim, infer_dim, z_dim, l, image_size=60*60, cinit=-10, channels=3) model._inputs = [x,y,mask] kl, log_recons, log_likelihood, c = model.train(x,y,mask) kl.name = 'kl' log_recons.name = 'log_recons' log_likelihood.name = 'log_likelihood' c.name = 'c' model._outputs = [kl, log_recons, log_likelihood, c] params = model.params from solvers.RMSProp import RMSProp as solver lr = theano.shared(np.asarray(0.001).astype(theano.config.floatX)) updates = solver(log_likelihood, params, lr=lr)#0.001)#, clipnorm=10.0) model._updates = updates logger.info('Compiling sample function') model.build_sample_function(y, mask) logger.info('Compiled sample function') # ============= TRAIN ========= plots = [['train_kl','valid_kl'], ['train_log_recons','valid_log_recons'], ['train_log_likelihood','valid_log_likelihood']] main_loop = MainLoop(model, train_stream, [FinishAfter(epochs), Track(variables=['kl','log_recons','log_likelihood'], prefix='train'), #TrackBest(variables=['kl'], prefix='train'), DataStreamTrack(valid_stream, ['kl','log_recons','log_likelihood'], prefix='valid'), SampleSentences(subdir, bs, 60, 60), DropLearningRate(lr, 110, 0.00001), Plot(name, plots, 'http://nameless-wave-6526.herokuapp.com/'), SaveModel(subdir, name+'.model'), TimeProfile(), Printing()]) main_loop.run()
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from functions.custom_transformers import (RandomDownscale, RandomFixedSizeCrop,
                                           RandomRotate, Normalize, Cast)
import math

train_set = H5PYDataset('../data/data_1.hdf5', which_sets=('train',),
                        subset=slice(0, 20000), load_in_memory=True)
index_images = 0
index_labels = 1

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 125))
#downscaled_stream = RandomDownscale(stream, 140)
stream = RandomRotate(stream, 20)
#cropped_stream = RandomFixedSizeCrop(rotated_stream, (130,130))
stream = Normalize(stream)
stream = Cast(stream, 'floatX')

start_server(stream, hwm=10)
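# The script above only serves batches over ZMQ; a training process consumes
# them with Fuel's ServerDataStream. A minimal client-side sketch, assuming the
# dataset exposes 'features' and 'labels' sources and the default port 5557:
from fuel.streams import ServerDataStream

served_stream = ServerDataStream(('features', 'labels'), produces_examples=False,
                                 port=5557, hwm=10)
for batch in served_stream.get_epoch_iterator(as_dict=True):
    pass  # feed batch['features'] / batch['labels'] to the training function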
def run(): configs = [0] for config in configs: bs = 48 feature_dim = 4000 from uniform_dataset import UniformDataset data_test = UniformDataset(bs=bs, filename='/ssd2/hmdb/hmdb-tdd-1.hdf5', which_sets=['test'], sources=['features', 'time_mask', 'labels']) test_stream = DataStream.default_stream( data_test, iteration_scheme=SequentialScheme(data_test.num_examples, bs)) x = T.tensor3('features') time_mask = T.wmatrix('time_mask') y = T.imatrix('labels') classes = eval(sys.argv[1]) outputs = [] for clas in classes: print 'Loading', clas model = cPickle.load(open('models/learned_' + str(clas), 'rb')) prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y) prob.name = 'prob_' + str(clas) outputs += [prob] # prob is Nx1 # outputs is 51xNx1 # stack and take max along 51-class index outputs = T.stacklists(outputs) preds = T.argmax(outputs, axis=0) # predicted class is now outputs # which is shape Nx1, reshape to vector of N preds = preds.reshape((preds.shape[0], 1)) num_err = T.neq(preds, y).sum() acc = 1 - (num_err / y.shape[0]) test_func = theano.function([x, time_mask, y], outputs, on_unused_input='warn') data = test_stream.get_epoch_iterator(as_dict=True) total_acc = 0 num = 0 res = None labs = None for batch in data: o = test_func(batch['features'], batch['time_mask'], batch['labels']) if res is None: res = o labs = batch['labels'] else: # append on axis 1, to get 51xDs_size res = np.append(res, o, axis=1) labs = np.append(labs, batch['labels'], axis=0) continue total_acc += acc num += 1 print acc np.save('results' + sys.argv[2], res) np.save('labs' + sys.argv[2], labs)
def main(save_to, num_epochs, feature_maps=None, mlp_hiddens=None, conv_sizes=None, pool_sizes=None, batch_size=500): if feature_maps is None: feature_maps = [20, 50] if mlp_hiddens is None: mlp_hiddens = [500] if conv_sizes is None: conv_sizes = [5, 5] if pool_sizes is None: pool_sizes = [2, 2] image_size = (28, 28) output_size = 10 # Use ReLUs everywhere and softmax for the final prediction conv_activations = [Rectifier() for _ in feature_maps] mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()] convnet = LeNet(conv_activations, 1, image_size, filter_sizes=zip(conv_sizes, conv_sizes), feature_maps=feature_maps, pooling_sizes=zip(pool_sizes, pool_sizes), top_mlp_activations=mlp_activations, top_mlp_dims=mlp_hiddens + [output_size], border_mode='full', weights_init=Uniform(width=.2), biases_init=Constant(0)) # We push initialization config to set different initialization schemes # for convolutional layers. convnet.push_initialization_config() convnet.layers[0].weights_init = Uniform(width=.2) convnet.layers[1].weights_init = Uniform(width=.09) convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08) convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11) convnet.initialize() logging.info( "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_'))) for i, layer in enumerate(convnet.layers): logging.info("Layer {} dim: {} {} {}".format(i, *layer.get_dim('output'))) x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) cost = named_copy(CategoricalCrossEntropy().apply(y.flatten(), probs), 'cost') error_rate = named_copy(MisclassificationRate().apply(y.flatten(), probs), 'error_rate') cg = ComputationGraph([cost, error_rate]) mnist_train = MNIST(("train", )) mnist_train_stream = DataStream.default_stream( mnist_train, iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size)) mnist_test = MNIST(("test", )) mnist_test_stream = DataStream.default_stream( mnist_test, iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size)) # Train with simple SGD algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.1)) # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs), DataStreamMonitoring([cost, error_rate], mnist_test_stream, prefix="test"), TrainingDataMonitoring([ cost, error_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", after_epoch=True), Checkpoint(save_to), ProgressBar(), Printing() ] model = Model(cost) main_loop = MainLoop(algorithm, mnist_train_stream, model=model, extensions=extensions) main_loop.run()
cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
cost = cost + 0.001 * abs(W1).sum() + 0.001 * abs(W2).sum()
cost.name = 'cost'

error_rate = MisclassificationRate().apply(y.argmax(axis=1), y_hat)
error_rate.name = 'error_rate'

algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))

train_set = H5PYDataset('mushrooms.hdf5', which_sets=('train',))
train_stream = DataStream.default_stream(
    train_set,
    iteration_scheme=SequentialScheme(train_set.num_examples, batch_size=128))

test_set = H5PYDataset('mushrooms.hdf5', which_sets=('test',))
test_stream = DataStream.default_stream(
    test_set,
    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=128))

main = MainLoop(model=Model(cost),
                data_stream=train_stream,
                algorithm=algorithm,
                extensions=[
                    FinishAfter(after_n_epochs=10),
                    Printing(),
                    TrainingDataMonitoring([cost, error_rate],
def __init__(self, save_to): batch_size = 500 image_size = (28, 28) output_size = 10 convnet = create_lenet_5() layers = convnet.layers mnist_test = MNIST(("test", ), sources=['features', 'targets']) x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) cg = ComputationGraph([probs]) def full_brick_name(brick): return '/'.join([''] + [b.name for b in brick.get_unique_path()]) # Find layer outputs to probe outmap = OrderedDict( (full_brick_name(get_brick(out)), out) for out in VariableFilter( roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables)) # Generate pics for biases biases = VariableFilter(roles=[BIAS])(cg.parameters) # Generate parallel array, in the same order, for outputs outs = [outmap[full_brick_name(get_brick(b))] for b in biases] # Figure work count error_rate = (MisclassificationRate().apply( y.flatten(), probs).copy(name='error_rate')) sensitive_unit_count = (SensitiveUnitCount().apply( y.flatten(), probs, biases).copy(name='sensitive_unit_count')) sensitive_unit_count.tag.aggregation_scheme = ( Concatenate(sensitive_unit_count)) active_unit_count = (ActiveUnitCount().apply(outs).copy( name='active_unit_count')) active_unit_count.tag.aggregation_scheme = ( Concatenate(active_unit_count)) ignored_unit_count = (IgnoredUnitCount().apply( y.flatten(), probs, biases, outs).copy(name='ignored_unit_count')) ignored_unit_count.tag.aggregation_scheme = ( Concatenate(ignored_unit_count)) model = Model([ error_rate, sensitive_unit_count, active_unit_count, ignored_unit_count ]) # Load it with trained parameters params = load_parameters(open(save_to, 'rb')) model.set_parameter_values(params) mnist_test = MNIST(("test", )) mnist_test_stream = DataStream.default_stream( mnist_test, iteration_scheme=SequentialScheme(mnist_test.num_examples, batch_size)) evaluator = DatasetEvaluator([ error_rate, sensitive_unit_count, active_unit_count, ignored_unit_count ]) results = evaluator.evaluate(mnist_test_stream) def save_ranked_image(scores, filename): sorted_instances = scores.argsort() filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=(100, 100)) for i, index in enumerate(sorted_instances): filmstrip.set_image((i // 100, i % 100), mnist_test.get_data(request=index)[0]) filmstrip.save(filename) save_ranked_image(results['sensitive_unit_count'], 'sensitive.jpg') save_ranked_image(results['active_unit_count'], 'active.jpg') save_ranked_image(results['ignored_unit_count'], 'ignored.jpg')
def train(): if os.path.isfile('trainingdata.tar'): with open('trainingdata.tar', 'rb') as f: main = load(f) else: hidden_size = 512 filename = 'warpeace.hdf5' encoder = HDF5CharEncoder('warpeace_input.txt', 1000) encoder.write(filename) alphabet_len = encoder.length x = theano.tensor.lmatrix('x') readout = Readout( readout_dim=alphabet_len, feedback_brick=LookupFeedback(alphabet_len, hidden_size, name='feedback'), source_names=['states'], emitter=RandomSoftmaxEmitter(), name='readout' ) transition = GatedRecurrent( activation=Tanh(), dim=hidden_size) transition.weights_init = IsotropicGaussian(0.01) gen = SequenceGenerator(readout=readout, transition=transition, weights_init=IsotropicGaussian(0.01), biases_init=Constant(0), name='sequencegenerator') gen.push_initialization_config() gen.initialize() cost = gen.cost(outputs=x) cost.name = 'cost' cg = ComputationGraph(cost) algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(0.5)) train_set = encoder.get_dataset() train_stream = DataStream.default_stream( train_set, iteration_scheme=SequentialScheme( train_set.num_examples, batch_size=128)) main = MainLoop( model=Model(cost), data_stream=train_stream, algorithm=algorithm, extensions=[ FinishAfter(), Printing(), Checkpoint('trainingdata.tar', every_n_epochs=10), ShowOutput(every_n_epochs=10) ]) main.run()
                            parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules),
                            on_unused_sources='ignore')

from blocks.extensions import Timing, FinishAfter, Printing, ProgressBar
from blocks.extensions.monitoring import TrainingDataMonitoring
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from blocks.main_loop import MainLoop
from blocks.extensions.saveload import Checkpoint
from blocks.model import Model

main_loop = MainLoop(algorithm=algorithm,
                     data_stream=DataStream.default_stream(
                         dataset=train_dataset,
                         iteration_scheme=SequentialScheme(
                             train_dataset.num_examples, batch_size=100)),
                     model=Model(y_est),
                     extensions=[
                         Timing(),
                         FinishAfter(after_n_epochs=200),
                         TrainingDataMonitoring(variables=[cost],
                                                prefix="train",
                                                after_epoch=True),
                         Printing(),
                         ProgressBar(),
                         Checkpoint(path="./checkpoint.zip")
                     ])
main_loop.run()
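# Since the loop above checkpoints to ./checkpoint.zip, an interrupted run can
# be resumed from that file. A minimal sketch, assuming the checkpoint was
# written with the default save_main_loop=True:
from blocks.scripts import continue_training

continue_training('./checkpoint.zip')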
valid_set = H5PYDataset(
    './data_kaggle/kaggle_heart.hdf5',
    which_sets=('train',),
    #subset=slice(451, 494),
    subset=slice(451, 491),
    load_in_memory=True
)
index_cases = 0
index_position = 1
index_mult = 2
index_sax = 3
index_images = 4
index_targets = 5

stream = DataStream.default_stream(
    valid_set,
    iteration_scheme=ShuffledScheme(valid_set.num_examples, 10)
)
#downscaled_stream = RandomDownscale(stream, 70)
masked_stream = ApplyMask(stream)
order_stream = OrderFeatures(masked_stream)
cropped_stream = RandomFixedSizeCrop(order_stream, (64, 64))
float_stream = Normalize(cropped_stream)
padded_stream = ZeroPadding(float_stream)
casted_stream = Cast(padded_stream, 'floatX')

start_server(casted_stream, port=5558, hwm=10)
def main(dataset_path, use_c, log_min, log_max, num_steps): train_set = H5PYDataset( dataset_path, which_sets=('train',), sources=('features', 'targets'), subset=slice(0, 63257), load_in_memory=True) train_stream = DataStream.default_stream( train_set, iteration_scheme=ShuffledExampleScheme(train_set.num_examples)) def get_class_balanced_batch(iterator): train_features = [[] for _ in range(10)] train_targets = [[] for _ in range(10)] batch_size = 0 while batch_size < 1000: f, t = next(iterator) t = t[0] if len(train_features[t]) < 100: train_features[t].append(f) train_targets[t].append(t) batch_size += 1 train_features = numpy.vstack(sum(train_features, [])) train_targets = numpy.vstack(sum(train_targets, [])) return train_features, train_targets train_features, train_targets = get_class_balanced_batch( train_stream.get_epoch_iterator()) valid_set = H5PYDataset( dataset_path, which_sets=('train',), sources=('features', 'targets'), subset=slice(63257, 73257), load_in_memory=True) valid_features, valid_targets = valid_set.data_sources test_set = H5PYDataset( dataset_path, which_sets=('test',), sources=('features', 'targets'), load_in_memory=True) test_features, test_targets = test_set.data_sources if use_c is None: best_error_rate = 1.0 best_C = None for log_C in numpy.linspace(log_min, log_max, num_steps): C = numpy.exp(log_C) svm = LinearSVC(C=C) svm.fit(train_features, train_targets.ravel()) error_rate = 1 - numpy.mean( [svm.score(valid_features[1000 * i: 1000 * (i + 1)], valid_targets[1000 * i: 1000 * (i + 1)].ravel()) for i in range(10)]) if error_rate < best_error_rate: best_error_rate = error_rate best_C = C print('C = {}, validation error rate = {} '.format(C, error_rate) + '(best is {}, {})'.format(best_C, best_error_rate)) else: best_C = use_c error_rates = [] for _ in range(10): train_features, train_targets = get_class_balanced_batch( train_stream.get_epoch_iterator()) svm = LinearSVC(C=best_C) svm.fit(train_features, train_targets.ravel()) error_rates.append(1 - numpy.mean( [svm.score(valid_features[1000 * i: 1000 * (i + 1)], valid_targets[1000 * i: 1000 * (i + 1)].ravel()) for i in range(10)])) print('Validation error rate = {} +- {} '.format(numpy.mean(error_rates), numpy.std(error_rates))) error_rates = [] for _ in range(100): train_features, train_targets = get_class_balanced_batch( train_stream.get_epoch_iterator()) svm = LinearSVC(C=best_C) svm.fit(train_features, train_targets.ravel()) s = 1000 * numpy.sum( [svm.score(test_features[1000 * i: 1000 * (i + 1)], test_targets[1000 * i: 1000 * (i + 1)].ravel()) for i in range(26)]) s += 32 * svm.score(test_features[-32:], test_targets[-32:].ravel()) s = s / 26032.0 error_rates.append(1 - s) print('Test error rate = {} +- {} '.format(numpy.mean(error_rates), numpy.std(error_rates)))
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from config import basepath, minibatch_size
from transformers.custom_transformers import Standardize

submit_set = H5PYDataset(
    basepath + 'data.hdf5',
    which_sets=('submit',),
    #subset=slice(0,50),
    sources=['features', 'image_name'],
    load_in_memory=False)

stream = DataStream.default_stream(
    submit_set,
    iteration_scheme=SequentialScheme(submit_set.num_examples, minibatch_size))

print('I provide sources ', submit_set.sources)
print('Number of examples', submit_set.num_examples)

standardized_stream = Standardize(stream, 255)
start_server(standardized_stream)
def __init__(self, save_to): batch_size = 500 image_size = (28, 28) output_size = 10 convnet = create_lenet_5() layers = convnet.layers logging.info("Input dim: {} {} {}".format( *convnet.children[0].get_dim('input_'))) for i, layer in enumerate(convnet.layers): if isinstance(layer, Activation): logging.info("Layer {} ({})".format( i, layer.__class__.__name__)) else: logging.info("Layer {} ({}) dim: {} {} {}".format( i, layer.__class__.__name__, *layer.get_dim('output'))) mnist_test = MNIST(("test",), sources=['features', 'targets']) basis = create_fair_basis(mnist_test, 10, 10) x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) cg = ComputationGraph([probs]) def full_brick_name(brick): return '/'.join([''] + [b.name for b in brick.get_unique_path()]) # Find layer outputs to probe outs = OrderedDict((full_brick_name(get_brick(out)), out) for out in VariableFilter( roles=[OUTPUT], bricks=[Convolutional, Linear])( cg.variables)) # Normalize input and apply the convnet error_rate = (MisclassificationRate().apply(y.flatten(), probs) .copy(name='error_rate')) confusion = (ConfusionMatrix().apply(y.flatten(), probs) .copy(name='confusion')) confusion.tag.aggregation_scheme = Sum(confusion) confusion_image = (ConfusionImage().apply(y.flatten(), probs, x) .copy(name='confusion_image')) confusion_image.tag.aggregation_scheme = Sum(confusion_image) model = Model( [error_rate, confusion, confusion_image] + list(outs.values())) # Load it with trained parameters params = load_parameters(open(save_to, 'rb')) model.set_parameter_values(params) mnist_test = MNIST(("test",)) mnist_test_stream = DataStream.default_stream( mnist_test, iteration_scheme=SequentialScheme( mnist_test.num_examples, batch_size)) self.model = model self.mnist_test_stream = mnist_test_stream self.evaluator = DatasetEvaluator( [error_rate, confusion, confusion_image]) self.base_results = self.evaluator.evaluate(mnist_test_stream) # TODO: allow target layer to be parameterized self.target_layer = '/lenet/mlp/linear_0' self.next_layer_param = '/lenet/mlp/linear_1.W' self.base_sample = extract_sample( outs[self.target_layer], mnist_test_stream) self.base_param_value = ( model.get_parameter_dict()[ self.next_layer_param].get_value().copy())
dropout_inputs = [input for input in inputs
                  if input.name.startswith('linear_')]
dropout_graph = apply_dropout(cost_graph, dropout_inputs, dropout_ratio)
dropout_cost = dropout_graph.outputs[0]
dropout_cost.name = 'dropout_entropy'

# Learning Algorithm:
algo = GradientDescent(
    step_rule=solver_type,
    params=dropout_graph.parameters,
    cost=dropout_cost)

# Data stream used for training model:
training_stream = Flatten(
    DataStream.default_stream(
        dataset=train,
        iteration_scheme=ShuffledScheme(
            train.num_examples, batch_size=train_batch)))

training_monitor = TrainingDataMonitoring([cost], after_batch=True)

# Use the 'valid' set for validation during training:
validation_stream = Flatten(
    DataStream.default_stream(
        dataset=valid,
        iteration_scheme=ShuffledScheme(
            valid.num_examples, batch_size=valid_batch)))

validation_monitor = DataStreamMonitoring(
    variables=[cost],
def main(save_to, num_epochs, weight_decay=0.0001, noise_pressure=0, subset=None, num_batches=None, batch_size=None, histogram=None, resume=False): output_size = 10 prior_noise_level = -10 noise_step_rule = Scale(1e-6) noise_rate = theano.shared(numpy.asarray(1e-5, dtype=theano.config.floatX)) convnet = create_res_net(out_noise=True, tied_noise=True, tied_sigma=True, noise_rate=noise_rate, prior_noise_level=prior_noise_level) x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet test_probs = convnet.apply(x) test_cost = (CategoricalCrossEntropy().apply(y.flatten(), test_probs) .copy(name='cost')) test_error_rate = (MisclassificationRate().apply(y.flatten(), test_probs) .copy(name='error_rate')) test_confusion = (ConfusionMatrix().apply(y.flatten(), test_probs) .copy(name='confusion')) test_confusion.tag.aggregation_scheme = Sum(test_confusion) test_cg = ComputationGraph([test_cost, test_error_rate]) # Apply dropout to all layer outputs except final softmax # dropout_vars = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables) # drop_cg = apply_dropout(test_cg, dropout_vars, 0.5) # Apply 0.2 dropout to the pre-averaging layer # dropout_vars_2 = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_8_apply_output$")(test_cg.variables) # train_cg = apply_dropout(test_cg, dropout_vars_2, 0.2) # Apply 0.2 dropout to the input, as in the paper # train_cg = apply_dropout(test_cg, [x], 0.2) # train_cg = drop_cg # train_cg = apply_batch_normalization(test_cg) # train_cost, train_error_rate, train_components = train_cg.outputs with batch_normalization(convnet): with training_noise(convnet): train_probs = convnet.apply(x) train_cost = (CategoricalCrossEntropy().apply(y.flatten(), train_probs) .copy(name='cost')) train_components = (ComponentwiseCrossEntropy().apply(y.flatten(), train_probs).copy(name='components')) train_error_rate = (MisclassificationRate().apply(y.flatten(), train_probs).copy(name='error_rate')) train_cg = ComputationGraph([train_cost, train_error_rate, train_components]) population_updates = get_batch_normalization_updates(train_cg) bn_alpha = 0.9 extra_updates = [(p, p * bn_alpha + m * (1 - bn_alpha)) for p, m in population_updates] # for annealing nit_penalty = theano.shared(numpy.asarray(noise_pressure, dtype=theano.config.floatX)) nit_penalty.name = 'nit_penalty' # Compute noise rates for training graph train_logsigma = VariableFilter(roles=[LOG_SIGMA])(train_cg.variables) train_mean_log_sigma = tensor.concatenate([n.flatten() for n in train_logsigma]).mean() train_mean_log_sigma.name = 'mean_log_sigma' train_nits = VariableFilter(roles=[NITS])(train_cg.auxiliary_variables) train_nit_rate = tensor.concatenate([n.flatten() for n in train_nits]).mean() train_nit_rate.name = 'nit_rate' train_nit_regularization = nit_penalty * train_nit_rate train_nit_regularization.name = 'nit_regularization' # Apply regularization to the cost trainable_parameters = VariableFilter(roles=[WEIGHT, BIAS])( train_cg.parameters) mask_parameters = [p for p in trainable_parameters if get_brick(p).name == 'mask'] noise_parameters = VariableFilter(roles=[NOISE])(train_cg.parameters) biases = VariableFilter(roles=[BIAS])(train_cg.parameters) weights = VariableFilter(roles=[WEIGHT])(train_cg.variables) nonmask_weights = [p for p in weights if get_brick(p).name != 'mask'] l2_norm = sum([(W ** 2).sum() for W in nonmask_weights]) l2_norm.name = 'l2_norm' 
l2_regularization = weight_decay * l2_norm l2_regularization.name = 'l2_regularization' # testversion test_cost = test_cost + l2_regularization test_cost.name = 'cost_with_regularization' # Training version of cost train_cost_without_regularization = train_cost train_cost_without_regularization.name = 'cost_without_regularization' train_cost = train_cost + l2_regularization + train_nit_regularization train_cost.name = 'cost_with_regularization' cifar10_train = CIFAR10(("train",)) cifar10_train_stream = RandomPadCropFlip( NormalizeBatchLevels(DataStream.default_stream( cifar10_train, iteration_scheme=ShuffledScheme( cifar10_train.num_examples, batch_size)), which_sources=('features',)), (32, 32), pad=4, which_sources=('features',)) test_batch_size = 128 cifar10_test = CIFAR10(("test",)) cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_test, iteration_scheme=ShuffledScheme( cifar10_test.num_examples, test_batch_size)), which_sources=('features',)) momentum = Momentum(0.01, 0.9) # Create a step rule that doubles the learning rate of biases, like Caffe. # scale_bias = Restrict(Scale(2), biases) # step_rule = CompositeRule([scale_bias, momentum]) # Create a step rule that reduces the learning rate of noise scale_mask = Restrict(noise_step_rule, mask_parameters) step_rule = CompositeRule([scale_mask, momentum]) # from theano.compile.nanguardmode import NanGuardMode # Train with simple SGD algorithm = GradientDescent( cost=train_cost, parameters=trainable_parameters, step_rule=step_rule) algorithm.add_updates(extra_updates) #, # theano_func_kwargs={ # 'mode': NanGuardMode( # nan_is_error=True, inf_is_error=True, big_is_error=True)}) exp_name = save_to.replace('.%d', '') # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. 
extensions = [Timing(), FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches), EpochSchedule(momentum.learning_rate, [ (0, 0.01), # Warm up with 0.01 learning rate (50, 0.1), # Then go back to 0.1 (100, 0.01), (150, 0.001) # (83, 0.01), # Follow the schedule in the paper # (125, 0.001) ]), EpochSchedule(noise_step_rule.learning_rate, [ (0, 1e-2), (2, 1e-1), (4, 1) # (0, 1e-6), # (2, 1e-5), # (4, 1e-4) ]), EpochSchedule(noise_rate, [ (0, 1e-2), (2, 1e-1), (4, 1) # (0, 1e-6), # (2, 1e-5), # (4, 1e-4), # (6, 3e-4), # (8, 1e-3), # Causes nit rate to jump # (10, 3e-3), # (12, 1e-2), # (15, 3e-2), # (19, 1e-1), # (24, 3e-1), # (30, 1) ]), NoiseExtension( noise_parameters=noise_parameters), NoisyDataStreamMonitoring( [test_cost, test_error_rate, test_confusion], cifar10_test_stream, noise_parameters=noise_parameters, prefix="test"), TrainingDataMonitoring( [train_cost, train_error_rate, train_nit_rate, train_cost_without_regularization, l2_regularization, train_nit_regularization, momentum.learning_rate, train_mean_log_sigma, aggregation.mean(algorithm.total_gradient_norm)], prefix="train", every_n_batches=17), # after_epoch=True), Plot('Training performance for ' + exp_name, channels=[ ['train_cost_with_regularization', 'train_cost_without_regularization', 'train_nit_regularization', 'train_l2_regularization'], ['train_error_rate'], ['train_total_gradient_norm'], ['train_mean_log_sigma'], ], every_n_batches=17), Plot('Test performance for ' + exp_name, channels=[[ 'train_error_rate', 'test_error_rate', ]], after_epoch=True), EpochCheckpoint(save_to, use_cpickle=True, after_epoch=True), ProgressBar(), Printing()] if histogram: attribution = AttributionExtension( components=train_components, parameters=cg.parameters, components_size=output_size, after_batch=True) extensions.insert(0, attribution) if resume: extensions.append(Load(exp_name, True, True)) model = Model(train_cost) main_loop = MainLoop( algorithm, cifar10_train_stream, model=model, extensions=extensions) main_loop.run() if histogram: save_attributions(attribution, filename=histogram) with open('execution-log.json', 'w') as outfile: json.dump(main_loop.log, outfile, cls=NumpyEncoder)
else: from model import build_model host_plot = 'http://hades.calculquebec.ca:5042' slice_train = slice(0, n_ex) slice_test = slice(45000, 50000 - 8) slice_valid = slice(40000, 45000 - 8) ## Load cifar10 stream batch_size = 32 num_train_example = slice_train.stop - slice_train.start num_valid_example = slice_valid.stop - slice_valid.start num_test_example = slice_test.stop - slice_test.start train_dataset = CIFAR10(('train', ), subset=slice_train) train_stream = DataStream.default_stream(train_dataset, iteration_scheme=SequentialScheme( train_dataset.num_examples, batch_size)) train_stream = OneHotEncode10(train_stream, which_sources=('targets', )) train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', )) train_stream = MinimumImageDimensions(train_stream, (224, 224), which_sources=('features', )) train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features', )) train_stream = Cast(train_stream, 'floatX', which_sources=('features', )) valid_dataset = CIFAR10(('train', ), subset=slice_valid) valid_stream = DataStream.default_stream(valid_dataset, iteration_scheme=SequentialScheme( valid_dataset.num_examples, batch_size)) valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', )) valid_stream = MinimumImageDimensions(valid_stream, (224, 224),
    from fuel.datasets import CIFAR10
    dataset_train = CIFAR10(['train'], sources=('features',))
    dataset_test = CIFAR10(['test'], sources=('features',))
    n_colors = 3
    spatial_width = 32
elif args.dataset == 'IMAGENET':
    from imagenet_data import IMAGENET
    spatial_width = 128
    dataset_train = IMAGENET(['train'], width=spatial_width)
    dataset_test = IMAGENET(['test'], width=spatial_width)
    n_colors = 3
else:
    raise ValueError("Unknown dataset %s." % args.dataset)

train_stream = Flatten(DataStream.default_stream(
    dataset_train,
    iteration_scheme=ShuffledScheme(
        examples=dataset_train.num_examples, batch_size=args.batch_size)))
test_stream = Flatten(DataStream.default_stream(
    dataset_test,
    iteration_scheme=ShuffledScheme(
        examples=dataset_test.num_examples, batch_size=args.batch_size)))

shp = next(train_stream.get_epoch_iterator())[0].shape

# make the training data 0 mean and variance 1
# TODO compute mean and variance on full dataset, not minibatch
Xbatch = next(train_stream.get_epoch_iterator())[0]
scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
shft = -np.mean(Xbatch * scl)
# scale is applied before shift
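# The scale/shift computed above are intended to be applied to the streams via
# Fuel's ScaleAndShift transformer (scale is applied before shift). A minimal
# sketch of that step, assuming 'features' is the source being normalized:
from fuel.transformers import ScaleAndShift

train_stream = ScaleAndShift(train_stream, scl, shft, which_sources=('features',))
test_stream = ScaleAndShift(test_stream, scl, shft, which_sources=('features',))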
def train(args, model_args): model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_' model_dir = create_log_dir(args, model_id) model_id2 = 'logs/walkback_' model_dir2 = create_log_dir(args, model_id2) print model_dir print model_dir2 + '/' + 'log.jsonl.gz' logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz', formatter=None) # TODO batches_per_epoch should not be hard coded lrate = args.lr import sys sys.setrecursionlimit(10000000) args, model_args = parse_args() #trng = RandomStreams(1234) if args.resume_file is not None: print "Resuming training from " + args.resume_file from blocks.scripts import continue_training continue_training(args.resume_file) ## load the training data if args.dataset == 'MNIST': print 'loading MNIST' from fuel.datasets import MNIST dataset_train = MNIST(['train'], sources=('features', )) dataset_test = MNIST(['test'], sources=('features', )) n_colors = 1 spatial_width = 28 elif args.dataset == 'CIFAR10': from fuel.datasets import CIFAR10 dataset_train = CIFAR10(['train'], sources=('features', )) dataset_test = CIFAR10(['test'], sources=('features', )) n_colors = 3 spatial_width = 32 elif args.dataset == "lsun" or args.dataset == "lsunsmall": print "loading lsun class!" from load_lsun import load_lsun print "loading lsun data!" if args.dataset == "lsunsmall": dataset_train, dataset_test = load_lsun(args.batch_size, downsample=True) spatial_width = 32 else: dataset_train, dataset_test = load_lsun(args.batch_size, downsample=False) spatial_width = 64 n_colors = 3 elif args.dataset == "celeba": print "loading celeba data" from fuel.datasets.celeba import CelebA dataset_train = CelebA(which_sets=['train'], which_format="64", sources=('features', ), load_in_memory=False) dataset_test = CelebA(which_sets=['test'], which_format="64", sources=('features', ), load_in_memory=False) spatial_width = 64 n_colors = 3 tr_scheme = SequentialScheme(examples=dataset_train.num_examples, batch_size=args.batch_size) ts_scheme = SequentialScheme(examples=dataset_test.num_examples, batch_size=args.batch_size) train_stream = DataStream.default_stream(dataset_train, iteration_scheme=tr_scheme) test_stream = DataStream.default_stream(dataset_test, iteration_scheme=ts_scheme) dataset_train = train_stream dataset_test = test_stream #epoch_it = train_stream.get_epoch_iterator() elif args.dataset == 'Spiral': print 'loading SPIRAL' train_set = Spiral(num_examples=20000, classes=1, cycles=1., noise=0.01, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) elif args.dataset == 'Circle': print 'loading Circle' train_set = Circle(num_examples=20000, classes=1, cycles=1., noise=0.0, sources=('features', )) dataset_train = DataStream.default_stream( train_set, iteration_scheme=ShuffledScheme(train_set.num_examples, args.batch_size)) iter_per_epoch = train_set.num_examples else: raise ValueError("Unknown dataset %s." % args.dataset) model_options = locals().copy() train_stream = dataset_train shp = next(train_stream.get_epoch_iterator())[0].shape print "got epoch iterator" # make the training data 0 mean and variance 1 # TODO compute mean and variance on full dataset, not minibatch Xbatch = next(train_stream.get_epoch_iterator())[0] scl = 1. 
/ np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2)) shft = -np.mean(Xbatch * scl) # scale is applied before shift #train_stream = ScaleAndShift(train_stream, scl, shft) #test_stream = ScaleAndShift(test_stream, scl, shft) print 'Building model' params = init_params(model_options) if args.reload_: print "Trying to reload parameters" if os.path.exists(args.saveto_filename): print 'Reloading Parameters' print args.saveto_filename params = load_params(args.saveto_filename, params) tparams = init_tparams(params) print tparams x, cost, start_temperature = build_model(tparams, model_options) inps = [x, start_temperature] x_Data = T.matrix('x_Data', dtype='float32') temperature = T.scalar('temperature', dtype='float32') forward_diffusion = one_step_diffusion(x_Data, model_options, tparams, temperature) #print 'Building f_cost...', #f_cost = theano.function(inps, cost) #print 'Done' print tparams grads = T.grad(cost, wrt=itemlist(tparams)) #get_grads = theano.function(inps, grads) for j in range(0, len(grads)): grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]), grads[j]) # compile the optimizer, the actual computational graph is compiled here lr = T.scalar(name='lr') print 'Building optimizers...', optimizer = args.optimizer f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams, grads, inps, cost) print 'Done' print 'Buiding Sampler....' f_sample = sample(tparams, model_options) print 'Done' uidx = 0 estop = False bad_counter = 0 max_epochs = 4000 batch_index = 0 print 'Number of steps....', args.num_steps print 'Done' count_sample = 1 batch_index = 0 for eidx in xrange(max_epochs): if eidx % 20 == 0: params = unzip(tparams) save_params(params, model_dir + '/' + 'params_' + str(eidx) + '.npz') if eidx == 30: ipdb.set_trace() n_samples = 0 print 'Starting Next Epoch ', eidx for data in train_stream.get_epoch_iterator(): batch_index += 1 n_samples += len(data[0]) uidx += 1 if data[0] is None: print 'No data ' uidx -= 1 continue data_run = data[0] temperature_forward = args.temperature meta_cost = [] for meta_step in range(0, args.meta_steps): meta_cost.append(f_grad_shared(data_run, temperature_forward)) f_update(lrate) if args.meta_steps > 1: data_run, sigma, _, _ = forward_diffusion( data_run, temperature_forward) temperature_forward *= args.temperature_factor cost = sum(meta_cost) / len(meta_cost) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1. 
logger.log({ 'epoch': eidx, 'batch_index': batch_index, 'uidx': uidx, 'training_error': cost }) empty = [] spiral_x = [empty for i in range(args.num_steps)] spiral_corrupted = [] spiral_sampled = [] grad_forward = [] grad_back = [] x_data_time = [] x_tilt_time = [] if batch_index % 8 == 0: count_sample += 1 temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) temperature_forward = args.temperature for num_step in range(args.num_steps): if num_step == 0: x_data_time.append(data[0]) plot_images( data[0], model_dir + '/' + 'orig_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) x_data, mu_data, _, _ = forward_diffusion( data[0], temperature_forward) plot_images( x_data, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(num_step)) x_data_time.append(x_data) temp_grad = np.concatenate( (x_data_time[-2], x_data_time[-1]), axis=1) grad_forward.append(temp_grad) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) spiral_corrupted.append(x_data) mu_data = np.asarray(mu_data).astype( 'float32').reshape(args.batch_size, INPUT_SIZE) mu_data = mu_data.reshape(args.batch_size, 2) else: x_data_time.append(x_data) x_data, mu_data, _, _ = forward_diffusion( x_data, temperature_forward) plot_images( x_data, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(num_step)) x_data = np.asarray(x_data).astype('float32').reshape( args.batch_size, INPUT_SIZE) spiral_corrupted.append(x_data) mu_data = np.asarray(mu_data).astype( 'float32').reshape(args.batch_size, INPUT_SIZE) mu_data = mu_data.reshape(args.batch_size, 2) x_data_time.append(x_data) temp_grad = np.concatenate( (x_data_time[-2], x_data_time[-1]), axis=1) grad_forward.append(temp_grad) temperature_forward = temperature_forward * args.temperature_factor mean_sampled = x_data.mean() var_sampled = x_data.var() x_temp2 = data[0].reshape(args.batch_size, 2) plot_2D( spiral_corrupted, args.num_steps, model_dir + '/' + 'corrupted_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) plot_2D( x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' + str(count_sample) + '_batch_index_' + str(batch_index)) plot_grad( grad_forward, model_dir + '/' + 'grad_forward_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) for i in range(args.num_steps + args.extra_steps): x_tilt_time.append(x_data) x_data, sampled_mean = f_sample(x_data, temperature) plot_images( x_data, model_dir + '/' + 'sampled_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index) + '_time_step_' + str(i)) x_tilt_time.append(x_data) temp_grad = np.concatenate( (x_tilt_time[-2], x_tilt_time[-1]), axis=1) grad_back.append(temp_grad) ###print 'Recons, On step number, using temperature', i, temperature x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor plot_grad( grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) plot_2D( x_tilt_time, args.num_steps, model_dir + '/' + 'sampled_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) s = np.random.normal(mean_sampled, var_sampled, [args.batch_size, 2]) x_sampled = s temperature = args.temperature * (args.temperature_factor **(args.num_steps - 1)) x_data = np.asarray(x_sampled).astype('float32') for i in 
range(args.num_steps + args.extra_steps): x_data, sampled_mean = f_sample(x_data, temperature) spiral_sampled.append(x_data) x_data = np.asarray(x_data).astype('float32') x_data = x_data.reshape(args.batch_size, INPUT_SIZE) if temperature == args.temperature: temperature = temperature else: temperature /= args.temperature_factor plot_2D( spiral_sampled, args.num_steps, model_dir + '/' + 'inference_' + 'epoch_' + str(count_sample) + '_batch_' + str(batch_index)) ipdb.set_trace()
# Use getLogger so the module logger participates in the logging hierarchy,
# and a valid time format for the log timestamps.
logger = logging.getLogger(__name__)
FORMAT = '[%(asctime)s] %(name)s %(message)s'
DATEFMT = "%H:%M:%S"
logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.DEBUG)

inits = {
    'weights_init': IsotropicGaussian(0.01),
    'biases_init': Constant(0.)
}

batch_size = 100
data_train = MNIST(which_sets=['train'], sources=['features'])
train_stream = Flatten(
    DataStream.default_stream(data_train,
                              iteration_scheme=SequentialScheme(
                                  data_train.num_examples, batch_size)))

features_size = 28 * 28 * 1
inputs = T.matrix('features')
test_data = {
    inputs: 255 * np.random.normal(size=(batch_size, 28 * 28)).astype('float32')
}

prior = Z_prior(dim=128)
gen = Generator(input_dim=128, dims=[128, 64, 64, features_size],
def main(save_to, num_epochs, regularization=0.001, subset=None, num_batches=None, batch_size=None, histogram=None, resume=False): output_size = 10 convnet = create_all_conv_net() x = tensor.tensor4('features') y = tensor.lmatrix('targets') # Normalize input and apply the convnet probs = convnet.apply(x) test_cost = (CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')) test_components = (ComponentwiseCrossEntropy().apply( y.flatten(), probs).copy(name='components')) test_error_rate = (MisclassificationRate().apply( y.flatten(), probs).copy(name='error_rate')) test_confusion = (ConfusionMatrix().apply(y.flatten(), probs).copy(name='confusion')) test_confusion.tag.aggregation_scheme = Sum(test_confusion) test_cg = ComputationGraph([test_cost, test_error_rate, test_components]) # Apply dropout to all layer outputs except final softmax dropout_vars = VariableFilter( roles=[OUTPUT], bricks=[Convolutional], theano_name_regex="^conv_[25]_apply_output$")(test_cg.variables) drop_cg = apply_dropout(test_cg, dropout_vars, 0.5) # Apply 0.2 dropout to the pre-averaging layer # dropout_vars_2 = VariableFilter( # roles=[OUTPUT], bricks=[Convolutional], # theano_name_regex="^conv_8_apply_output$")(drop_cg.variables) # train_cg = apply_dropout(drop_cg, dropout_vars_2, 0.2) # Apply 0.2 dropout to the input, as in the paper # train_cg = apply_dropout(drop_cg, [x], 0.2) train_cg = drop_cg # train_cg = test_cg train_cost, train_error_rate, train_components = train_cg.outputs # Apply regularization to the cost biases = VariableFilter(roles=[BIAS])(train_cg.parameters) weights = VariableFilter(roles=[WEIGHT])(train_cg.variables) l2_norm = sum([(W**2).sum() for W in weights]) l2_norm.name = 'l2_norm' l2_regularization = regularization * l2_norm l2_regularization.name = 'l2_regularization' test_cost = test_cost + l2_regularization test_cost.name = 'cost_with_regularization' # Training version of cost train_cost_without_regularization = train_cost train_cost_without_regularization.name = 'cost_without_regularization' train_cost = train_cost + regularization * l2_norm train_cost.name = 'cost_with_regularization' cifar10_train = CIFAR10(("train", )) #cifar10_train_stream = RandomPadCropFlip( # NormalizeBatchLevels(DataStream.default_stream( # cifar10_train, iteration_scheme=ShuffledScheme( # cifar10_train.num_examples, batch_size)), # which_sources=('features',)), # (32, 32), pad=5, which_sources=('features',)) cifar10_train_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_train, iteration_scheme=ShuffledScheme(cifar10_train.num_examples, batch_size)), which_sources=('features', )) test_batch_size = 1000 cifar10_test = CIFAR10(("test", )) cifar10_test_stream = NormalizeBatchLevels(DataStream.default_stream( cifar10_test, iteration_scheme=ShuffledScheme(cifar10_test.num_examples, test_batch_size)), which_sources=('features', )) momentum = Momentum(0.002, 0.9) # Create a step rule that doubles the learning rate of biases, like Caffe. # scale_bias = Restrict(Scale(2), biases) # step_rule = CompositeRule([scale_bias, momentum]) # step_rule = CompositeRule([StepClipping(100), momentum]) step_rule = momentum # Train with simple SGD algorithm = GradientDescent(cost=train_cost, parameters=train_cg.parameters, step_rule=step_rule) # `Timing` extension reports time for reading data, aggregating a batch # and monitoring; # `ProgressBar` displays a nice progress bar during training. 
extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches), EpochSchedule(momentum.learning_rate, [(1, 0.005), (3, 0.01), (5, 0.02), (200, 0.002), (250, 0.0002), (300, 0.00002)]), DataStreamMonitoring([test_cost, test_error_rate, test_confusion], cifar10_test_stream, prefix="test"), TrainingDataMonitoring([ train_cost, train_error_rate, train_cost_without_regularization, l2_regularization, momentum.learning_rate, aggregation.mean(algorithm.total_gradient_norm) ], prefix="train", every_n_batches=10), # after_epoch=True), Plot('Training performance for ' + save_to, channels=[ [ 'train_cost_with_regularization', 'train_cost_without_regularization', 'train_l2_regularization' ], ['train_error_rate'], ['train_total_gradient_norm'], ], every_n_batches=10), # after_batch=True), Plot('Test performance for ' + save_to, channels=[[ 'train_error_rate', 'test_error_rate', ]], after_epoch=True), Checkpoint(save_to), ProgressBar(), Printing() ] if histogram: attribution = AttributionExtension(components=train_components, parameters=cg.parameters, components_size=output_size, after_batch=True) extensions.insert(0, attribution) if resume: extensions.append(Load(save_to, True, True)) model = Model(train_cost) main_loop = MainLoop(algorithm, cifar10_train_stream, model=model, extensions=extensions) main_loop.run() if histogram: save_attributions(attribution, filename=histogram) with open('execution-log.json', 'w') as outfile: json.dump(main_loop.log, outfile, cls=NumpyEncoder)