def main():
    # Create a shared variable to monitor, with an update that adds a little uniform noise each call.
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    # Pull individual statistics expressions from the helper dict (several are unused in this example;
    # note that 'var' is rebound here from the shared variable to its variance expression - the
    # updates list above still references the shared variable).
    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    # Wrap the expressions in Monitors, attaching a FileService so values are also written to disk.
    mean_monitor = Monitor('mean', mean, train=True, valid=True,
                           out_service=FileService('outs/mean.txt'))
    var_monitor = Monitor('var', var, out_service=FileService('outs/var.txt'))

    # Group the monitors into channels.
    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    # Collapse the channels into (name, expression, out_service) tuples for each dataset split.
    train_collapsed_raw = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(item[0], item[1]) for item in train_collapsed_raw])
    train_services = OrderedDict([(item[0], item[2]) for item in train_collapsed_raw])

    valid_collapsed_raw = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(item[0], item[1]) for item in valid_collapsed_raw])
    valid_services = OrderedDict([(item[0], item[2]) for item in valid_collapsed_raw])

    # Compile Theano functions that compute the monitor values and apply the noise update.
    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    # Simulated 'training' loop: compute the train-split monitors and write them to their files.
    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        for name, service in train_services.items():
            if name in m:
                service.write(m[name], "train")
        log.debug('----- ' + make_time_units_string(time.time() - t))

    # Simulated 'validation' loop: same idea for the valid split.
    for epoch in range(10):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        for name, service in valid_services.items():
            if name in m:
                service.write(m[name], "valid")
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
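# ---------------------------------------------------------------------------------------------
# Sketch (not part of the original examples): the imports these snippets clearly rely on, plus a
# minimal entry point. Names such as Monitor, MonitorsChannel, FileService, Plot,
# collapse_channels, get_stats, add_uniform, make_time_units_string, and config_root_logger come
# from the installed opendeep package; their exact module paths differ between OpenDeep versions,
# so they are intentionally left out here. In a real script these lines would sit at the top of
# the file, and each example's main() would live in its own file.
# ---------------------------------------------------------------------------------------------
from collections import OrderedDict
import logging
import time

import numpy
import PIL.Image
import theano
import theano.tensor as T

log = logging.getLogger(__name__)

if __name__ == '__main__':
    main()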
def main():
    # First, let's create a simple feedforward MLP with one hidden layer as a Prototype.
    mlp = Prototype()
    mlp.add(BasicLayer(input_size=28*28, output_size=1000, activation='rectifier', noise='dropout'))
    mlp.add(SoftmaxLayer(output_size=10))

    # Now we get to choose what values we want to monitor, and what datasets we would like to
    # monitor on! Each Model (in our case, the Prototype) has a get_monitors method that returns a
    # useful dictionary of {string_name: monitor_theano_expression} for various computations of the
    # model we might care about. By default, this method returns an empty dictionary - it is the
    # model creator's job to include potential monitor values.
    mlp_monitors = mlp.get_monitors()
    mlp_channel = MonitorsChannel(name="error")
    for name, expression in mlp_monitors.items():
        mlp_channel.add(Monitor(name=name, expression=expression, train=True, valid=True, test=True))

    # Create some monitors for statistics about the hidden and output weights:
    # here, the mean of each weight matrix.
    weights_channel = MonitorsChannel(name="weights")
    hiddens_1 = mlp[0].get_params()[0]
    hiddens1_mean = T.mean(hiddens_1)
    weights_channel.add(Monitor(name="hiddens_mean", expression=hiddens1_mean, train=True))

    hiddens_2 = mlp[1].get_params()[0]
    hiddens2_mean = T.mean(hiddens_2)
    weights_channel.add(Monitor(name="out_mean", expression=hiddens2_mean, train=True))

    # Create our plot object to do live plotting!
    plot = Plot(bokeh_doc_name="Monitor Tutorial",
                monitor_channels=[mlp_channel, weights_channel],
                open_browser=True)

    # Use the SGD optimizer.
    optimizer = SGD(model=mlp,
                    dataset=MNIST(concat_train_valid=False),
                    n_epoch=500,
                    save_frequency=100,
                    batch_size=600,
                    learning_rate=.01,
                    lr_decay=False,
                    momentum=.9,
                    nesterov_momentum=True)

    # Train, with the plot!
    optimizer.train(plot=plot)
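# ---------------------------------------------------------------------------------------------
# Sketch (not from the original tutorial): once optimizer.train() finishes, the trained Prototype
# can be used for inference. This assumes the Prototype exposes the same run() method that the
# GSN example below uses (gsn.run(...)); `test_inputs` is a hypothetical (n_examples, 28*28)
# numpy array supplied by the caller.
# ---------------------------------------------------------------------------------------------
def predict_digits(mlp, test_inputs):
    probabilities = mlp.run(test_inputs)  # softmax class probabilities from the output layer
    return probabilities.argmax(axis=1)   # most likely digit for each example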
def main():
    ########################################
    # Initialization things with arguments #
    ########################################
    # use these arguments to get results from the paper referenced above
    _train_args = {"epochs": 1000,  # maximum number of times to run through the dataset
                   "batch_size": 100,  # number of examples to process in parallel (minibatch)
                   "min_batch_size": 1,  # the minimum number of examples for a batch to be considered
                   "save_freq": 1,  # how many epochs between saving parameters
                   "stop_threshold": .9995,  # multiplier for how much the train cost must improve to avoid stopping early
                   "stop_patience": 500,  # how many epochs to wait to see if the threshold has been reached
                   "learning_rate": .25,  # initial learning rate for SGD
                   "lr_decay": 'exponential',  # the decay function to use for the learning rate parameter
                   "lr_decay_factor": .995,  # by how much to decay the learning rate each epoch
                   "momentum": 0.5,  # the parameter momentum amount
                   'momentum_decay': False,  # how to decay the momentum each epoch (if applicable)
                   'momentum_factor': 0,  # by how much to decay the momentum (in this case, not at all)
                   'nesterov_momentum': False,  # whether to use nesterov momentum updates (accelerated momentum)
                   }
    config_root_logger()
    log.info("Creating a new GSN")

    mnist = MNIST(concat_train_valid=True)
    gsn = GSN(layers=2,
              walkbacks=4,
              hidden_size=1500,
              visible_activation='sigmoid',
              hidden_activation='tanh',
              input_size=28*28,
              tied_weights=True,
              hidden_add_noise_sigma=2,
              input_salt_and_pepper=0.4,
              outdir='outputs/test_gsn/',
              vis_init=False,
              noiseless_h1=True,
              input_sampling=True,
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0,
              cost_function='binary_crossentropy')

    recon_cost_channel = MonitorsChannel(name='cost')
    recon_cost_channel.add(Monitor('recon_cost', gsn.get_monitors()['recon_cost'], test=True))
    recon_cost_channel.add(Monitor('noisy_recon_cost', gsn.get_monitors()['noisy_recon_cost'], test=True))

    # Load initial weights and biases from file
    # params_to_load = '../../../outputs/gsn/mnist/trained_epoch_395.pkl'
    # gsn.load_params(params_to_load)

    optimizer = SGD(model=gsn, dataset=mnist, **_train_args)
    # optimizer = AdaDelta(model=gsn, dataset=mnist, epochs=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=recon_cost_channel)

    # Save some reconstruction output images
    n_examples = 100
    xs_test = mnist.test_inputs[:n_examples]
    noisy_xs_test = gsn.f_noise(xs_test)
    reconstructed = gsn.run(noisy_xs_test)

    # Stack the examples in groups of 10: each grid row shows 10 originals, the same 10 with
    # noise applied, and their reconstructions.
    stacked = numpy.vstack(
        [numpy.vstack([xs_test[i * 10: (i + 1) * 10],
                       noisy_xs_test[i * 10: (i + 1) * 10],
                       reconstructed[i * 10: (i + 1) * 10]])
         for i in range(10)])

    number_reconstruction = PIL.Image.fromarray(
        tile_raster_images(stacked, (gsn.image_height, gsn.image_width), (10, 30))
    )
    number_reconstruction.save(gsn.outdir + 'reconstruction.png')
    log.info("saved output image!")

    # Construct an image from the first weight matrix
    image = PIL.Image.fromarray(
        tile_raster_images(
            X=gsn.weights_list[0].get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=closest_to_square_factors(gsn.hidden_size),
            tile_spacing=(1, 1)
        )
    )
    image.save(gsn.outdir + "gsn_mnist_weights.png")
def main():
    # Create a shared variable to monitor, with an update that adds a little uniform noise each call.
    var = theano.shared(T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(), name='W')
    updates = [(var, add_uniform(input=var, noise_level=.02))]

    # Pull individual statistics expressions from the helper dict (several are unused in this example;
    # note that 'var' is rebound here from the shared variable to its variance expression).
    stats = get_stats(var)
    l1 = stats.pop('l1')
    l2 = stats.pop('l2')
    min = stats.pop('min')
    max = stats.pop('max')
    var = stats.pop('var')
    std = stats.pop('std')
    mean = stats.pop('mean')

    # Wrap the expressions we care about in Monitors and group them into channels.
    mean_monitor = Monitor('mean', mean, train=True, valid=True)
    var_monitor = Monitor('var', var)

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    # Collapse the channels into (name, expression, out_service) tuples for each dataset split.
    train_collapsed = collapse_channels(monitors, train=True)
    train_collapsed = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
    valid_collapsed = collapse_channels(monitors, valid=True)
    valid_collapsed = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])

    # Live bokeh plotting for the monitor channels.
    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors, open_browser=True)

    # Compile Theano functions that compute the monitor values and apply the noise update.
    log.debug('compiling...')
    f = theano.function(inputs=[], outputs=list(train_collapsed.values()), updates=updates)
    f2 = theano.function(inputs=[], outputs=list(valid_collapsed.values()), updates=updates)
    log.debug('done')

    t1 = time.time()

    # Simulated 'training' loop: compute the train-split monitors and push them to the plot.
    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f()
        m = OrderedDict(zip(train_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    # Simulated 'validation' loop: same idea for the valid split.
    for epoch in range(100):
        t = time.time()
        log.debug(epoch)
        vals = f2()
        m = OrderedDict(zip(valid_collapsed.keys(), vals))
        plot.update_plots(epoch, m)
        log.debug('----- ' + make_time_units_string(time.time() - t))

    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
def main():
    ########################################
    # Initialization things with arguments #
    ########################################
    # use these arguments to get results from the paper referenced above
    _train_args = {"n_epoch": 1000,  # maximum number of times to run through the dataset
                   "batch_size": 100,  # number of examples to process in parallel (minibatch)
                   "minimum_batch_size": 1,  # the minimum number of examples for a batch to be considered
                   "save_frequency": 1,  # how many epochs between saving parameters
                   "early_stop_threshold": .9995,  # multiplier for how much the train cost must improve to avoid stopping early
                   "early_stop_length": 500,  # how many epochs to wait to see if the threshold has been reached
                   "learning_rate": .25,  # initial learning rate for SGD
                   "lr_decay": 'exponential',  # the decay function to use for the learning rate parameter
                   "lr_factor": .995,  # by how much to decay the learning rate each epoch
                   "momentum": 0.5,  # the parameter momentum amount
                   'momentum_decay': False,  # how to decay the momentum each epoch (if applicable)
                   'momentum_factor': 0,  # by how much to decay the momentum (in this case, not at all)
                   'nesterov_momentum': False,  # whether to use nesterov momentum updates (accelerated momentum)
                   }
    config_root_logger()
    log.info("Creating a new GSN")

    mnist = MNIST(concat_train_valid=True)
    gsn = GSN(layers=2,
              walkbacks=4,
              hidden_size=1500,
              visible_activation='sigmoid',
              hidden_activation='tanh',
              input_size=28*28,
              tied_weights=True,
              hidden_add_noise_sigma=2,
              input_salt_and_pepper=0.4,
              outdir='outputs/test_gsn/',
              vis_init=False,
              noiseless_h1=True,
              input_sampling=True,
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0,
              cost_function='binary_crossentropy')

    recon_cost_channel = MonitorsChannel(name='cost')
    recon_cost_channel.add(Monitor('recon_cost', gsn.get_monitors()['recon_cost'], test=True))
    recon_cost_channel.add(Monitor('noisy_recon_cost', gsn.get_monitors()['noisy_recon_cost'], test=True))

    # Load initial weights and biases from file
    # params_to_load = '../../../outputs/gsn/mnist/trained_epoch_395.pkl'
    # gsn.load_params(params_to_load)

    optimizer = SGD(model=gsn, dataset=mnist, **_train_args)
    # optimizer = AdaDelta(model=gsn, dataset=mnist, n_epoch=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=recon_cost_channel)

    # Save some reconstruction output images
    import opendeep.data.dataset as datasets
    n_examples = 100
    xs_test, _ = mnist.getSubset(datasets.TEST)
    xs_test = xs_test[:n_examples].eval()
    noisy_xs_test = gsn.f_noise(xs_test)
    reconstructed = gsn.run(noisy_xs_test)

    # Stack the examples in groups of 10: each grid row shows 10 originals, the same 10 with
    # noise applied, and their reconstructions.
    stacked = numpy.vstack(
        [numpy.vstack([xs_test[i * 10: (i + 1) * 10],
                       noisy_xs_test[i * 10: (i + 1) * 10],
                       reconstructed[i * 10: (i + 1) * 10]])
         for i in range(10)])

    number_reconstruction = PIL.Image.fromarray(
        tile_raster_images(stacked, (gsn.image_height, gsn.image_width), (10, 30))
    )
    number_reconstruction.save(gsn.outdir + 'reconstruction.png')
    log.info("saved output image!")

    # Construct an image from the first weight matrix
    image = PIL.Image.fromarray(
        tile_raster_images(
            X=gsn.weights_list[0].get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=closest_to_square_factors(gsn.hidden_size),
            tile_spacing=(1, 1)
        )
    )
    image.save(gsn.outdir + "gsn_mnist_weights.png")
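# ---------------------------------------------------------------------------------------------
# Sketch (not from the original example): the commented-out load_params call above shows how a
# previously trained GSN could be restored instead of retrained. `param_file` is a hypothetical
# path to a parameter pickle written during an earlier run (how often parameters are saved is
# controlled by the save_freq / save_frequency training arguments above).
# ---------------------------------------------------------------------------------------------
def restore_gsn(gsn, param_file):
    # Load saved weights and biases into an already-constructed GSN with a matching architecture.
    gsn.load_params(param_file)
    return gsn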