def main():
    # First, let's create a simple feedforward MLP with one hidden layer as a Prototype.
    mlp = Prototype()
    mlp.add(BasicLayer(input_size=28 * 28, output_size=1000, activation='rectifier', noise='dropout'))
    mlp.add(SoftmaxLayer(output_size=10))

    # Now, we get to choose what values we want to monitor, and what datasets we would like to monitor on!
    # Each Model (in our case, the Prototype) has a get_monitors method that will return a useful
    # dictionary of {string_name: monitor_theano_expression} for various computations of the model we might
    # care about. By default, this method returns an empty dictionary - it is the model creator's job to
    # include potential monitor values.
    mlp_monitors = mlp.get_monitors()
    mlp_channel = MonitorsChannel(name="error")
    for name, expression in mlp_monitors.items():
        mlp_channel.add(Monitor(name=name, expression=expression, train=True, valid=True, test=True))

    # Create some monitors for statistics about the hidden and output weights!
    # Here we track the mean of each weights matrix; variance and standard deviation
    # follow the same pattern (see the sketch after this function).
    weights_channel = MonitorsChannel(name="weights")
    hiddens_1 = mlp[0].get_params()[0]
    hiddens1_mean = T.mean(hiddens_1)
    weights_channel.add(Monitor(name="hiddens_mean", expression=hiddens1_mean, train=True))

    hiddens_2 = mlp[1].get_params()[0]
    hiddens2_mean = T.mean(hiddens_2)
    weights_channel.add(Monitor(name="out_mean", expression=hiddens2_mean, train=True))

    # Create our plot object to do live plotting!
    plot = Plot(bokeh_doc_name="Monitor Tutorial",
                monitor_channels=[mlp_channel, weights_channel],
                open_browser=True)

    # Use the SGD optimizer.
    optimizer = SGD(model=mlp,
                    dataset=MNIST(concat_train_valid=False),
                    n_epoch=500,
                    save_frequency=100,
                    batch_size=600,
                    learning_rate=.01,
                    lr_decay=False,
                    momentum=.9,
                    nesterov_momentum=True)

    # Train, with the plot!
    optimizer.train(plot=plot)
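# A minimal sketch of the variance and standard-deviation monitors mentioned above.
# These lines would sit inside main(), right after the mean monitors; T.var and T.std
# are standard Theano reductions, and everything else reuses names already defined there.
weights_channel.add(Monitor(name="hiddens_var", expression=T.var(hiddens_1), train=True))
weights_channel.add(Monitor(name="hiddens_std", expression=T.std(hiddens_1), train=True))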
def setup_optimization(model, n_epoch, mnist_dataset):
    # setup optimizer: stochastic gradient descent
    optimizer = SGD(model=model,
                    dataset=mnist_dataset,
                    n_epoch=n_epoch,
                    batch_size=600,
                    learning_rate=.01,
                    momentum=.9,
                    nesterov_momentum=True,
                    save_frequency=500,
                    early_stop_threshold=0.997)
    # create a Monitor to view progress on a metric other than training cost
    error = Monitor('error', model.get_monitors()['softmax_error'], train=True, valid=True, test=True)
    return optimizer, error
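# A hypothetical usage sketch for setup_optimization, assuming an mlp Prototype like the
# one built in the first example. Passing monitors via the monitor_channels keyword mirrors
# the GSN example below; treat that keyword as an assumption, not a guaranteed signature.
optimizer, error = setup_optimization(model=mlp, n_epoch=100, mnist_dataset=MNIST(concat_train_valid=False))
optimizer.train(monitor_channels=[error])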
def main():
    ########################################
    # Initialization things with arguments #
    ########################################
    # use these arguments to get results from paper referenced above
    _train_args = {"epochs": 1000,  # maximum number of times to run through the dataset
                   "batch_size": 100,  # number of examples to process in parallel (minibatch)
                   "min_batch_size": 1,  # the minimum number of examples for a batch to be considered
                   "save_freq": 1,  # how many epochs between saving parameters
                   "stop_threshold": .9995,  # multiplier for how much the train cost must improve to avoid stopping early
                   "stop_patience": 500,  # how many epochs to wait to see if the threshold has been reached
                   "learning_rate": .25,  # initial learning rate for SGD
                   "lr_decay": 'exponential',  # the decay function to use for the learning rate parameter
                   "lr_decay_factor": .995,  # by how much to decay the learning rate each epoch
                   "momentum": 0.5,  # the parameter momentum amount
                   'momentum_decay': False,  # how to decay the momentum each epoch (if applicable)
                   'momentum_factor': 0,  # by how much to decay the momentum (in this case not at all)
                   'nesterov_momentum': False,  # whether to use Nesterov momentum update (accelerated momentum)
                   }

    config_root_logger()
    log.info("Creating a new GSN")

    mnist = MNIST(concat_train_valid=True)
    gsn = GSN(layers=2,
              walkbacks=4,
              hidden_size=1500,
              visible_activation='sigmoid',
              hidden_activation='tanh',
              input_size=28 * 28,
              tied_weights=True,
              hidden_add_noise_sigma=2,
              input_salt_and_pepper=0.4,
              outdir='outputs/test_gsn/',
              vis_init=False,
              noiseless_h1=True,
              input_sampling=True,
              weights_init='uniform',
              weights_interval='montreal',
              bias_init=0,
              cost_function='binary_crossentropy')

    recon_cost_channel = MonitorsChannel(name='cost')
    recon_cost_channel.add(Monitor('recon_cost', gsn.get_monitors()['recon_cost'], test=True))
    recon_cost_channel.add(Monitor('noisy_recon_cost', gsn.get_monitors()['noisy_recon_cost'], test=True))

    # Load initial weights and biases from file
    # params_to_load = '../../../outputs/gsn/mnist/trained_epoch_395.pkl'
    # gsn.load_params(params_to_load)

    optimizer = SGD(model=gsn, dataset=mnist, **_train_args)
    # optimizer = AdaDelta(model=gsn, dataset=mnist, epochs=200, batch_size=100, learning_rate=1e-6)
    optimizer.train(monitor_channels=recon_cost_channel)

    # Save some reconstruction output images
    n_examples = 100
    xs_test = mnist.test_inputs[:n_examples]
    noisy_xs_test = gsn.f_noise(xs_test)
    reconstructed = gsn.run(noisy_xs_test)
    # Stack rows of original / noisy / reconstructed digits into one array for tiling.
    stacked = numpy.vstack(
        [numpy.vstack([xs_test[i * 10: (i + 1) * 10],
                       noisy_xs_test[i * 10: (i + 1) * 10],
                       reconstructed[i * 10: (i + 1) * 10]])
         for i in range(10)])
    number_reconstruction = PIL.Image.fromarray(
        tile_raster_images(stacked, (gsn.image_height, gsn.image_width), (10, 30))
    )
    number_reconstruction.save(gsn.outdir + 'reconstruction.png')
    log.info("saved output image!")

    # Construct image from the weight matrix
    image = PIL.Image.fromarray(
        tile_raster_images(
            X=gsn.weights_list[0].get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=closest_to_square_factors(gsn.hidden_size),
            tile_spacing=(1, 1)
        )
    )
    image.save(gsn.outdir + "gsn_mnist_weights.png")
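# Alongside the saved images, a cheap quantitative sanity check can help; a sketch
# (not part of the original example) of the mean squared reconstruction error over the
# denoised test batch. These lines would slot into main() right after `reconstructed`
# is computed, using only the numpy already in scope there.
recon_mse = numpy.mean((xs_test - reconstructed) ** 2)
log.info("mean squared reconstruction error on %d test examples: %f", n_examples, recon_mse)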
def add_list_layers():
    # You can also add lists of layers at a time (or as initialization) to a Prototype! This lets you specify
    # more complex interactions between layers!
    hidden1 = BasicLayer(input_size=28 * 28, output_size=512, activation='rectifier', noise='dropout')
    hidden2 = BasicLayer(inputs_hook=(512, hidden1.get_outputs()), output_size=512, activation='rectifier', noise='dropout')
    class_layer = SoftmaxLayer(inputs_hook=(512, hidden2.get_outputs()), output_size=10)
    mlp = Prototype([hidden1, hidden2, class_layer])
    return mlp


if __name__ == '__main__':
    mlp = add_list_layers()
    optimizer = SGD(model=mlp,
                    dataset=MNIST(concat_train_valid=True),
                    n_epoch=500,
                    batch_size=600,
                    learning_rate=.01,
                    momentum=.9,
                    nesterov_momentum=True)
    optimizer.train()
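# For comparison, a sketch of the same network built by adding one layer at a time with
# Prototype.add(), mirroring the first example in this file (where a SoftmaxLayer is added
# without an explicit inputs_hook). This assumes Prototype.add hooks each new layer to the
# previous layer's outputs; the function name here is illustrative, not part of the library.
def sequential_add_layers():
    mlp = Prototype()
    mlp.add(BasicLayer(input_size=28 * 28, output_size=512, activation='rectifier', noise='dropout'))
    mlp.add(BasicLayer(output_size=512, activation='rectifier', noise='dropout'))
    mlp.add(SoftmaxLayer(output_size=10))
    return mlp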
# Set up the logging environment to display outputs (optional),
# although this is recommended over print statements everywhere.
import logging
import opendeep.log.logger as logger
logger.config_root_logger()
log = logging.getLogger(__name__)
log.info("Creating RBM!")

# grab the MNIST dataset
mnist = MNIST(concat_train_valid=False)

# create the RBM
rng = numpy.random.RandomState(1234)
mrg = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(2 ** 30))
rbm = RBM(input_size=28 * 28,
          hidden_size=500,
          k=15,
          weights_init='uniform',
          weights_interval=4 * numpy.sqrt(6. / (28 * 28 + 500)),
          rng=rng)
# rbm.load_params('rbm_trained.pkl')

# make an optimizer to train it (AdaDelta is a good default)
optimizer = SGD(model=rbm,
                dataset=mnist,
                n_epoch=15,
                batch_size=20,
                learning_rate=0.1,
                lr_decay=False,
                nesterov_momentum=False)
# optimizer = AdaDelta(model=rbm, dataset=mnist, n_epoch=200, batch_size=100, learning_rate=1e-6)

# perform training!
optimizer.train()

# test it on some images!
test_data = mnist.getSubset(TEST)[0]
test_data = test_data[:25].eval()

# use the run function!
preds = rbm.run(test_data)

# Construct image from the test matrix
image = Image.fromarray(
    tile_raster_images(
        X=test_data,
        img_shape=(28, 28),
        tile_shape=(5, 5),
        tile_spacing=(1, 1)
    )
)
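# A sketch of saving the tiled test image and visualizing the learned filters, following
# the weight-image pattern from the RNN-RBM example below. The filenames and the rbm.W
# attribute are assumptions here, not confirmed parts of the excerpt above.
image.save('rbm_test_data.png')  # hypothetical output filename
filters = Image.fromarray(
    tile_raster_images(
        X=rbm.W.get_value(borrow=True).T,  # assumes the RBM exposes its weights as rbm.W
        img_shape=(28, 28),
        tile_shape=closest_to_square_factors(500),
        tile_spacing=(1, 1)
    )
)
filters.save('rbm_mnist_weights.png')  # hypothetical output filename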
def run_midi(dataset):
    log.info("Creating RNN-RBM for dataset %s!", dataset)

    outdir = "outputs/rnnrbm/%s/" % dataset

    # grab the MIDI dataset
    if dataset == 'nottingham':
        midi = Nottingham()
    elif dataset == 'jsb':
        midi = JSBChorales()
    elif dataset == 'muse':
        midi = MuseData()
    elif dataset == 'piano_de':
        midi = PianoMidiDe()
    else:
        raise AssertionError("dataset %s not recognized." % dataset)

    # create the RNN-RBM
    # rng = numpy.random
    # rng.seed(0xbeef)
    # mrg = RandomStreams(seed=rng.randint(1 << 30))
    rng = numpy.random.RandomState(1234)
    mrg = RandomStreams(rng.randint(2 ** 30))
    # rnnrbm = RNN_RBM(input_size=88,
    #                  hidden_size=150,
    #                  rnn_hidden_size=100,
    #                  k=15,
    #                  weights_init='gaussian',
    #                  weights_std=0.01,
    #                  rnn_weights_init='gaussian',
    #                  rnn_weights_std=0.0001,
    #                  rng=rng,
    #                  outdir=outdir)
    rnnrbm = RNN_RBM(input_size=88,
                     hidden_size=150,
                     rnn_hidden_size=100,
                     k=15,
                     weights_init='gaussian',
                     weights_std=0.01,
                     rnn_weights_init='identity',
                     rnn_hidden_activation='relu',
                     # rnn_weights_init='gaussian',
                     # rnn_hidden_activation='tanh',
                     rnn_weights_std=0.0001,
                     mrg=mrg,
                     outdir=outdir)

    # make an optimizer to train it; the AdaDelta below is the one actually used,
    # with an SGD alternative left commented out like the other alternatives above
    # optimizer = SGD(model=rnnrbm,
    #                 dataset=midi,
    #                 epochs=200,
    #                 batch_size=100,
    #                 min_batch_size=2,
    #                 learning_rate=.001,
    #                 save_freq=10,
    #                 stop_patience=200,
    #                 momentum=False,
    #                 momentum_decay=False,
    #                 nesterov_momentum=False)
    optimizer = AdaDelta(model=rnnrbm,
                         dataset=midi,
                         epochs=200,
                         batch_size=100,
                         min_batch_size=2,
                         # learning_rate=1e-4,
                         learning_rate=1e-6,
                         save_freq=10,
                         stop_patience=200)

    ll = Monitor('pseudo-log', rnnrbm.get_monitors()['pseudo-log'], test=True)
    mse = Monitor('frame-error', rnnrbm.get_monitors()['mse'], valid=True, test=True)
    plot = Plot(bokeh_doc_name='rnnrbm_midi_%s' % dataset, monitor_channels=[ll, mse], open_browser=True)

    # perform training!
    optimizer.train(plot=plot)

    # use the generate function!
    generated, _ = rnnrbm.generate(initial=None, n_steps=200)

    dt = 0.3
    r = (21, 109)
    midiwrite(outdir + 'rnnrbm_generated_midi.mid', generated, r=r, dt=dt)

    if has_pylab:
        extent = (0, dt * len(generated)) + r
        pylab.figure()
        pylab.imshow(generated.T, origin='lower', aspect='auto',
                     interpolation='nearest', cmap=pylab.cm.gray_r, extent=extent)
        pylab.xlabel('time (s)')
        pylab.ylabel('MIDI note number')
        pylab.title('generated piano-roll')

    # Construct image from the weight matrix
    image = Image.fromarray(
        tile_raster_images(
            X=rnnrbm.W.get_value(borrow=True).T,
            img_shape=closest_to_square_factors(rnnrbm.input_size),
            tile_shape=closest_to_square_factors(rnnrbm.hidden_size),
            tile_spacing=(1, 1)))
    image.save(outdir + 'rnnrbm_midi_weights.png')
    log.debug("done!")

    del midi
    del rnnrbm
    del optimizer
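# A minimal entry-point sketch: run_midi accepts the four dataset keys dispatched above
# ('nottingham', 'jsb', 'muse', 'piano_de'); 'nottingham' is chosen here arbitrarily.
if __name__ == '__main__':
    run_midi('nottingham')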