def main():
    """Smoke-test live plotting with monitors on a perturbed shared variable.

    Creates an 88x100 zero-filled shared variable named 'W' whose update is
    ``add_uniform(input=w, noise_level=.02)``, wraps its 'mean' and 'max'
    statistics (from ``get_stats``) in monitors and channels, compiles one
    Theano function for the train-collapsed monitors and one for the
    valid-collapsed monitors, and then calls each 100 times while sending the
    values to a ``Plot``. Per-step and total timings go to ``log.debug``.
    """
    w = theano.shared(
        T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(),
        name='W')
    # Both compiled functions below apply this update on every call.
    updates = [(w, add_uniform(input=w, noise_level=.02))]

    # Index only the statistics that are monitored. Binding locals called
    # `min`/`max` would shadow the builtins, and the rest were never used.
    stats = get_stats(w)
    mean_monitor = Monitor('mean', stats['mean'], train=True, valid=True)
    stat_monitor = Monitor('max', stats['max'])

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[stat_monitor])
    monitors = [w_channel, stat_channel]

    # collapse_channels yields 3-tuples; only (name, expression) is kept.
    train_collapsed = OrderedDict(
        (name, expression)
        for name, expression, _ in collapse_channels(monitors, train=True))
    valid_collapsed = OrderedDict(
        (name, expression)
        for name, expression, _ in collapse_channels(monitors, valid=True))

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors,
                open_browser=True)

    log.debug('compiling...')
    f = theano.function(inputs=[],
                        outputs=list(train_collapsed.values()),
                        updates=updates)
    f2 = theano.function(inputs=[],
                         outputs=list(valid_collapsed.values()),
                         updates=updates)
    log.debug('done')

    t1 = time.time()
    # The train pass and the valid pass are identical apart from the compiled
    # function and the monitor names, so one loop body serves both.
    for compiled_fn, collapsed in ((f, train_collapsed),
                                   (f2, valid_collapsed)):
        for epoch in range(100):
            t = time.time()
            log.debug(epoch)
            vals = compiled_fn()
            m = OrderedDict(zip(collapsed.keys(), vals))
            plot.update_plots(epoch, m)
            time.sleep(0.02)
            log.debug('----- ' + make_time_units_string(time.time() - t))
    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
def main():
    """Train a small MLP on MNIST with SGD while live-plotting monitors.

    Two monitor channels are plotted: "error", holding every monitor the
    model itself exposes through ``get_monitors()`` (tracked on train, valid
    and test), and "weights", holding the mean of the first parameter of the
    hidden layer and of the output layer (tracked on train only).
    """
    # A Prototype-based feedforward MLP: one hidden layer, then a softmax.
    net = Prototype()
    net.add(
        BasicLayer(input_size=28 * 28,
                   output_size=1000,
                   activation='rectifier',
                   noise='dropout'))
    net.add(SoftmaxLayer(output_size=10))

    # get_monitors() is iterated as {name: theano expression}. Every entry
    # the model provides becomes a Monitor on all three data splits.
    model_monitors = net.get_monitors()
    error_channel = MonitorsChannel(name="error")
    for monitor_name, monitor_expr in model_monitors.items():
        error_channel.add(
            Monitor(name=monitor_name,
                    expression=monitor_expr,
                    train=True,
                    valid=True,
                    test=True))

    # Mean of the first parameter of each layer (presumably the weights
    # matrix -- confirm against the layers' get_params() ordering).
    weight_channel = MonitorsChannel(name="weights")
    for layer_index, monitor_name in ((0, "hiddens_mean"), (1, "out_mean")):
        first_param = net[layer_index].get_params()[0]
        param_mean = T.mean(first_param)
        weight_channel.add(
            Monitor(name=monitor_name, expression=param_mean, train=True))

    # The plot object that receives the monitor values during training.
    live_plot = Plot(bokeh_doc_name="Monitor Tutorial",
                     monitor_channels=[error_channel, weight_channel],
                     open_browser=True)

    # SGD on MNIST with the train and valid sets kept separate.
    mnist = MNIST(concat_train_valid=False)
    trainer = SGD(model=net,
                  dataset=mnist,
                  n_epoch=500,
                  save_frequency=100,
                  batch_size=600,
                  learning_rate=.01,
                  lr_decay=False,
                  momentum=.9,
                  nesterov_momentum=True)
    trainer.train(plot=live_plot)
def main(plot=None, n_epoch=10):
    """Run the MNIST MLP example end to end: build, train and evaluate.

    Args:
        plot: When truthy, a ``Plot`` titled "OpenDeep MLP Example" is created
            and handed to the optimizer. When falsy, the value is passed to
            ``optimizer.train`` unchanged.
        n_epoch: Forwarded to ``setup_optimization``.
    """
    print('... loading and seting-up data')
    # Keep the train and valid sets separate.
    dataset = MNIST(concat_train_valid=False)

    print('... building the model structure')
    mlp = build_model()
    trainer, error_channels = setup_optimization(mlp, n_epoch, dataset)

    print('... training the model')
    # NOTE(review): the original comment says a KeyboardInterrupt can
    # optionally be used to save the latest parameters; that handling is not
    # visible in this function -- confirm in the optimizer.
    live_plot = (
        Plot("OpenDeep MLP Example",
             monitor_channels=error_channels,
             open_browser=True)
        if plot else plot
    )
    trainer.train(monitor_channels=error_channels, plot=live_plot)

    print('... evaluating model')
    test_data, test_labels = split_data(dataset)
    evaluate(test_data, test_labels, mlp)
def main():
    """Smoke-test live plotting of 'mean' and 'var' monitors.

    Creates an 88x100 zero-filled shared variable named 'W' whose update is
    ``add_uniform(input=..., noise_level=.02)``, monitors its 'mean' and 'var'
    statistics (from ``get_stats``) through two channels, compiles one Theano
    function for the train-collapsed monitors and one for the valid-collapsed
    monitors, and calls each 100 times while sending the values to a ``Plot``.
    Per-step and total timings go to ``log.debug``.
    """
    # Named `weights` rather than `var`, so the shared variable is never
    # confused with (or rebound to) the variance statistic monitored below.
    weights = theano.shared(
        T.zeros(shape=(88, 100), dtype=theano.config.floatX).eval(),
        name='W')
    # Both compiled functions below apply this update on every call.
    updates = [(weights, add_uniform(input=weights, noise_level=.02))]

    # Index only the statistics that are monitored. Binding locals called
    # `min`/`max` would shadow the builtins, and the rest were never used.
    stats = get_stats(weights)
    mean_monitor = Monitor('mean', stats['mean'], train=True, valid=True)
    var_monitor = Monitor('var', stats['var'])

    w_channel = MonitorsChannel('W', monitors=mean_monitor)
    stat_channel = MonitorsChannel('stats', monitors=[var_monitor])
    monitors = [w_channel, stat_channel]

    # collapse_channels yields 3-tuples; only (name, expression) is kept.
    train_collapsed = OrderedDict(
        (name, expression)
        for name, expression, _ in collapse_channels(monitors, train=True))
    valid_collapsed = OrderedDict(
        (name, expression)
        for name, expression, _ in collapse_channels(monitors, valid=True))

    plot = Plot(bokeh_doc_name='test_plots', monitor_channels=monitors,
                open_browser=True)

    log.debug('compiling...')
    f = theano.function(inputs=[],
                        outputs=list(train_collapsed.values()),
                        updates=updates)
    f2 = theano.function(inputs=[],
                         outputs=list(valid_collapsed.values()),
                         updates=updates)
    log.debug('done')

    t1 = time.time()
    # The train pass and the valid pass differ only in the compiled function
    # and the monitor names, so one loop body serves both.
    for compiled_fn, collapsed in ((f, train_collapsed),
                                   (f2, valid_collapsed)):
        for epoch in range(100):
            t = time.time()
            log.debug(epoch)
            vals = compiled_fn()
            m = OrderedDict(zip(collapsed.keys(), vals))
            plot.update_plots(epoch, m)
            log.debug('----- ' + make_time_units_string(time.time() - t))
    log.debug("TOTAL TIME " + make_time_units_string(time.time() - t1))
def run_midi(dataset):
    """Train an RNN-RBM on a MIDI dataset and save generated output.

    After training with AdaDelta (with live plotting), 200 steps are generated
    and written to ``<outdir>rnnrbm_generated_midi.mid``, a piano-roll figure
    is drawn when pylab is available, and the model's ``W`` matrix is saved as
    ``<outdir>rnnrbm_midi_weights.png``, where ``<outdir>`` is
    ``outputs/rnnrbm/<dataset>/``.

    Args:
        dataset: Name of the MIDI dataset: 'nottingham', 'jsb', 'muse' or
            'piano_de'.

    Raises:
        AssertionError: If ``dataset`` is not one of the recognized names.
    """
    log.info("Creating RNN-RBM for dataset %s!", dataset)

    outdir = "outputs/rnnrbm/%s/" % dataset

    # grab the MIDI dataset
    if dataset == 'nottingham':
        midi = Nottingham()
    elif dataset == 'jsb':
        midi = JSBChorales()
    elif dataset == 'muse':
        midi = MuseData()
    elif dataset == 'piano_de':
        midi = PianoMidiDe()
    else:
        raise AssertionError("dataset %s not recognized." % dataset)

    # create the RNN-RBM; the fixed seed drives the Theano random stream
    rng = numpy.random.RandomState(1234)
    mrg = RandomStreams(rng.randint(2**30))
    rnnrbm = RNN_RBM(
        input_size=88,
        hidden_size=150,
        rnn_hidden_size=100,
        k=15,
        weights_init='gaussian',
        weights_std=0.01,
        rnn_weights_init='identity',
        rnn_hidden_activation='relu',
        rnn_weights_std=0.0001,
        mrg=mrg,
        outdir=outdir)

    # AdaDelta is the optimizer actually used for training. An SGD instance
    # that was built here and immediately overwritten has been dropped.
    optimizer = AdaDelta(
        model=rnnrbm,
        dataset=midi,
        epochs=200,
        batch_size=100,
        min_batch_size=2,
        learning_rate=1e-6,
        save_freq=10,
        stop_patience=200)

    ll = Monitor('pseudo-log', rnnrbm.get_monitors()['pseudo-log'], test=True)
    mse = Monitor('frame-error', rnnrbm.get_monitors()['mse'],
                  valid=True, test=True)

    plot = Plot(bokeh_doc_name='rnnrbm_midi_%s' % dataset,
                monitor_channels=[ll, mse],
                open_browser=True)

    # perform training!
    optimizer.train(plot=plot)

    # use the generate function!
    generated, _ = rnnrbm.generate(initial=None, n_steps=200)

    dt = 0.3
    r = (21, 109)
    midiwrite(outdir + 'rnnrbm_generated_midi.mid', generated, r=r, dt=dt)

    if has_pylab:
        # NOTE(review): the figure is built but neither shown nor saved in
        # this function -- confirm that a caller does so.
        extent = (0, dt * len(generated)) + r
        pylab.figure()
        pylab.imshow(generated.T,
                     origin='lower',
                     aspect='auto',
                     interpolation='nearest',
                     cmap=pylab.cm.gray_r,
                     extent=extent)
        pylab.xlabel('time (s)')
        pylab.ylabel('MIDI note number')
        pylab.title('generated piano-roll')

    # Construct image from the weight matrix
    image = Image.fromarray(
        tile_raster_images(
            X=rnnrbm.W.get_value(borrow=True).T,
            img_shape=closest_to_square_factors(rnnrbm.input_size),
            tile_shape=closest_to_square_factors(rnnrbm.hidden_size),
            tile_spacing=(1, 1)))
    image.save(outdir + 'rnnrbm_midi_weights.png')

    log.debug("done!")

    # Explicitly drop the references to the large objects.
    del midi
    del rnnrbm
    del optimizer
def run_sequence(sequence=0):
    """Train an RNN-RBM on an MNIST sequence and save result images.

    After training with AdaDelta (with live plotting), 400 steps are generated
    and tiled into ``<outdir>rnnrbm_mnist_generated.png``, and the model's
    ``W`` matrix is tiled into ``<outdir>rnnrbm_mnist_weights.png``, where
    ``<outdir>`` is ``outputs/rnnrbm/mnist_<sequence>/``.

    Args:
        sequence: Passed to ``MNIST`` as ``sequence_number`` and used in the
            output directory and plot document names.
    """
    log.info("Creating RNN-RBM for sequence %d!" % sequence)

    # MNIST with the train and valid sets concatenated.
    data = MNIST(sequence_number=sequence, concat_train_valid=True)
    outdir = "outputs/rnnrbm/mnist_%d/" % sequence

    # Fixed seed so the Theano random stream is reproducible.
    seed_source = numpy.random.RandomState(1234)
    stream = RandomStreams(seed_source.randint(2**30))

    # NOTE(review): weights_interval uses 500 in the denominator while
    # hidden_size is 1000 -- confirm whether that is intentional.
    model = RNN_RBM(
        input_size=28 * 28,
        hidden_size=1000,
        rnn_hidden_size=100,
        k=15,
        weights_init='uniform',
        weights_interval=4 * numpy.sqrt(6. / (28 * 28 + 500)),
        rnn_weights_init='identity',
        rnn_hidden_activation='relu',
        rnn_weights_std=1e-4,
        mrg=stream,
        outdir=outdir,
    )
    # Pretrained parameters could be loaded at this point, e.g. with
    # model.load_params(outdir + 'trained_epoch_200.pkl').

    trainer = AdaDelta(
        model=model,
        dataset=data,
        n_epoch=200,
        batch_size=100,
        minimum_batch_size=2,
        learning_rate=1e-8,
        save_frequency=10,
        early_stop_length=200,
    )

    # Both monitors are tracked on the test split.
    crossentropy_monitor = Monitor(
        'crossentropy', model.get_monitors()['crossentropy'], test=True)
    error_monitor = Monitor('error', model.get_monitors()['mse'], test=True)

    live_plot = Plot(
        bokeh_doc_name='rnnrbm_mnist_%d' % sequence,
        monitor_channels=[crossentropy_monitor, error_monitor],
        open_browser=True,
    )

    trainer.train(plot=live_plot)

    # Generate 400 steps and tile them into a 20x20 grid of 28x28 images.
    log.debug("generating images...")
    samples, _unused = model.generate(initial=None, n_steps=400)
    sample_tiles = tile_raster_images(
        X=samples,
        img_shape=(28, 28),
        tile_shape=(20, 20),
        tile_spacing=(1, 1),
    )
    Image.fromarray(sample_tiles).save(outdir + "rnnrbm_mnist_generated.png")
    log.debug('saved generated.png')

    # Tile the transposed weight matrix, one 28x28 tile per row.
    weight_rows = model.W.get_value(borrow=True).T
    grid_shape = closest_to_square_factors(model.hidden_size)
    weight_tiles = tile_raster_images(
        X=weight_rows,
        img_shape=(28, 28),
        tile_shape=grid_shape,
        tile_spacing=(1, 1),
    )
    Image.fromarray(weight_tiles).save(outdir + "rnnrbm_mnist_weights.png")

    log.debug("done!")

    # Explicitly drop the references to the large objects.
    del data
    del model
    del trainer