def __init__(self, dim, **kwargs):
    super(FeedbackRNN, self).__init__(**kwargs)
    self.dim = dim
    self.first_recurrent_layer = SimpleRecurrent(
        dim=self.dim, activation=Identity(), name='first_recurrent_layer',
        weights_init=initialization.Identity())
    self.second_recurrent_layer = SimpleRecurrent(
        dim=self.dim, activation=Identity(), name='second_recurrent_layer',
        weights_init=initialization.Identity())
    self.children = [self.first_recurrent_layer,
                     self.second_recurrent_layer]
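# The constructor above appears to follow the FeedbackRNN example from the
# Blocks recurrent-bricks tutorial. Below is a hedged sketch of the apply and
# get_dim methods that usually accompany it; the exact feedback wiring (the
# second layer's state added to the first layer's state) and the `recurrent`
# decorator import (blocks.bricks.recurrent.recurrent) are assumptions based
# on that tutorial, not taken from the original file.
@recurrent(sequences=['inputs'], contexts=[],
           states=['first_states', 'second_states'],
           outputs=['first_states', 'second_states'])
def apply(self, inputs, first_states=None, second_states=None):
    # feedback: the first layer sees its own state plus the second layer's
    first_h = self.first_recurrent_layer.apply(
        inputs=inputs, states=first_states + second_states, iterate=False)
    second_h = self.second_recurrent_layer.apply(
        inputs=first_h, states=second_states, iterate=False)
    return first_h, second_h

def get_dim(self, name):
    return (self.dim if name in ('inputs', 'first_states', 'second_states')
            else super(FeedbackRNN, self).get_dim(name))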
def example():
    """Simple recurrent example.

    Taken from:
    https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb
    """
    x = tensor.tensor3('x')

    rnn = SimpleRecurrent(dim=3, activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()
    h = rnn.apply(x)
    f = theano.function([x], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    doubler = Linear(input_dim=3, output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    h_doubler = rnn.apply(doubler.apply(x))
    f = theano.function([x], h_doubler)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX)))

    # Initial state
    h0 = tensor.matrix('h0')
    h = rnn.apply(inputs=x, states=h0)
    f = theano.function([x, h0], h)
    print(f(np.ones((3, 1, 3), dtype=theano.config.floatX),
            np.ones((1, 3), dtype=theano.config.floatX)))
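# Hedged note on the expected output of example() above: with identity
# recurrent weights and an identity activation the update is
# h_t = h_{t-1} + x_t, so three time steps of all-ones inputs accumulate to
# 1, 2 and 3 per unit (the `doubler` variant accumulates 2, 4, 6, and an
# all-ones h0 shifts everything up by one). A plain-numpy sketch of the same
# recurrence, with dim=3 and three steps assumed for illustration:
import numpy as np

h = np.zeros(3)
for t in range(3):
    h = h + np.ones(3)  # identity recurrent weights, identity activation
    print(h)            # [1. 1. 1.], then [2. 2. 2.], then [3. 3. 3.]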
def __init__(self, dim, depth, **kwargs):
    super(FeedbackRNNStack, self).__init__(**kwargs)
    self.dim = dim
    self.depth = depth
    self.children = []
    FeedbackRNNStack.depth = depth
    for i in range(depth):
        self.children.append(
            SimpleRecurrent(dim=self.dim, activation=Identity(),
                            name=str(i) + 'th_recurrent_layer',
                            weights_init=initialization.Identity()))
def setUp(self):
    prototype = SimpleRecurrent(dim=3, activation=Tanh())
    self.layers = [
        Bidirectional(weights_init=Orthogonal(), prototype=prototype)
        for _ in range(3)]
    self.stack = RecurrentStack(self.layers)
    for fork in self.stack.forks:
        fork.weights_init = Identity(1)
        fork.biases_init = Constant(0)
    self.stack.initialize()

    self.x_val = 0.1 * numpy.asarray(
        list(itertools.permutations(range(4))),
        dtype=theano.config.floatX)
    self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                  self.x_val[..., None])
    self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
    self.mask_val[12:24, 3] = 0
class CustomLSTMWeights(NdarrayInitialization):
    """Identity on the diagonal blocks and IsotropicGaussian everywhere else."""

    def __init__(self, std=1, mean=0):
        self.gaussian_init = IsotropicGaussian(std=std, mean=mean)
        self.identity = Identity()

    def generate(self, rng, shape):
        if len(shape) != 2:
            raise ValueError("expected a 2D shape")
        assert shape[0] == shape[1]
        size = shape[0] // 4
        assert size * 4 == shape[0]
        rows = []
        for i in range(4):
            row = []
            for j in range(4):
                if i == j:
                    square = self.identity.generate(rng, (size, size))
                else:
                    square = self.gaussian_init.generate(rng, (size, size))
                row.append(square)
            rows.append(numpy.hstack(row))
        result = numpy.vstack(rows)
        return result.astype(theano.config.floatX)
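# Hedged usage sketch for CustomLSTMWeights (the shape (8, 8) and std below
# are illustrative assumptions): generate an 8x8 matrix made of a 4x4 grid of
# 2x2 blocks and inspect a diagonal and an off-diagonal block.
import numpy

rng = numpy.random.RandomState(1234)
W = CustomLSTMWeights(std=0.1).generate(rng, (8, 8))
print(W.shape)      # (8, 8)
print(W[0:2, 0:2])  # diagonal block: 2x2 identity
print(W[0:2, 2:4])  # off-diagonal block: 2x2 Gaussian noise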
def __init__(self, std=1, mean=0):
    self.gaussian_init = IsotropicGaussian(std=std, mean=mean)
    self.identity = Identity()
def main(config, tr_stream, dev_stream, use_bokeh=False,
         the_task=None, the_track=None):
    config['the_task'] = the_task

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        # enc_embed is the dimension of the word embedding matrix in the
        # encoder; enc_nhids is the number of hidden units in the encoder GRU
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2, config['use_attention'],
        cost_type=config['error_fct'])
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)
    testVar = decoder.getTestVar(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    my_rng = numpy.random.RandomState(config['rng_value'])
    if config['identity_init']:
        encoder.weights_init = decoder.weights_init = Identity()
    else:
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
    encoder.rng = decoder.rng = my_rng
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    encoder.bidir.prototype.rng = my_rng
    decoder.transition.weights_init = Orthogonal()
    decoder.transition.rng = my_rng
    encoder.initialize()
    decoder.initialize()

    # Apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params + dec_params,
                         config['weight_noise_ff'], seed=my_rng)

    cost = cg.outputs[0]

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}"
                .format(len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
logger.info("Initializing extensions") # this is ugly code and done, because I am not sure if the order of the extensions is important if 'track2' in config['saveto']: # less epochs for track 2, because of more data if config['early_stopping']: extensions = [ FinishAfter(after_n_epochs=config['finish_after']/2), #FinishAfter(after_n_batches=config['finish_after']), TrainingDataMonitoring([cost], after_batch=True), Printing(after_batch=True), CheckpointNMT(config['saveto'], every_n_batches=config['save_freq']) ] else: extensions = [ FinishAfter(after_n_epochs=config['finish_after']/2), #FinishAfter(after_n_batches=config['finish_after']), TrainingDataMonitoring([cost], after_batch=True), Printing(after_batch=True), CheckpointNMT(config['saveto'], every_n_batches=config['save_freq']) ] else: if config['early_stopping']: extensions = [ FinishAfter(after_n_epochs=config['finish_after']), #FinishAfter(after_n_batches=config['finish_after']), TrainingDataMonitoring([cost], after_batch=True), Printing(after_batch=True), CheckpointNMT(config['saveto'], every_n_batches=config['save_freq']) ] else: extensions = [ FinishAfter(after_n_epochs=config['finish_after']), #FinishAfter(after_n_batches=config['finish_after']), TrainingDataMonitoring([cost], after_batch=True), Printing(after_batch=True), CheckpointNMT(config['saveto'], every_n_batches=config['save_freq']) ] # Set up beam search and sampling computation graphs if necessary if config['hook_samples'] >= 1: logger.info("Building sampling model") sampling_representation = encoder.apply( sampling_input, tensor.ones(sampling_input.shape)) generated = decoder.generate(sampling_input, sampling_representation) search_model = Model(generated) _, samples = VariableFilter( bricks=[decoder.sequence_generator], name="outputs")( ComputationGraph(generated[1])) # generated[1] is next_outputs # Add sampling if config['hook_samples'] >= 1: logger.info("Building sampler") extensions.append( Sampler(model=search_model, data_stream=tr_stream, hook_samples=config['hook_samples'], #every_n_batches=1, every_n_batches=config['sampling_freq'], src_vocab_size=8)) #src_vocab_size=config['src_vocab_size'])) # Add early stopping based on bleu if config['val_set'] is not None: logger.info("Building accuracy validator") extensions.append( AccuracyValidator(sampling_input, samples=samples, config=config, model=search_model, data_stream=dev_stream, after_training=True, #after_epoch=True)) every_n_epochs=5)) else: logger.info("No validation set given for this language") # Reload model if necessary if config['reload']: extensions.append(LoadNMT(config['saveto'])) # Set up training algorithm logger.info("Initializing training algorithm") algorithm = GradientDescent( cost=cost, parameters=cg.parameters, step_rule=CompositeRule([StepClipping(config['step_clipping']), eval(config['step_rule'])()]) ) # Initialize main loop logger.info("Initializing main loop") main_loop = MainLoop( model=training_model, algorithm=algorithm, data_stream=tr_stream, extensions=extensions ) # Train! main_loop.run()
softmax_out = softmax.apply(pre_softmax.reshape((-1, o_dim)))
softmax_out = softmax_out.reshape(shape)
softmax_out.name = 'softmax_out'

# Compare only the last time-step
cost = CategoricalCrossEntropy().apply(y[-1, :, 0], softmax_out[-1])
cost.name = 'CrossEntropy'
error_rate = MisclassificationRate().apply(y[-1, :, 0], softmax_out[-1])
error_rate.name = 'error_rate'

# Initialization
for brick in (x_to_h1, h1_to_o):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0)
    brick.initialize()
rnn.weights_init = Identity()
rnn.biases_init = Constant(0)
rnn.initialize()

print 'Building training process...'
algorithm = GradientDescent(
    cost=cost,
    parameters=ComputationGraph(cost).parameters,
    step_rule=learning_algorithm(learning_rate=1e-6, momentum=0.0,
                                 clipping_threshold=1.0, algorithm='adam'))

train_stream, valid_stream = MNIST(batch_size=batch_size)
monitor_train_cost = TrainingDataMonitoring([cost, error_rate],
def test_identity():
    assert str(Identity(2.0)).endswith(' mult=2.0>')
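# A small illustrative check to go with the repr test above, assuming that
# blocks' Identity(mult) initializer scales an identity matrix by `mult`:
import numpy
from blocks.initialization import Identity

rng = numpy.random.RandomState(0)
W = Identity(2.0).generate(rng, (3, 3))
assert numpy.allclose(W, 2.0 * numpy.eye(3))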
print "EVALled a thing" res = out.shape.eval({x: np.ones((6, 9, 19), dtype=floatX)}) print res ### Identity testing from blocks.initialization import Identity, IsotropicGaussian from blocks import bricks from blocks.bricks import Sigmoid dim = 2 floatX = theano.config.floatX x = tensor.tensor3('input') gru = GatedRecurrentFull( hidden_dim=dim, state_to_state_init=Identity(1.), #state_to_reset_init=Identity(1.), state_to_reset_init=IsotropicGaussian(0.2), state_to_update_init=Identity(1.0), activation=bricks.Identity(1.0), gate_activation=Sigmoid(), input_to_state_transform=Linear( input_dim=dim, output_dim=dim, weights_init=Identity(1.0), #weights_init=IsotropicGaussian(0.02), biases_init=Constant(0.0)), input_to_update_transform=Linear( input_dim=dim, output_dim=dim, #weights_init=Constant(0.0),
def initialize(self):
    self.model.initialize()
    Identity().initialize(self.rnn.W_state, self.rnn.rng)
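# Hedged sketch of what the Identity().initialize(...) call above does:
# NdarrayInitialization.initialize(var, rng) overwrites the shared variable's
# value with a freshly generated array of the variable's shape. The names
# below (W_state, dim) are illustrative assumptions, not from the original.
import numpy
import theano
from blocks.initialization import Identity

dim = 4
W_state = theano.shared(numpy.zeros((dim, dim), dtype=theano.config.floatX))
Identity().initialize(W_state, numpy.random.RandomState(1))
assert numpy.allclose(W_state.get_value(), numpy.eye(dim))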
def testing_init(brick):
    brick.weights_init = Identity()
    brick.biases_init = Constant(0)
    brick.initialize()
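# Hedged usage sketch for testing_init: applied to a square Linear brick
# (Linear and the dimensions below are assumptions for illustration), identity
# weights and zero biases make the brick's output equal its input.
import numpy
import theano
from theano import tensor
from blocks.bricks import Linear

linear = Linear(input_dim=3, output_dim=3)
testing_init(linear)
x = tensor.matrix('x')
f = theano.function([x], linear.apply(x))
inp = numpy.ones((2, 3), dtype=theano.config.floatX)
assert numpy.allclose(f(inp), inp)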
def set_up(self, config=None, make_prunable=False):
    """Loads and initializes all the theano variables for the training
    model and the decoding model.

    Args:
        config (dict): NMT configuration
    """
    if config:
        self.config = config
    else:
        config = self.config

    # Create Theano variables
    logging.debug('Creating theano variables')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence_mask = tensor.matrix('target_mask')

    # Construct model (fs439: Add NoLookup options)
    if config['dec_layers'] != 1:
        logging.fatal("Only dec_layers=1 supported.")
    logging.debug('Building RNN encoder-decoder')
    if config['src_sparse_feat_map']:
        if config['enc_layers'] != 1:
            logging.fatal("Only enc_layers=1 supported for sparse "
                          "source features.")
        source_sentence = tensor.tensor3('source')
        self.sampling_input = tensor.tensor3('input')
        encoder = NoLookupEncoder(config['enc_embed'], config['enc_nhids'])
    else:
        source_sentence = tensor.lmatrix('source')
        self.sampling_input = tensor.lmatrix('input')
        if config['enc_layers'] > 1 and not config['enc_share_weights']:
            encoder = DeepBidirectionalEncoder(
                config['src_vocab_size'], config['enc_embed'],
                config['enc_layers'], config['enc_skip_connections'],
                config['enc_nhids'])
        else:
            encoder = BidirectionalEncoder(
                config['src_vocab_size'], config['enc_embed'],
                config['enc_layers'], config['enc_skip_connections'],
                config['enc_nhids'])
    if config['trg_sparse_feat_map']:
        target_sentence = tensor.tensor3('target')
        decoder = NoLookupDecoder(
            config['trg_vocab_size'], config['dec_embed'],
            config['dec_nhids'], config['att_nhids'],
            config['maxout_nhids'], config['enc_nhids'] * 2,
            config['attention'], config['dec_attention_sources'],
            config['dec_readout_sources'], config['memory'],
            config['memory_size'], config['seq_len'],
            config['dec_init'])
    else:
        target_sentence = tensor.lmatrix('target')
        decoder = Decoder(
            config['trg_vocab_size'], config['dec_embed'],
            config['dec_nhids'], config['att_nhids'],
            config['maxout_nhids'], config['enc_nhids'] * 2,
            config['attention'], config['dec_attention_sources'],
            config['dec_readout_sources'], config['memory'],
            config['memory_size'], config['seq_len'],
            config['dec_init'], make_prunable=make_prunable)
    if config['annotations'] != 'direct':
        annotators = []
        add_direct = False
        for name in config['annotations'].split(','):
            if name == 'direct':
                add_direct = True
            elif name == 'hierarchical':
                annotators.append(HierarchicalAnnotator(encoder))
            else:
                logging.fatal("Annotation strategy %s unknown" % name)
        encoder = EncoderWithAnnotators(encoder, annotators, add_direct)
    annotations, annotations_mask = encoder.apply(source_sentence,
                                                  source_sentence_mask)
    self.cost = decoder.cost(annotations, annotations_mask,
                             target_sentence, target_sentence_mask)

    logging.info('Creating computational graph')
    self.cg = ComputationGraph(self.cost)

    # Initialize model
    logging.info('Initializing model')
    encoder.weights_init = decoder.weights_init = Identity()
    # encoder.weights_init = decoder.weights_init = IsotropicGaussian(
    #     config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    try:
        # encoder.bidir.prototype.weights_init = Orthogonal()
        encoder.bidir.prototype.weights_init = Identity()
    except AttributeError:
        pass  # It's fine, no bidirectional encoder
    decoder.transition.weights_init = Identity()
    # decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logging.info('Applying dropout')
        dropout_inputs = [
            x for x in self.cg.intermediary_variables
            if x.name == 'maxout_apply_output']
        self.cg = apply_dropout(self.cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logging.info('Applying weight noise to ff layers')
        if encoder.lookup:
            enc_params = Selector(encoder.lookup).get_parameters().values()
            enc_params += Selector(encoder.fwd_fork).get_parameters().values()
            enc_params += Selector(encoder.back_fork).get_parameters().values()
        else:
            enc_params = []
        dec_params = Selector(
            decoder.sequence_generator.readout).get_parameters().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_parameters().values()
        self.cg = apply_noise(self.cg, enc_params + dec_params,
                              config['weight_noise_ff'])

    # Print shapes
    shapes = [param.get_value().shape for param in self.cg.parameters]
    logging.debug("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logging.debug('    {:15}: {}'.format(shape, count))
    logging.debug("Total number of CG parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(
        Selector(encoder).get_parameters(),
        Selector(decoder).get_parameters())
    logging.debug("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logging.debug('    {:15}: {}'.format(value.get_value().shape, name))
    logging.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logging.info("Building model")
    self.training_model = Model(self.cost)

    logging.info("Building sampling model")
    src_shape = (self.sampling_input.shape[-2],
                 self.sampling_input.shape[-1])  # batch_size x sen_length
    sampling_representation, _ = encoder.apply(self.sampling_input,
                                               tensor.ones(src_shape))
    generated = decoder.generate(src_shape, sampling_representation)
    self.search_model = Model(generated)
    generated_outputs = VariableFilter(
        bricks=[decoder.sequence_generator], name="outputs")(
            ComputationGraph(generated[1]))  # generated[1] is next_outputs
    self.samples = generated_outputs[1]

    self.encoder = encoder
    self.decoder = decoder
def main_run(_config, _log):
    from collections import namedtuple
    c = namedtuple("Config", _config.keys())(*_config.values())
    _log.info("Running with" + str(_config))

    import theano
    from theano import tensor as T
    import numpy as np

    from dataset import IMDBText, GloveTransformer
    from blocks.initialization import (Uniform, Constant, IsotropicGaussian,
                                       NdarrayInitialization, Identity,
                                       Orthogonal)
    from blocks.bricks.recurrent import LSTM, SimpleRecurrent, GatedRecurrent
    from blocks.bricks.parallel import Fork
    from blocks.bricks import Linear, Sigmoid, Tanh, Rectifier
    from blocks import bricks
    from blocks.extensions import Printing, Timing
    from blocks.extensions.monitoring import (DataStreamMonitoring,
                                              TrainingDataMonitoring)
    from blocks.extensions.plot import Plot
    from plot import PlotHistogram
    from blocks.algorithms import (GradientDescent, Adam, Scale, StepClipping,
                                   CompositeRule, AdaDelta)
    from blocks.graph import ComputationGraph, apply_dropout
    from blocks.main_loop import MainLoop
    from blocks.model import Model
    from cuboid.algorithms import AdaM, NAG
    from cuboid.extensions import EpochProgress
    from fuel.streams import DataStream, ServerDataStream
    from fuel.transformers import Padding
    from fuel.schemes import ShuffledScheme
    from Conv1D import Conv1D, MaxPooling1D
    from schemes import BatchwiseShuffledScheme
    from bricks import WeightedSigmoid, GatedRecurrentFull
    from multiprocessing import Process
    import fuel
    import logging
    from initialization import SumInitialization
    from transformers import DropSources

    global train_p
    global test_p

    x = T.tensor3('features')
    #m = T.matrix('features_mask')
    y = T.imatrix('targets')
    #x = x + m.mean() * 0

    dropout_variables = []

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"

    gloveMapping = Linear(
        input_dim=embedding_size,
        output_dim=c.rnn_input_dim,
        weights_init=Orthogonal(),
        #weights_init=IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="gloveMapping")
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="gloveRec").apply(o)
    dropout_variables.append(o)

    summed_mapped_glove = o.sum(axis=1)  # take out the sequence
    glove_out = Linear(
        input_dim=c.rnn_input_dim,
        output_dim=1.0,
        weights_init=IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="mapping_to_output")
    glove_out.initialize()
    deeply_sup_0 = glove_out.apply(summed_mapped_glove)
    deeply_sup_probs = Sigmoid(name="deeply_sup_softmax").apply(deeply_sup_0)

    input_dim = c.rnn_input_dim
    hidden_dim = c.rnn_dim

    gru = GatedRecurrentFull(
        hidden_dim=hidden_dim,
        activation=Tanh(),
        #activation=bricks.Identity(),
        gate_activation=Sigmoid(),
        state_to_state_init=SumInitialization(
            [Identity(1.0), IsotropicGaussian(c.wstd)]),
        state_to_reset_init=IsotropicGaussian(c.wstd),
        state_to_update_init=IsotropicGaussian(c.wstd),
        input_to_state_transform=Linear(
            input_dim=input_dim, output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            biases_init=Constant(0.0)),
        input_to_update_transform=Linear(
            input_dim=input_dim, output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            #biases_init=Constant(-2.0)),
            biases_init=Constant(-1.0)),
        input_to_reset_transform=Linear(
            input_dim=input_dim, output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            #biases_init=Constant(-3.0))
            biases_init=Constant(-2.0)))
    gru.initialize()

    rnn_in = o.dimshuffle(1, 0, 2)
    #rnn_in = o
    #rnn_out = gru.apply(rnn_in, mask=m.T)
    rnn_out = gru.apply(rnn_in)
    state_to_state = gru.rnn.state_to_state
    state_to_state.name = "state_to_state"

    #o = rnn_out[-1, :, :]
    o = rnn_out[-1]
    #o = rnn_out[:, -1, :]
    #o = rnn_out.mean(axis=1)

    #print rnn_last_out.eval({
    #    x: np.ones((3, 101, 300), dtype=theano.config.floatX),
    #    m: np.ones((3, 101), dtype=theano.config.floatX)})
    #raw_input()
    #o = rnn_out.mean(axis=1)

    dropout_variables.append(o)

    score_layer = Linear(
        input_dim=hidden_dim,
        output_dim=1,
        weights_init=IsotropicGaussian(std=c.wstd),
        biases_init=Constant(0.),
        name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)
    #probs = deeply_sup_probs

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    #cost_deeply_sup0 = -(y * T.log(deeply_sup_probs)
    #                     + (1 - y) * T.log(1 - deeply_sup_probs)).mean()
    #cost += cost_deeply_sup0 * c.deeply_factor
    cost.name = 'cost'

    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    #print rnn_in.shape.eval(
    #    {x: np.ones((45, 111, embedding_size), dtype=theano.config.floatX)})
    #print rnn_out.shape.eval(
    #    {x: np.ones((45, 111, embedding_size), dtype=theano.config.floatX),
    #     m: np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).sum(axis=1).shape.eval({
    #    m: np.ones((45, 111), dtype=theano.config.floatX)})
    #print (m).shape.eval({
    #    m: np.ones((45, 111), dtype=theano.config.floatX)})
    #raw_input()

    # =================
    cg = ComputationGraph([cost])
    cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters
    algorithm = GradientDescent(
        cost=cg.outputs[0],
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=4),
            Adam(learning_rate=0.002, beta1=0.1, beta2=0.001),
            #NAG(lr=0.1, momentum=0.9),
            #AdaDelta(),
        ]))

    # ========
    print "setting up data"
    ports = {
        'gpu0_train': 5557, 'gpu0_test': 5558,
        'cuda0_train': 5557, 'cuda0_test': 5558,
        'opencl0:0_train': 5557, 'opencl0:0_test': 5558,
        'gpu1_train': 5559, 'gpu1_test': 5560,
    }

    #batch_size = 16
    #batch_size = 32
    batch_size = 40

    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)
        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train,
                                         batch_size=batch_size)
        stream = DataStream(dataset=dataset, iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
            data_stream=glove,
            #mask_sources=('features',)
            mask_sources=('features',))
        padded = DropSources(padded, ['features_mask'])
        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    #train_stream = ServerDataStream(('features', 'features_mask', 'targets'),
    #                                port=train_port)
    #test_stream = ServerDataStream(('features', 'features_mask', 'targets'),
    #                               port=test_port)
    train_stream = ServerDataStream(('features', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'targets'), port=test_port)

    print "setting up model"
    #ipdb.set_trace()

    n_examples = 25000
    print "Batches per epoch", n_examples // (batch_size + 1)
    batches_extensions = 100
    monitor_rate = 50

    #======
    model = Model(cg.outputs[0])
    extensions = []
    extensions.append(EpochProgress(
        batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        every_n_batches=monitor_rate))
    extensions.append(
        DataStreamMonitoring(
            [cost, misclassification],
            data_stream=test_stream,
            prefix='test',
            after_epoch=True,
            before_first_epoch=False))
    extensions.append(Timing())
    extensions.append(Printing())

    #extensions.append(Plot("norms",
    #    channels=[['train_lstm_norm', 'train_pre_norm']], after_epoch=True))
    #extensions.append(Plot(theano.config.device + "_result",
    #    channels=[['test_misclassification', 'train_misclassification']],
    #    after_epoch=True))
    #extensions.append(PlotHistogram(
    #    channels=['train_state_to_state'],
    #    bins=50,
    #    every_n_batches=30))

    extensions.append(Plot(
        theano.config.device + "_result",
        channels=[['train_cost'], ['train_misclassification']],
        every_n_batches=monitor_rate))

    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions)
    main_loop.run()