def jobman(state, channel):
    """Build the tutorial language model, then train it with ``MainLoop``.

    The function wires an input embedding, a recurrent layer and a softmax
    output layer into training, validation and sampling graphs, wraps them
    in an ``LM_Model`` and runs the training loop.

    This is an exercise skeleton: ``outhid``, ``emb_state``,
    ``emb_words_out``, ``outhid_activ`` and ``outhid_dropout`` are used
    below but are not defined anywhere in this function.  Until the
    ``TODO`` sections are completed the function fails with ``NameError``
    at the first use of ``outhid``.

    Several ``state`` entries are passed to ``eval``; only run this with a
    trusted configuration.

    Args:
        state: Mapping of hyper-parameters indexed by string keys.  It is
            mutated: ``'n_in'``, ``'n_out'`` and ``'clr'`` are written.
        channel: Forwarded unchanged to ``MainLoop``.

    Returns:
        None.
    """
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalities of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(rng,
                           n_in=state['n_in'],
                           n_hids=eval(state['inp_nhids']),
                           activation=eval(state['inp_activ']),
                           init_fn='sample_weights_classic',
                           weight_noise=state['weight_noise'],
                           rank_n_approx=state['rank_n_approx'],
                           scale=state['inp_scale'],
                           sparsity=state['inp_sparse'],
                           learn_bias=True,
                           bias_scale=eval(state['inp_bias']),
                           name='emb_words')

    #### Deep Transition Recurrent Layer
    # The recurrent layer class itself is chosen by name from the config.
    rec = eval(state['rec_layer'])(
        rng,
        eval(state['nhids']),
        activation=eval(state['rec_activ']),
        #activation = 'TT.nnet.sigmoid',
        bias_scale=eval(state['rec_bias']),
        scale=eval(state['rec_scale']),
        sparsity=eval(state['rec_sparse']),
        init_fn=eval(state['rec_init']),
        weight_noise=state['weight_noise'],
        name='rec')

    #### Stitching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    # `reset` multiplies the carried-over hidden state, so feeding 0 starts
    # the sequence from a zero state.
    reset = TT.scalar('reset')
    rec_layer = rec(x_emb,
                    n_steps=x.shape[0],
                    init_state=h0 * reset,
                    no_noise_bias=state['no_noise_bias'],
                    truncate_gradient=state['truncate_gradient'],
                    batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer
    # NOTE(review): stub left for the exercise; its arguments still have to
    # be filled in, and the sampling code below refers to this layer as
    # `emb_state`, not `emb_layer`.
    emb_layer = MultiLayer(rng, )

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate Layer

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer

    #### Exercise (2)
    #### TODO: Define a dropout layer

    #### Softmax Layer
    output_layer = SoftmaxLayer(rng,
                                eval(state['dout_nhid']),
                                state['n_out'],
                                scale=state['out_scale'],
                                bias_scale=state['out_bias_scale'],
                                init_fn="sample_weights_classic",
                                weight_noise=state['weight_noise'],
                                sparsity=state['out_sparse'],
                                sum_over_time=True,
                                name='out')

    ### Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        # Keyword is `activation`, matching the `emb_words` MultiLayer call
        # above (this call previously passed `activations`).
        shortcut = MultiLayer(rng,
                              n_in=state['n_in'],
                              n_hids=eval(state['inpout_nhids']),
                              activation=eval(state['inpout_activ']),
                              init_fn='sample_weights_classic',
                              weight_noise=state['weight_noise'],
                              scale=eval(state['inpout_scale']),
                              sparsity=eval(state['inpout_sparse']),
                              learn_bias=eval(state['inpout_learn_bias']),
                              bias_scale=eval(state['inpout_bias']),
                              name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']

    def update_lr(obj, cost):
        """Decay ``obj.lr`` as clr / (1 + n / lr_beta) once past lr_start.

        Does nothing when ``lr_start`` is not an int or has not been
        passed yet.  ``cost`` is unused.
        """
        stp = obj.step
        lr_start = obj.state['lr_start']
        if isinstance(lr_start, int) and stp > lr_start:
            elapsed = float(stp - lr_start)
            obj.lr = obj.state['clr'] / (1 + elapsed / obj.state['lr_beta'])

    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer

    # `outhid` must be produced by the exercise sections above.
    train_model = output_layer(
        outhid,
        no_noise_bias=state['no_noise_bias'],
        additional_inputs=additional_inputs).train(
            target=y, scale=numpy.float32(1. / state['seqlen']))

    # Last hidden state of the sequence, optionally carried to the next call.
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    # Validation keeps its own carried state so it does not disturb training.
    h0val = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))
    rec_layer = rec(emb_words(x, use_noise=False),
                    n_steps=x.shape[0],
                    batch_size=1,
                    init_state=h0val * reset,
                    use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise

    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs = [rec_layer]
    valid_model = output_layer(outhid,
                               additional_inputs=additional_inputs,
                               use_noise=False).validate(target=y,
                                                         sum_over_time=True)
    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function([x, y, reset],
                               valid_model.cost,
                               name='valid_fn',
                               updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        """Sample one word and the next hidden state from the previous ones.

        Relies on the exercise layers ``emb_state``, ``emb_words_out``,
        ``outhid_activ`` and ``outhid_dropout``.
        """
        x_emb = emb_words(word_tm1, use_noise=False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True,
                 use_noise=False)[-1]
        outhid = outhid_dropout(
            outhid_activ(
                emb_state(h0, use_noise=False, one_step=True) +
                emb_words_out(word_tm1, use_noise=False, one_step=True),
                one_step=True),
            use_noise=False,
            one_step=True)
        word = output_layer.get_sample(state_below=outhid,
                                       additional_inputs=[h0],
                                       temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(
        sample_fn,
        states=[
            TT.alloc(numpy.int64(0), state['sample_steps']),
            TT.alloc(numpy.float32(0), 1, eval(state['nhids'])[-1]),
        ],
        n_steps=state['sample_steps'],
        name='sampler_scan')

    ##### build a Theano function for sampling
    # Rebinds `sample_fn` from the single-step function to the compiled
    # sampler; `hook_fn` below calls the compiled one.
    sample_fn = theano.function([], [samples],
                                updates=updates,
                                profile=False,
                                name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']

    def hook_fn():
        """Draw one sample from the model and print it on a single line."""
        sample = sample_fn()[0]
        # Single-argument print(...) emits the same text as the former
        # trailing-comma print statements and is valid on Python 2 and 3.
        if state['chunks'] == 'chars':
            print('Sample: ' + "".join(dictionary[sample]))
        else:
            # '%s' formatting mirrors how print converted each entry.
            words = ['%s' % (dictionary[si],) for si in sample]
            print(' '.join(['Sample:'] + words))

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(cost_layer=train_model,
                     weight_noise_amount=state['weight_noise_amount'],
                     valid_fn=valid_fn,
                     clean_before_noise_fn=False,
                     noise_fn=None,
                     rng=rng)

    if state['reload']:
        model.load(state['prefix'] + 'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    # A negative 'moment' selects plain SGD, otherwise the SGD_m variant.
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)

    ##### Main loop of the trainer
    main = MainLoop(train_data,
                    valid_data,
                    test_data,
                    model,
                    algo,
                    state,
                    channel,
                    train_cost=False,
                    hooks=hook_fn,
                    validate_postprocess=eval(state['validate_postprocess']))

    ## Run!
    main.main()
model_path = state['prefix'] + 'model.npz' timings_path = state['prefix'] + 'timing.npz' try: print "Loading model" model = LM_Model(cost_layer=train_model, weight_noise_amount=state['weight_noise_amount'], valid_fn=valid_fn, clean_before_noise_fn=False, noise_fn=None, test_verbosity=0, cost_per_sample=True, indx_word=state['dictionary'], rng=rng) model.load(model_path) print "Model loaded" except Exception: print 'mainLoop: Corrupted model file' traceback.print_exc() try: timings = dict(numpy.load(timings_path).iteritems()) except Exception: print 'mainLoop: Corrupted timings file' traceback.print_exc() data_model = numpy.load(state['path']) indx = data_model['vocabulary'].item() test_iter = BatchTxtIterator( state, # LM state
def jobman(state, channel):
    """Build the tutorial language model, then train it with ``MainLoop``.

    The function wires an input embedding, a recurrent layer and a softmax
    output layer into training, validation and sampling graphs, wraps them
    in an ``LM_Model`` and runs the training loop.

    This is an exercise skeleton: ``outhid``, ``emb_state``,
    ``emb_words_out``, ``outhid_activ`` and ``outhid_dropout`` are used
    below but are not defined anywhere in this function.  Until the
    ``TODO`` sections are completed the function fails with ``NameError``
    at the first use of ``outhid``.

    Several ``state`` entries are passed to ``eval``; only run this with a
    trusted configuration.

    Args:
        state: Mapping of hyper-parameters indexed by string keys.  It is
            mutated: ``'n_in'``, ``'n_out'`` and ``'clr'`` are written.
        channel: Forwarded unchanged to ``MainLoop``.

    Returns:
        None.
    """
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalities of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1],), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(
        rng,
        n_in=state['n_in'],
        n_hids=eval(state['inp_nhids']),
        activation=eval(state['inp_activ']),
        init_fn='sample_weights_classic',
        weight_noise=state['weight_noise'],
        rank_n_approx=state['rank_n_approx'],
        scale=state['inp_scale'],
        sparsity=state['inp_sparse'],
        learn_bias=True,
        bias_scale=eval(state['inp_bias']),
        name='emb_words')

    #### Deep Transition Recurrent Layer
    # The recurrent layer class itself is chosen by name from the config.
    rec = eval(state['rec_layer'])(
        rng,
        eval(state['nhids']),
        activation=eval(state['rec_activ']),
        #activation = 'TT.nnet.sigmoid',
        bias_scale=eval(state['rec_bias']),
        scale=eval(state['rec_scale']),
        sparsity=eval(state['rec_sparse']),
        init_fn=eval(state['rec_init']),
        weight_noise=state['weight_noise'],
        name='rec')

    #### Stitching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    # `reset` multiplies the carried-over hidden state, so feeding 0 starts
    # the sequence from a zero state.
    reset = TT.scalar('reset')
    rec_layer = rec(
        x_emb,
        n_steps=x.shape[0],
        init_state=h0 * reset,
        no_noise_bias=state['no_noise_bias'],
        truncate_gradient=state['truncate_gradient'],
        batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate Layer

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer

    #### Exercise (2)
    #### TODO: Define a dropout layer

    #### Softmax Layer
    output_layer = SoftmaxLayer(
        rng,
        eval(state['dout_nhid']),
        state['n_out'],
        scale=state['out_scale'],
        bias_scale=state['out_bias_scale'],
        init_fn="sample_weights_classic",
        weight_noise=state['weight_noise'],
        sparsity=state['out_sparse'],
        sum_over_time=True,
        name='out')

    ### Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        # Keyword is `activation`, matching the `emb_words` MultiLayer call
        # above (this call previously passed `activations`).
        shortcut = MultiLayer(
            rng,
            n_in=state['n_in'],
            n_hids=eval(state['inpout_nhids']),
            activation=eval(state['inpout_activ']),
            init_fn='sample_weights_classic',
            weight_noise=state['weight_noise'],
            scale=eval(state['inpout_scale']),
            sparsity=eval(state['inpout_sparse']),
            learn_bias=eval(state['inpout_learn_bias']),
            bias_scale=eval(state['inpout_bias']),
            name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']

    def update_lr(obj, cost):
        """Decay ``obj.lr`` as clr / (1 + n / lr_beta) once past lr_start.

        Does nothing when ``lr_start`` is not an int or has not been
        passed yet.  ``cost`` is unused.
        """
        stp = obj.step
        lr_start = obj.state['lr_start']
        if isinstance(lr_start, int) and stp > lr_start:
            elapsed = float(stp - lr_start)
            obj.lr = obj.state['clr'] / (1 + elapsed / obj.state['lr_beta'])

    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer

    # `outhid` must be produced by the exercise sections above.
    train_model = output_layer(
        outhid,
        no_noise_bias=state['no_noise_bias'],
        additional_inputs=additional_inputs).train(
            target=y,
            scale=numpy.float32(1. / state['seqlen']))

    # Last hidden state of the sequence, optionally carried to the next call.
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    # Validation keeps its own carried state so it does not disturb training.
    h0val = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1],), dtype='float32'))
    rec_layer = rec(
        emb_words(x, use_noise=False),
        n_steps=x.shape[0],
        batch_size=1,
        init_state=h0val * reset,
        use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise

    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs = [rec_layer]
    valid_model = output_layer(
        outhid,
        additional_inputs=additional_inputs,
        use_noise=False).validate(target=y, sum_over_time=True)
    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function(
        [x, y, reset],
        valid_model.out,
        name='valid_fn',
        updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        """Sample one word and the next hidden state from the previous ones.

        Relies on the exercise layers ``emb_state``, ``emb_words_out``,
        ``outhid_activ`` and ``outhid_dropout``.
        """
        x_emb = emb_words(word_tm1, use_noise=False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True,
                 use_noise=False)[-1]
        outhid = outhid_dropout(
            outhid_activ(
                emb_state(h0, use_noise=False, one_step=True) +
                emb_words_out(word_tm1, use_noise=False, one_step=True),
                one_step=True),
            use_noise=False,
            one_step=True)
        word = output_layer.get_sample(
            state_below=outhid,
            additional_inputs=[h0],
            temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(
        sample_fn,
        states=[
            TT.alloc(numpy.int64(0), state['sample_steps']),
            TT.alloc(numpy.float32(0), 1, eval(state['nhids'])[-1]),
        ],
        n_steps=state['sample_steps'],
        name='sampler_scan')

    ##### build a Theano function for sampling
    # Rebinds `sample_fn` from the single-step function to the compiled
    # sampler; `hook_fn` below calls the compiled one.
    sample_fn = theano.function(
        [],
        [samples],
        updates=updates,
        profile=False,
        name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']

    def hook_fn():
        """Draw one sample from the model and print it on a single line."""
        sample = sample_fn()[0]
        # Single-argument print(...) emits the same text as the former
        # trailing-comma print statements and is valid on Python 2 and 3.
        if state['chunks'] == 'chars':
            print('Sample: ' + "".join(dictionary[sample]))
        else:
            # '%s' formatting mirrors how print converted each entry.
            words = ['%s' % (dictionary[si],) for si in sample]
            print(' '.join(['Sample:'] + words))

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(
        cost_layer=train_model,
        weight_noise_amount=state['weight_noise_amount'],
        valid_fn=valid_fn,
        clean_before_noise_fn=False,
        noise_fn=None,
        rng=rng)

    if state['reload']:
        model.load(state['prefix'] + 'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    # A negative 'moment' selects plain SGD, otherwise the SGD_m variant.
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)

    ##### Main loop of the trainer
    main = MainLoop(
        train_data,
        valid_data,
        test_data,
        model,
        algo,
        state,
        channel,
        train_cost=False,
        hooks=hook_fn,
        validate_postprocess=eval(state['validate_postprocess']))

    ## Run!
    main.main()
try: print "Loading model" model = LM_Model( cost_layer = train_model, weight_noise_amount=state['weight_noise_amount'], valid_fn = valid_fn, clean_before_noise_fn = False, noise_fn = None, test_verbosity = 0, cost_per_sample=True, indx_word=state['dictionary'], rng = rng) model.load(model_path) print "Model loaded" except Exception: print 'mainLoop: Corrupted model file' traceback.print_exc() try: timings = dict(numpy.load(timings_path).iteritems()) except Exception: print 'mainLoop: Corrupted timings file' traceback.print_exc() data_model = numpy.load(state['path']) indx = data_model['vocabulary'].item() test_iter = BatchTxtIterator(state, # LM state