Example #1
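# Assumed imports (not shown in the original snippet). This is GroundHog-era
# Theano code, so the module paths below are best-guess assumptions based on
# the GroundHog library layout, not something the snippet itself confirms;
# get_text_data(state) is assumed to be defined elsewhere in the tutorial.
import numpy
import theano
import theano.tensor as TT
from theano.sandbox.scan import scan  # assumed: the scan variant taking `states=`

from groundhog.layers import (MultiLayer, SoftmaxLayer,   # assumed paths
                              RecurrentMultiLayer,        # for eval(state['rec_layer'])
                              UnaryOp, DropOp)            # used in the sketches below
from groundhog.models import LM_Model
from groundhog.mainLoop import MainLoop
from groundhog.trainer.SGD import SGD
from groundhog.trainer.SGD_momentum import SGD as SGD_m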
def jobman(state, channel):
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalities of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(rng,
                           n_in=state['n_in'],
                           n_hids=eval(state['inp_nhids']),
                           activation=eval(state['inp_activ']),
                           init_fn='sample_weights_classic',
                           weight_noise=state['weight_noise'],
                           rank_n_approx=state['rank_n_approx'],
                           scale=state['inp_scale'],
                           sparsity=state['inp_sparse'],
                           learn_bias=True,
                           bias_scale=eval(state['inp_bias']),
                           name='emb_words')

    #### Deep Transition Recurrent Layer
    rec = eval(state['rec_layer'])(
        rng,
        eval(state['nhids']),
        activation=eval(state['rec_activ']),
        #activation = 'TT.nnet.sigmoid',
        bias_scale=eval(state['rec_bias']),
        scale=eval(state['rec_scale']),
        sparsity=eval(state['rec_sparse']),
        init_fn=eval(state['rec_init']),
        weight_noise=state['weight_noise'],
        name='rec')

    #### Stitching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    reset = TT.scalar('reset')
    rec_layer = rec(x_emb,
                    n_steps=x.shape[0],
                    init_state=h0 * reset,
                    no_noise_bias=state['no_noise_bias'],
                    truncate_gradient=state['truncate_gradient'],
                    batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer
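    #### A possible solution sketch (an assumption, not the official answer).
    #### The name `emb_state` and the output size eval(state['dout_nhid'])
    #### are taken from their later use in `sample_fn` and the SoftmaxLayer;
    #### the remaining arguments mirror `emb_words` above.
    emb_state = MultiLayer(rng,
                           n_in=eval(state['nhids'])[-1],
                           n_hids=eval(state['dout_nhid']),
                           activation=lambda v: v,  # linear; activated later
                           init_fn='sample_weights_classic',
                           weight_noise=state['weight_noise'],
                           learn_bias=True,
                           name='emb_state')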

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate layer
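    #### A possible solution sketch (assumption): embed the input word into
    #### the same intermediate space. The name `emb_words_out` and the fact
    #### that it consumes the raw word index both come from `sample_fn` below.
    emb_words_out = MultiLayer(rng,
                               n_in=state['n_in'],
                               n_hids=eval(state['dout_nhid']),
                               activation=lambda v: v,  # linear; activated later
                               init_fn='sample_weights_classic',
                               weight_noise=state['weight_noise'],
                               rank_n_approx=state['rank_n_approx'],
                               learn_bias=False,  # the bias lives in emb_state
                               name='emb_words_out')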

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer
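    #### A possible solution sketch (assumption): an elementwise nonlinearity
    #### applied to emb_state(h) + emb_words_out(x). `outhid_activ` is the
    #### name used in `sample_fn`; UnaryOp is assumed to be GroundHog's
    #### elementwise-activation layer, and 'dout_activ' an assumed state key.
    outhid_activ = UnaryOp(activation=eval(state['dout_activ']))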

    #### Exercise (2)
    #### TODO: Define a dropout layer
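    #### A possible solution sketch (assumption): `outhid_dropout` is the name
    #### used in `sample_fn`; DropOp is assumed to be GroundHog's dropout
    #### layer, and both its constructor signature and the 'dropout' state key
    #### are assumptions.
    outhid_dropout = DropOp(rng=rng, dropout=state['dropout'])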

    #### Softmax Layer
    output_layer = SoftmaxLayer(rng,
                                eval(state['dout_nhid']),
                                state['n_out'],
                                scale=state['out_scale'],
                                bias_scale=state['out_bias_scale'],
                                init_fn="sample_weights_classic",
                                weight_noise=state['weight_noise'],
                                sparsity=state['out_sparse'],
                                sum_over_time=True,
                                name='out')

    ### A Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        shortcut = MultiLayer(rng,
                              n_in=state['n_in'],
                              n_hids=eval(state['inpout_nhids']),
                              activation=eval(state['inpout_activ']),
                              init_fn='sample_weights_classic',
                              weight_noise=state['weight_noise'],
                              scale=eval(state['inpout_scale']),
                              sparsity=eval(state['inpout_sparse']),
                              learn_bias=eval(state['inpout_learn_bias']),
                              bias_scale=eval(state['inpout_bias']),
                              name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']

    def update_lr(obj, cost):
        stp = obj.step
        if isinstance(obj.state['lr_start'], int) and stp > obj.state['lr_start']:
            time = float(stp - obj.state['lr_start'])
            new_lr = obj.state['clr'] / (1 + time / obj.state['lr_beta'])
            obj.lr = new_lr

    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer
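    ##### A possible solution sketch (assumption), combining the exercise
    ##### layers exactly as `sample_fn` does for a single step:
    outhid = outhid_activ(emb_state(rec_layer) + emb_words_out(x))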

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer
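    ##### A possible solution sketch (assumption):
    outhid = outhid_dropout(outhid)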

    train_model = output_layer(outhid,
                               no_noise_bias=state['no_noise_bias'],
                               additional_inputs=additional_inputs).train(
                                   target=y,
                                   scale=numpy.float32(1. / state['seqlen']))

    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    h0val = theano.shared(
        numpy.zeros((eval(state['nhids'])[-1], ), dtype='float32'))
    rec_layer = rec(emb_words(x, use_noise=False),
                    n_steps=x.shape[0],
                    batch_size=1,
                    init_state=h0val * reset,
                    use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0] - 1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer
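    ##### A possible solution sketch (assumption), as in training but with
    ##### noise disabled:
    outhid = outhid_activ(emb_state(rec_layer, use_noise=False) +
                          emb_words_out(x, use_noise=False))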

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise
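    ##### A possible solution sketch (assumption): apply dropout with noise
    ##### disabled, so the op only rescales its input:
    outhid = outhid_dropout(outhid, use_noise=False)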

    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs = [rec_layer]
    valid_model = output_layer(outhid,
                               additional_inputs=additional_inputs,
                               use_noise=False).validate(target=y,
                                                         sum_over_time=True)

    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function([x, y, reset],
                               valid_model.cost,
                               name='valid_fn',
                               updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        x_emb = emb_words(word_tm1, use_noise=False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True, use_noise=False)[-1]
        outhid = outhid_dropout(outhid_activ(
            emb_state(h0, use_noise=False, one_step=True) +
            emb_words_out(word_tm1, use_noise=False, one_step=True),
            one_step=True),
                                use_noise=False,
                                one_step=True)
        word = output_layer.get_sample(state_below=outhid,
                                       additional_inputs=[h0],
                                       temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(sample_fn,
                                         states=[
                                             TT.alloc(numpy.int64(0),
                                                      state['sample_steps']),
                                             TT.alloc(numpy.float32(0), 1,
                                                      eval(state['nhids'])[-1])
                                         ],
                                         n_steps=state['sample_steps'],
                                         name='sampler_scan')

    ##### build a Theano function for sampling
    sample_fn = theano.function([], [samples],
                                updates=updates,
                                profile=False,
                                name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']

    def hook_fn():
        sample = sample_fn()[0]
        print 'Sample:',
        if state['chunks'] == 'chars':
            print "".join(dictionary[sample])
        else:
            for si in sample:
                print dictionary[si],
            print

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(cost_layer=train_model,
                     weight_noise_amount=state['weight_noise_amount'],
                     valid_fn=valid_fn,
                     clean_before_noise_fn=False,
                     noise_fn=None,
                     rng=rng)

    if state['reload']:
        model.load(state['prefix'] + 'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)
    ##### Main loop of the trainer
    main = MainLoop(train_data,
                    valid_data,
                    test_data,
                    model,
                    algo,
                    state,
                    channel,
                    train_cost=False,
                    hooks=hook_fn,
                    validate_postprocess=eval(state['validate_postprocess']))
    ## Run!
    main.main()
Example #2
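# This fragment assumes the surrounding context of Example #1: `state`,
# `train_model`, `valid_fn` and `rng` are already defined, `import traceback`
# has been done, and BatchTxtIterator is assumed to come from GroundHog's
# dataset-iterator module.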
model_path = state['prefix'] + 'model.npz'
timings_path = state['prefix'] + 'timing.npz'

try:
    print "Loading model"
    model = LM_Model(cost_layer=train_model,
                     weight_noise_amount=state['weight_noise_amount'],
                     valid_fn=valid_fn,
                     clean_before_noise_fn=False,
                     noise_fn=None,
                     test_verbosity=0,
                     cost_per_sample=True,
                     indx_word=state['dictionary'],
                     rng=rng)
    model.load(model_path)
    print "Model loaded"
except Exception:
    print 'mainLoop: Corrupted model file'
    traceback.print_exc()
try:
    timings = dict(numpy.load(timings_path).iteritems())
except Exception:
    print 'mainLoop: Corrupted timings file'
    traceback.print_exc()

data_model = numpy.load(state['path'])
indx = data_model['vocabulary'].item()

test_iter = BatchTxtIterator(
    state,  # LM state