Exemple #1
0
def main():
    """Build an encoder-decoder model from a prototype state and train it.

    The state starts from the prototype named by ``args.proto`` (looked up
    on ``experiments.nmt``), is optionally updated from ``args.state``
    (a ``.py`` file holding a Python expression, or a pickle otherwise) and
    finally from each ``key=value`` string in ``args.changes``.

    An ``NTMEncoderDecoder`` is built for the two NTM prototypes and an
    ``RNNEncoderDecoder`` otherwise. The training algorithm named by
    ``state['algo']`` is instantiated and handed to ``MainLoop``, which is
    optionally reloaded (``state['reload']``) and run when
    ``state['loopIters'] > 0``.
    """
    args = parse_args()

    state = getattr(experiments.nmt, args.proto)()
    if args.state:
        if args.state.endswith(".py"):
            # Read inside a context manager so the handle is closed promptly.
            with open(args.state) as src:
                state_expr = src.read()
            # NOTE(security): the file content is evaluated as Python code;
            # only point this at trusted state files.
            state.update(eval(state_expr))
        else:
            with open(args.state) as src:
                state.update(cPickle.load(src))
    for change in args.changes:
        # NOTE(security): command-line overrides are evaluated as Python code.
        state.update(eval("dict({})".format(change)))

    logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
    logger.debug("State:\n{}".format(pprint.pformat(state)))

    rng = numpy.random.RandomState(state['seed'])
    if args.proto in ('prototype_ntm_state', 'prototype_ntmencdec_state'):
        print('Neural Turing Machine')
        enc_dec = NTMEncoderDecoder(state, rng, args.skip_init)
    else:
        enc_dec = RNNEncoderDecoder(state, rng, args.skip_init)
    enc_dec.build()
    lm_model = enc_dec.create_lm_model()

    logger.debug("Load data")
    train_data = get_batch_iterator(state)
    logger.debug("Compile trainer")
    # state['algo'] names the trainer class; it is resolved with eval, so the
    # state must come from a trusted source.
    algo = eval(state['algo'])(lm_model, state, train_data)
    logger.debug("Run training")

    # The sample-printing hook is only installed for a non-negative hookFreq.
    hooks = None
    if state['hookFreq'] >= 0:
        hooks = [RandomSamplePrinter(state, lm_model, train_data)]

    # Named main_loop (not main) so the local does not shadow this function.
    main_loop = MainLoop(train_data, None, None, lm_model, algo, state, None,
                         reset=state['reset'],
                         hooks=hooks)
    if state['reload']:
        main_loop.load()
    if state['loopIters'] > 0:
        main_loop.main()
def main():
    args = parse_args()

    state = prototype_state()
    with open(args.state) as src:
        state.update(cPickle.load(src))
    state.update(eval("dict({})".format(args.changes)))

    logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    rng = numpy.random.RandomState(state['seed'])
    enc_dec = NTMEncoderDecoder(state, rng, skip_init=True)
    enc_dec.build()
    lm_model = enc_dec.create_lm_model()
    lm_model.load(args.model_path)
    indx_word = cPickle.load(open(state['word_indx'],'rb'))

    sampler = None
    beam_search = None
    if args.beam_search:
        beam_search = BeamSearch(enc_dec)
        beam_search.compile()
    else:
        sampler = enc_dec.create_sampler(many_samples=True)

    idict_src = cPickle.load(open(state['indx_word'],'r'))

    if args.source and args.trans:
        # Actually only beam search is currently supported here
        assert beam_search
        assert args.beam_size

        fsrc = open(args.source, 'r')
        ftrans = open(args.trans, 'w')

        start_time = time.time()

        n_samples = args.beam_size
        total_cost = 0.0
        logging.debug("Beam size: {}".format(n_samples))
        for i, line in enumerate(fsrc):
            seqin = line.strip()
            print seqin
            seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src)
            if args.verbose:
                print "Parsed Input:", parsed_in
            trans, costs, _ = sample(lm_model, seq, n_samples, sampler=sampler,
                    beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize)
            best = numpy.argmin(costs)
            print trans[best]
            print costs[best]
            print >>ftrans, trans[best]
            if args.verbose:
                print "Translation:", trans[best]
            total_cost += costs[best]
            if (i + 1)  % 100 == 0:
                ftrans.flush()
                logger.debug("Current speed is {} per sentence".
                        format((time.time() - start_time) / (i + 1)))
        print "Total cost of the translations: {}".format(total_cost)

        fsrc.close()
        ftrans.close()
    else:
        while True:
            try:
                seqin = raw_input('Input Sequence: ')
                n_samples = int(raw_input('How many samples? '))
                alpha = None
                if not args.beam_search:
                    alpha = float(raw_input('Inverse Temperature? '))
                seq,parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src)
                print "Parsed Input:", parsed_in
            except Exception:
                print "Exception while parsing your input:"
                traceback.print_exc()
                continue

            sample(lm_model, seq, n_samples, sampler=sampler,
                    beam_search=beam_search,
                    ignore_unk=args.ignore_unk, normalize=args.normalize,
                    alpha=alpha, verbose=True)
Exemple #3
0
def main():
    args = parse_args()

    state = getattr(experiments.nmt, args.proto)()

    if args.state:
        if args.state.endswith(".py"):
            state.update(eval(open(args.state).read()))
        else:
            with open(args.state) as src:
                state.update(cPickle.load(src))
    for change in args.changes:
        state.update(eval("dict({})".format(change)))

    logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
    logger.debug("State:\n{}".format(pprint.pformat(state)))

    rng = numpy.random.RandomState(state['seed'])
    if args.proto == 'prototype_ntm_state' or args.proto == 'prototype_ntmencdec_state':
        print 'Neural Turing Machine'
        enc_dec = NTMEncoderDecoder(state, rng, args.skip_init)
    else:
        enc_dec = RNNEncoderDecoder(state, rng, args.skip_init)
    enc_dec.build()
    lm_model = enc_dec.create_lm_model()

    #s_enc_dec = RNNEncoderDecoder(state, rng, args.skip_init)
    #s_lm_model = s_enc_dec.create_lm_model()
    
    logger.debug("Load data")
    train_data = get_batch_iterator(state)
    train_data.start(-1)
    logger.debug("Compile trainer")
    #algo = eval(state['algo'])(lm_model, state, train_data)

    #algo()
    #train
    print '---test training---'
    
    for i in range(1):

        batch = train_data.next()
        #print batch
        x = batch['x']
        print x.shape
        print x
        xs = x[:,78:79]
        xsample = x[:,78]
        print xs.shape
        print xs
        print xsample
        y = batch['y']
        ys = y[:,78:79]

        print y.shape
        x_mask = batch['x_mask']
        xs_mask = x_mask[:,78:79]
        y_mask = batch['y_mask']
        ys_mask = y_mask[:,78:79]
        if not (args.proto == 'prototype_ntm_state' or args.proto == 'prototype_ntmencdec_state'):
            print '---search---'
            train_outputs = enc_dec.forward_training.rvalss+[enc_dec.predictions.out]
            test_train = theano.function(inputs=[enc_dec.x, enc_dec.x_mask, enc_dec.y, enc_dec.y_mask],
                                        outputs=train_outputs)
            result = test_train(x,x_mask,y,y_mask)
            for i in result:
                print i.shape
        else:
            print '---ntm---'
            train_outputs = enc_dec.forward_training.rvalss+[
                        enc_dec.training_c.out,
                        enc_dec.forward_training_c.out,
                        enc_dec.forward_training_m.out,
                        enc_dec.forward_training_rw.out,
                        enc_dec.backward_training_c.out,
                        enc_dec.backward_training_m.out,
                        enc_dec.backward_training_rw.out,
                        ]
            train_outputs = enc_dec.forward_training.rvalss+[\
                        enc_dec.predictions.out,
                        enc_dec.training_c.out
                        ]
            test_train = theano.function(inputs=[enc_dec.x, enc_dec.x_mask, enc_dec.y, enc_dec.y_mask],
                                        outputs=train_outputs)
            result = test_train(x,x_mask,y,y_mask)
            for i in result:
                print i.shape
            #small batch test
            print '---small---'
            results = test_train(xs,xs_mask,ys,ys_mask)
            for i in results:
                print i.shape
            print '---compare---'
            #print result[1][:,4,:,:]
            #print results[1][:,0,:,:]
            print results[-1].shape
            print result[-1][:,78,:]-results[-1][:,0,:]
            print numpy.sum(result[-1][:,78,:]-results[-1][:,0,:])
            #print numpy.sum(result[0][:,4,:]-results[0][:,0,:])
            tmp = copy.deepcopy(result[-1][:,78,:])
            tmpm = copy.deepcopy(result[1][:,78,:,:])
            


    #sample
    #batch = train_data.next()
    #print batch
    print '---test sampling---'
    x = [7,152,429,731,10239,1127,747,480,30000]
    n_samples=10
    n_steps=10
    T=1
    inps = [enc_dec.sampling_x,
            enc_dec.n_samples,
            enc_dec.n_steps,
            enc_dec.T]
    #test_sample = theano.function(inputs=[enc_dec.sampling_x],
    #                            outputs=[enc_dec.sample])
    test_outputs = [enc_dec.sampling_c,
                    enc_dec.forward_sampling_c,
                    enc_dec.forward_sampling_m,
                    enc_dec.forward_sampling_rw,
                    enc_dec.backward_sampling_c,
                    enc_dec.backward_sampling_m,
                    enc_dec.backward_sampling_rw
                    ]
    test_outputs = enc_dec.forward_sampling.rvalss#+[enc_dec.sample,enc_dec.sample_log_prob,enc_dec.sampling_updates]
    #test_outputs = [enc_dec.sample,enc_dec.sample_log_prob]
    #sample_fn = theano.function(inputs=inps,outputs=test_outputs)
    sampler = enc_dec.create_sampler(many_samples=True)
    result = sampler(n_samples, n_steps,T,xsample)
    #print result
    print '---single repr---'
    
    c,m = enc_dec.create_representation_computer()(x)
    states = map(lambda x : x[None, :], enc_dec.create_initializers()(c))

    #print states
    print states[0].shape
    print m[-1:].shape
    '''
    next = enc_dec.create_next_states_computer(c, 0, inputs, m[-1:],*states)
    #print next[0]
    #print next[1]
    print next[0].shape
    print next[1].shape

    print c
    print m
    '''
    print '---repr compare---'
    print c.shape
    print m.shape
    print c-tmp[0:c.shape[0],:]
    print numpy.sum(c-tmp[0:c.shape[0],:],axis=1)
    print m-tmpm[0:m.shape[0],:,:]
    print numpy.sum(m-tmp[0:m.shape[0],:,:],axis=1)

    return