예제 #1
0
def gen_model(model, options, k, normalize, word_idict, sampling):
    """Build the sampler functions for a saved model.

    Args:
        model: Forwarded to ``load_params`` together with the freshly
            initialised parameters.
        options: Model options; forwarded to ``init_params`` and
            ``build_sampler``.
        k: Not used by this function.
        normalize: Not used by this function.
        word_idict: Not used by this function.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.

    Returns:
        The tuple ``(f_init, f_next, tparams, trng)`` where ``f_init`` and
        ``f_next`` come from ``build_sampler``, ``tparams`` from
        ``init_tparams`` and ``trng`` is the random stream seeded with 1234.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng_stream = RandomStreams(1234)

    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # init_params -> load_params -> init_tparams, chained in that order.
    shared_params = init_tparams(load_params(model, init_params(options)))

    # Build the sampling graph (see capgen.py for detailed explanations).
    sampler_init, sampler_next = build_sampler(
        shared_params, options, noise_flag, rng_stream, sampling=sampling)

    return sampler_init, sampler_next, shared_params, rng_stream
예제 #2
0
    def _build(self):
        """Build the sampler and the attention-inspection functions.

        Reads ``self.options`` and ``self.model``. Sets ``self.trng``,
        ``self.tparams``, ``self.f_init``, ``self.f_next`` and
        ``self.f_alpha``; ``self.f_sels`` is set only when
        ``self.options['selector']`` is truthy.
        """
        print('Building model...')

        # Random stream and noise flag used while building the sampler.
        self.trng = RandomStreams(1234)
        noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

        # init_params -> load_params -> init_tparams, chained in that order.
        self.tparams = capgen.init_tparams(
            capgen.load_params(self.model, capgen.init_params(self.options)))

        self.f_init, self.f_next = capgen.build_sampler(
            self.tparams, self.options, noise_flag, self.trng)

        # build_model's first return value replaces self.trng from here on.
        (self.trng, noise_flag, inps, alphas, alphas_samples, cost,
         opt_outs) = capgen.build_model(self.tparams, self.options)

        # Update rules are only needed for the stochastic attention variant;
        # otherwise the functions below are created with an empty update list.
        attn_updates = []
        if self.options['attn_type'] == 'stochastic':
            baseline_time = theano.shared(numpy.float32(0.), name='baseline_time')
            # Running average: 0.9 * old value + 0.1 * mean masked cost.
            attn_updates.append(
                (baseline_time,
                 baseline_time * 0.9 + 0.1 * opt_outs['masked_cost'].mean()))
            attn_updates.extend(opt_outs['attn_updates'])

        # Function returning the alphas; a second one returns the selector
        # value (called \beta in the paper) when the selector is enabled.
        self.f_alpha = theano.function(
            inps, alphas, name='f_alpha', updates=attn_updates)
        if self.options['selector']:
            self.f_sels = theano.function(
                inps, opt_outs['selector'], name='f_sels', updates=attn_updates)

        print('Done')
예제 #3
0
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    """Worker loop: build a sampler, then serve requests from ``queue``.

    Each request is read with ``queue.get()``; ``None`` is the exit signal.
    Otherwise ``req[0]`` is an index and ``req[1]`` a context; the worker
    puts ``(idx, sample, score)`` on ``rqueue`` for each request.

    Args:
        queue: Source of requests (``get()`` is called on it).
        rqueue: Destination of results (``put()`` is called on it).
        pid: Printed next to each request index.
        model: Forwarded to ``load_params``.
        options: Forwarded to ``init_params``, ``build_sampler`` and
            ``gen_sample``.
        k: Forwarded to ``gen_sample`` as ``k=``.
        normalize: Not used; scores are always divided by sample length
            (the ``if normalize`` guard is disabled in this variant).
        word_idict: Not used by this function.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.

    Returns:
        None, once the exit signal has been received.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng_stream = RandomStreams(1234)
    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # init_params -> load_params -> init_tparams, chained in that order.
    shared_params = init_tparams(load_params(model, init_params(options)))

    # Build the sampling graph (see capgen.py for detailed explanations).
    sampler_init, sampler_next = build_sampler(
        shared_params, options, noise_flag, rng_stream, sampling=sampling)

    def _best_candidate(ctx):
        """Return the lowest-scoring sample for ``ctx`` and its score."""
        candidates, scores = gen_sample(
            shared_params, sampler_init, sampler_next, ctx, options,
            trng=rng_stream, k=k, maxlen=200, stochastic=False)
        # Adjust for length bias (unconditionally in this variant).
        scores = scores / numpy.array([len(c) for c in candidates])
        best = numpy.argmin(scores)
        return candidates[best], scores[best]

    job = queue.get()
    while job is not None:  # None is the exit signal
        idx, context = job[0], job[1]
        print('%s - %s' % (pid, idx))
        seq, score = _best_candidate(context)
        rqueue.put((idx, seq, score))
        job = queue.get()

    return
예제 #4
0
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict,
              sampling):
    """Worker loop: build a sampler, then serve requests from ``queue``.

    Each request is read with ``queue.get()``; ``None`` is the exit signal.
    Otherwise ``req[0]`` is an index and ``req[1]`` a context; the worker
    puts ``(idx, sample)`` on ``rqueue`` for each request.

    Args:
        queue: Source of requests (``get()`` is called on it).
        rqueue: Destination of results (``put()`` is called on it).
        pid: Printed next to each request index.
        model: Forwarded to ``load_params``.
        options: Forwarded to ``init_params``, ``build_sampler`` and
            ``gen_sample``.
        k: Forwarded to ``gen_sample`` as ``k=``.
        normalize: When truthy, each score is divided by the length of its
            sample before the minimum is taken.
        word_idict: Not used by this function.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.

    Returns:
        None, once the exit signal has been received.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng_stream = RandomStreams(1234)
    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # init_params -> load_params -> init_tparams, chained in that order.
    shared_params = init_tparams(load_params(model, init_params(options)))

    # Build the sampling graph (see capgen.py for detailed explanations).
    sampler_init, sampler_next = build_sampler(
        shared_params, options, noise_flag, rng_stream, sampling=sampling)

    def _best_caption(ctx):
        """Return the sample with the lowest (optionally normalized) score."""
        candidates, scores = gen_sample(
            shared_params, sampler_init, sampler_next, ctx, options,
            trng=rng_stream, k=k, maxlen=200, stochastic=False)
        if normalize:
            # Adjust for length bias.
            scores = scores / numpy.array([len(c) for c in candidates])
        return candidates[numpy.argmin(scores)]

    job = queue.get()
    while job is not None:  # None is the exit signal
        idx, context = job[0], job[1]
        print('%s - %s' % (pid, idx))
        rqueue.put((idx, _best_caption(context)))
        job = queue.get()

    return
예제 #5
0
def load_model(model_path):
    """Load options and parameters for ``model_path`` and build the sampler.

    Options are read with ``load_pkl(model_path + '.pkl')``; parameters go
    through ``capgen.init_params`` -> ``capgen.load_params`` ->
    ``capgen.init_tparams``.

    Args:
        model_path: Path of the saved model; ``'.pkl'`` is appended to find
            the options file, and the bare path is given to ``load_params``.

    Returns:
        The tuple ``(tparams, f_init, f_next, options, trng)``. ``trng`` is
        the first value returned by ``capgen.build_model``, not the stream
        that was passed to ``capgen.build_sampler``.
    """
    options = load_pkl(model_path + '.pkl')

    # Random stream and noise flag used while building the sampler.
    sampler_rng = RandomStreams(1234)
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    tparams = capgen.init_tparams(
        capgen.load_params(model_path, capgen.init_params(options)))
    f_init, f_next = capgen.build_sampler(
        tparams, options, noise_flag, sampler_rng)

    # Only the first of build_model's seven return values is used below.
    (trng, _noise, _inps, _alphas, _alphas_samples, _cost,
     _opt_outs) = capgen.build_model(tparams, options)

    return tparams, f_init, f_next, options, trng
예제 #6
0
def gen_model(model, options, k, normalize, word_idict, sampling):
    """Build the sampler functions for a saved model.

    Args:
        model: Forwarded to ``load_params`` together with the freshly
            initialised parameters.
        options: Model options; forwarded to ``init_params`` and
            ``build_sampler``.
        k: Not used by this function.
        normalize: Not used by this function.
        word_idict: Not used by this function.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.

    Returns:
        The tuple ``(f_init, f_next, tparams, trng)`` where ``f_init`` and
        ``f_next`` come from ``build_sampler``, ``tparams`` from
        ``init_tparams`` and ``trng`` is the random stream seeded with 1234.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    stream = RandomStreams(1234)

    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    dropout_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # Get the parameters: initialise, load, then convert with init_tparams.
    initial = init_params(options)
    tparams = init_tparams(load_params(model, initial))

    # Build the sampling graph (see capgen.py for detailed explanations).
    samplers = build_sampler(tparams, options, dropout_flag, stream,
                             sampling=sampling)
    f_init, f_next = samplers

    return f_init, f_next, tparams, stream
예제 #7
0
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    """Worker loop: build a sampler, then serve requests from ``queue``.

    Each request is read with ``queue.get()``; ``None`` is the exit signal.
    Otherwise ``req[0]`` is an index and ``req[1]`` a context, which is cast
    to float32 before sampling. The worker puts ``(idx, sample)`` on
    ``rqueue`` for each request.

    Args:
        queue: Source of requests (``get()`` is called on it).
        rqueue: Destination of results (``put()`` is called on it).
        pid: Not used by this function.
        model: Forwarded to ``load_params``.
        options: Forwarded to ``init_params``, ``build_sampler`` and
            ``gen_sample``.
        k: Forwarded to ``gen_sample`` as ``k=``.
        normalize: When truthy, each score is divided by the length of its
            sample before the minimum is taken.
        word_idict: Not used by this function.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.

    Returns:
        None, once the exit signal has been received.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    from capgen import build_sampler, gen_sample, load_params, init_params, init_tparams

    trng = RandomStreams(1234)

    print('For the first time')
    # NOTE(review): k2 is never read afterwards; presumably this allocation
    # exists only to trigger Theano/device initialisation early - confirm
    # before removing.
    k2 = theano.shared(numpy.random.rand(10000, 100).astype('float32'))
    print('its done')

    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # Get the parameters: initialise, load, then convert with init_tparams.
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # Build the sampling graph (see capgen.py for detailed explanations).
    f_init, f_next = build_sampler(tparams, options, use_noise, trng, sampling=sampling)
    print('done finished now ...')

    def _gencap(cc0):
        """Return the sample with the lowest (optionally normalized) score."""
        sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
                                   trng=trng, k=k, maxlen=200, stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    # This progress message used to be tab-indented, which placed it inside
    # _gencap after the return statement (unreachable, and a TabError under
    # Python 3). It belongs here, just before the worker loop starts.
    print('i \'m here now ....')

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break
        idx, context = req[0], req[1]
        context = context.astype(numpy.float32, copy=False)
        seq = _gencap(context)
        rqueue.put((idx, seq))

    print('i am out now!')
    return
예제 #8
0
def main(model, saveto, k=1, normalize=False, zero_pad=False, datasets='dev,test', data_path='./', sampling=False, pkl_name=None):
    """Generate captions for the requested dataset splits and write them out.

    For every requested split, each feature entry is turned into a caption
    via ``create_sample``; progress is written to ``saveto + '_status.json'``
    after each item and the captions (one per line) are written to
    ``saveto`` once the split is finished.

    NOTE(review): every split writes to the same ``saveto`` path, so with the
    default ``'dev,test'`` the test captions overwrite the dev captions.

    Args:
        model: Forwarded to ``load_params``; also the default stem of the
            options pickle.
        saveto: Output path for captions; ``'_status.json'`` is appended for
            the progress file.
        k: Forwarded to ``create_sample``.
        normalize: Forwarded to ``create_sample``.
        zero_pad: When truthy, one all-zero row is appended to each context
            before sampling.
        datasets: Comma-separated split names; ``'dev'`` and ``'test'`` are
            recognised. Whitespace around each name is ignored.
        data_path: Forwarded to ``load_data`` as ``path=``.
        sampling: Forwarded to ``build_sampler`` as ``sampling=``.
        pkl_name: Stem of the options pickle (``'<pkl_name>.pkl'``);
            defaults to ``model``.
    """
    # load model options
    if pkl_name is None:
        pkl_name = model
    # NOTE(review): unpickling executes arbitrary code from the file; only
    # point this at option files you trust.
    with open('%s.pkl' % pkl_name, 'rb') as f:
        options = pkl.load(f)

    # Split names, each stripped so that 'dev, test' behaves like 'dev,test'.
    ds = [name.strip() for name in datasets.split(',')]

    # fetch data, skip the splits we aren't using to save time
    load_data, prepare_data = get_dataset(options['dataset'])
    _, valid, test, worddict = load_data(load_train=False,
                                         load_dev='dev' in ds,
                                         load_test='test' in ds,
                                         path=data_path)

    # index -> word table; <eos> means end of sequence, UNK means unknown
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # build sampler
    trng = RandomStreams(1234)
    # Shared scalar fixed at zero: per the original note, zero indicates
    # that dropout is not used in the graph.
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see capgen.py for more detailed explanations
    f_init, f_next = build_sampler(tparams, options, use_noise, trng, sampling=sampling)

    def _seqs2words(cc):
        """Join the words for the indices in ``cc`` up to the first 0."""
        ww = []
        for w in cc:
            if w == 0:
                break
            ww.append(word_idict[w])
        return ' '.join(ww)

    def _send_job(context):
        """Densify and reshape one context, then sample a caption for it."""
        # Hard-coded feature layout: 14*14 rows of 512 values.
        cc = context.todense().reshape([14*14, 512])
        if zero_pad:
            cc0 = numpy.zeros((cc.shape[0]+1, cc.shape[1])).astype('float32')
            cc0[:-1, :] = cc
        else:
            cc0 = cc
        return create_sample(tparams, f_init, f_next, cc0, options, trng, k, normalize)

    def _caption_split(label, features):
        """Caption every entry of ``features`` and write the results."""
        total = len(features)
        # Each split gets its own progress bar; previously the test split
        # used a bar that was never created in that branch.
        bar = Bar(label, max=total)
        caps = []
        for i in range(total):
            caps.append(_seqs2words(_send_job(features[i])))
            with open(saveto + '_status.json', 'w') as f:
                json.dump({'current': i, 'total': total}, f)
            bar.next()
        bar.finish()
        with open(saveto, 'w') as f:
            f.write('\n'.join(caps) + '\n')
        print('Done')

    # process the features of each requested split, in the order given
    for dd in ds:
        if dd == 'dev':
            _caption_split('Development Set...', valid[1])
        if dd == 'test':
            _caption_split('Test Set...', test[1])
    word_idict[vv] = kk
# Indices 0 and 1 are bound to the special tokens '<eos>' and 'UNK'.
# NOTE(review): word_idict itself is created by code that is not part of
# this excerpt.
word_idict[0] = '<eos>'
word_idict[1] = 'UNK'


# ## Creating the Theano Graph  

# In[42]:

# build the sampling functions and model
trng = RandomStreams(1234)
# Shared scalar named 'use_noise', initialised to 0; passed to build_sampler.
use_noise = theano.shared(numpy.float32(0.), name='use_noise')

# Parameters: init_params -> load_params -> init_tparams.
params = capgen.init_params(options)
params = capgen.load_params(model, params)
tparams = capgen.init_tparams(params)

# build the sampler functions f_init and f_next
f_init, f_next = capgen.build_sampler(tparams, options, use_noise, trng)


# In[43]:

# Rebinds trng and use_noise to the values returned by build_model.
trng,use_noise,inps, alphas, alphas_samples,cost, opt_outs = capgen.build_model(tparams, options)


# In[44]:

# get the alphas and selector value [called \beta in the paper]

# create update rules for the stochastic attention