def gen_model(model, options, k, normalize, word_idict, sampling):
    """Load a trained model and compile its sampling functions.

    `model` is handed to `load_params` and `options` to `init_params` /
    `build_sampler`; `sampling` is forwarded to `build_sampler` as a keyword.
    `k`, `normalize` and `word_idict` are accepted but not used here.

    Returns the tuple `(f_init, f_next, tparams, trng)`.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng = RandomStreams(1234)
    # DICTIONARY = "lexicon.txt"

    # Held at zero: dropout is not used in the sampling graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # Initialise the parameters, overwrite them with the saved values, then
    # wrap them for use in the graph.
    shared_params = init_tparams(load_params(model, init_params(options)))

    # Build the sampling computational graph (see capgen.py for details).
    f_init, f_next = build_sampler(shared_params, options, noise_flag, rng,
                                   sampling=sampling)
    return (f_init, f_next, shared_params, rng)
def _build(self):
    """Compile the sampler and the attention-inspection functions.

    Reads `self.options` and `self.model`; sets `self.trng`, `self.tparams`,
    `self.f_init`, `self.f_next`, `self.f_alpha` and, when
    `self.options['selector']` is truthy, `self.f_sels`.
    """
    print('Building model...')

    # Sampling functions and model.
    self.trng = RandomStreams(1234)
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')
    loaded = capgen.load_params(self.model, capgen.init_params(self.options))
    self.tparams = capgen.init_tparams(loaded)

    self.f_init, self.f_next = capgen.build_sampler(
        self.tparams, self.options, noise_flag, self.trng)

    # NOTE(review): build_model's first return value replaces the stream that
    # was passed to build_sampler above -- confirm this is intended.
    graph = capgen.build_model(self.tparams, self.options)
    self.trng, noise_flag, inps, alphas, alphas_samples, cost, opt_outs = graph

    # Alphas and selector value [called \beta in the paper], plus the update
    # rules needed by stochastic attention.
    attn_updates = []
    if self.options['attn_type'] == 'stochastic':
        baseline_time = theano.shared(numpy.float32(0.), name='baseline_time')
        # Moving average of the masked cost.
        smoothed = baseline_time * 0.9 + 0.1 * opt_outs['masked_cost'].mean()
        attn_updates.append((baseline_time, smoothed))
        attn_updates.extend(opt_outs['attn_updates'])

    self.f_alpha = theano.function(inps, alphas, name='f_alpha',
                                   updates=attn_updates)
    if self.options['selector']:
        self.f_sels = theano.function(inps, opt_outs['selector'],
                                      name='f_sels', updates=attn_updates)
    print('Done')
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    """Worker loop: caption contexts pulled from `queue`, push to `rqueue`.

    Each request is indexed as `(idx, context)`; a `None` request ends the
    loop. For every request the worker puts `(idx, sequence, score)` on
    `rqueue`, where the sequence is the candidate with the lowest
    length-normalised score.

    `normalize` is not consulted: the `if normalize:` guard is disabled, so
    scores are always divided by the candidate lengths. `word_idict` is
    unused.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng = RandomStreams(1234)

    # Held at zero: dropout is not used in the sampling graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # Disabled multi-model variant, kept for reference:
    # tparams_list = []
    # f_init_list = []
    # f_next_list = []
    # for m in model:
    #     params = init_params(options)
    #     params = load_params(m, params)
    #     tparams_list.append( init_tparams(params) )
    #     f_init, f_next = build_sampler(tparams_list[-1], options, use_noise, trng, sampling=sampling)
    #     f_init_list.append( f_init )
    #     f_next_list.append( f_next )

    # Load the parameters and build the sampling graph (see capgen.py).
    shared_params = init_tparams(load_params(model, init_params(options)))
    f_init, f_next = build_sampler(shared_params, options, noise_flag, rng,
                                   sampling=sampling)

    job = queue.get()
    while job is not None:  # None is the exit signal
        idx, features = job[0], job[1]
        print('%s - %s' % (pid, idx))

        candidates, costs = gen_sample(shared_params, f_init, f_next, features,
                                       options, trng=rng, k=k, maxlen=200,
                                       stochastic=False)
        # Adjust for length bias (unconditional; `#if normalize:` is disabled).
        costs = costs / numpy.array([len(c) for c in candidates])
        best = numpy.argmin(costs)

        rqueue.put((idx, candidates[best], costs[best]))
        job = queue.get()
    return
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    """Worker loop: caption contexts pulled from `queue`, push to `rqueue`.

    Each request is indexed as `(idx, context)`; a `None` request ends the
    loop. For every request the worker puts `(idx, sequence)` on `rqueue`,
    where the sequence is the lowest-scoring candidate from `gen_sample`.
    When `normalize` is truthy the scores are first divided by the candidate
    lengths. `word_idict` is unused.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    rng = RandomStreams(1234)

    # Held at zero: dropout is not used in the sampling graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    # Load the parameters and build the sampling graph (see capgen.py).
    shared_params = init_tparams(load_params(model, init_params(options)))
    f_init, f_next = build_sampler(shared_params, options, noise_flag, rng,
                                   sampling=sampling)

    def _best_caption(features):
        """Return the lowest-cost candidate sequence for one context."""
        candidates, costs = gen_sample(shared_params, f_init, f_next, features,
                                       options, trng=rng, k=k, maxlen=200,
                                       stochastic=False)
        if normalize:
            # Adjust for length bias.
            costs = costs / numpy.array([len(c) for c in candidates])
        return candidates[numpy.argmin(costs)]

    job = queue.get()
    while job is not None:  # None is the exit signal
        idx, features = job[0], job[1]
        print('%s - %s' % (pid, idx))
        rqueue.put((idx, _best_caption(features)))
        job = queue.get()
    return
def load_model(model_path):
    """Load options and parameters for a saved model and compile its sampler.

    Options are read with `load_pkl` from `model_path + '.pkl'`; parameters
    are loaded by `capgen.load_params` from `model_path`.

    Returns `(tparams, f_init, f_next, options, trng)`.
    """
    # print 'loading model'
    options = load_pkl(model_path + '.pkl')

    # Sampling functions and model.
    sampler_rng = RandomStreams(1234)
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')
    loaded = capgen.load_params(model_path, capgen.init_params(options))
    tparams = capgen.init_tparams(loaded)
    f_init, f_next = capgen.build_sampler(tparams, options, noise_flag,
                                          sampler_rng)

    # NOTE(review): the stream returned to the caller is the one produced by
    # build_model, not the one given to build_sampler -- confirm intended.
    # The remaining six outputs of build_model are discarded.
    trng, _, _, _, _, _, _ = capgen.build_model(tparams, options)
    # print 'done'
    return tparams, f_init, f_next, options, trng
def gen_model(model, options, k, normalize, word_idict, sampling):
    """Load a trained model and compile its sampling functions.

    `sampling` is forwarded to `build_sampler`. `k`, `normalize` and
    `word_idict` are only referenced by the disabled code kept below.

    Returns the tuple `(f_init, f_next, tparams, trng)`.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)

    # Held at zero: dropout is not used in the sampling graph.
    dropout_off = theano.shared(numpy.float32(0.), name='use_noise')

    # Fetch the parameters.
    raw_params = init_params(options)
    raw_params = load_params(model, raw_params)
    tparams = init_tparams(raw_params)

    # Build the sampling computational graph (see capgen.py for details).
    compiled = build_sampler(tparams, options, dropout_off, trng,
                             sampling=sampling)
    f_init, f_next = compiled

    # Disabled lexicon-trie / n-best experiment, kept for reference:
    # DICTIONARY = "lexicon.txt"
    # trie = tr.TrieNode()
    # WordCount=0
    # for word in open(DICTIONARY, "rt").read().split():
    #     word = string.lower(word)
    #     WordCount += 1
    #     trie.insert( word )
    # print "Read %d words" % WordCount
    #
    # def _gencap(cc0):
    #     sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
    #                                trng=trng, k=k, maxlen=200, stochastic=False,alpha=0.0)
    #     # adjust for length bias
    #     if normalize:
    #         lengths = numpy.array([len(s) for s in sample])
    #         score = score / lengths
    #     sidx = numpy.argsort(score)
    #     return [sample[i] for i in sidx]
    # seq = _gencap(context)

    return (f_init, f_next, tparams, trng)
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict, sampling):
    """Worker loop: caption contexts pulled from `queue`, push to `rqueue`.

    Each request is indexed as `(idx, context)`; a `None` request ends the
    loop. The context is cast to float32 (without copying when already
    float32) and the lowest-scoring candidate from `gen_sample` is put on
    `rqueue` as `(idx, sequence)`. When `normalize` is truthy the scores are
    first divided by the candidate lengths. `pid` and `word_idict` are unused.
    """
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    from capgen import build_sampler, gen_sample, load_params, init_params, init_tparams

    rng = RandomStreams(1234)

    print('For the first time')
    # Never read afterwards.
    # NOTE(review): presumably forces Theano/device start-up early -- confirm.
    k2 = theano.shared(numpy.random.rand(10000, 100).astype('float32'))
    print('its done')

    # Held at zero: dropout is not used in the sampling graph.
    noise_flag = theano.shared(numpy.float32(0.), name='use_noise')

    shared_params = init_tparams(load_params(model, init_params(options)))

    # Build the sampling computational graph (see capgen.py for details).
    f_init, f_next = build_sampler(shared_params, options, noise_flag, rng,
                                   sampling=sampling)
    print('done finished now ...')

    def _best_caption(features):
        """Return the lowest-cost candidate sequence for one context."""
        candidates, costs = gen_sample(shared_params, f_init, f_next, features,
                                       options, trng=rng, k=k, maxlen=200,
                                       stochastic=False)
        if normalize:
            # Adjust for length bias.
            costs = costs / numpy.array([len(c) for c in candidates])
        return candidates[numpy.argmin(costs)]

    print("i 'm here now ....")
    job = queue.get()
    while job is not None:  # None is the exit signal
        idx = job[0]
        features = job[1].astype(numpy.float32, copy=False)
        rqueue.put((idx, _best_caption(features)))
        job = queue.get()
    print('i am out now!')
    return
def main(model, saveto, k=1, normalize=False, zero_pad=False,
         datasets='dev,test', data_path='./', sampling=False, pkl_name=None):
    """Generate captions for the requested dataset splits.

    Parameters
    ----------
    model : path passed to `load_params`; also the default stem of the
        options pickle.
    saveto : output path for the captions (one per line). Progress is
        written to `saveto + '_status.json'` after every example.
    k, normalize : forwarded to `create_sample`.
    zero_pad : when truthy, one all-zero row is appended to each context.
    datasets : comma-separated split names; `'dev'` and `'test'` are handled.
    data_path : forwarded to the dataset loader as `path`.
    sampling : forwarded to `build_sampler`.
    pkl_name : stem of the options pickle (`'<pkl_name>.pkl'`); defaults to
        `model`.

    NOTE(review): every split is written to the same `saveto` path, so with
    `datasets='dev,test'` the test captions overwrite the dev captions.
    """
    # Load the model options.
    if pkl_name is None:
        pkl_name = model
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    with open('%s.pkl' % pkl_name, 'rb') as f:
        options = pkl.load(f)

    # Fetch data, skipping the splits we aren't using to save time.
    load_data, _ = get_dataset(options['dataset'])
    _, valid, test, worddict = load_data(load_train=False,
                                         load_dev='dev' in datasets,
                                         load_test='test' in datasets,
                                         path=data_path)

    # Index -> word. <eos> means end of sequence, UNK means unknown.
    word_idict = dict((index, word) for word, index in worddict.items())
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Build the sampler.
    trng = RandomStreams(1234)
    # Held at zero: dropout is not used in the sampling graph.
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # Build the sampling computational graph (see capgen.py for details).
    f_init, f_next = build_sampler(tparams, options, use_noise, trng,
                                   sampling=sampling)

    def _seqs2words(cc):
        """Turn a sequence of word indices into text, stopping at index 0."""
        ww = []
        for w in cc:
            if w == 0:
                break
            ww.append(word_idict[w])
        return ' '.join(ww)

    def _send_job(context):
        """Densify and reshape one context, optionally pad it, and sample."""
        cc = context.todense().reshape([14 * 14, 512])
        if zero_pad:
            cc0 = numpy.zeros((cc.shape[0] + 1, cc.shape[1])).astype('float32')
            cc0[:-1, :] = cc
        else:
            cc0 = cc
        return create_sample(tparams, f_init, f_next, cc0, options, trng, k,
                             normalize)

    def _caption_split(label, contexts):
        """Caption every context of one split and write the result."""
        total = len(contexts)
        # Each split gets its own progress bar. Previously the test branch
        # reused `bar` from the dev branch, which is undefined when only
        # 'test' is requested.
        bar = Bar(label, max=total)
        caps = []
        for i in range(total):
            caps.append(_seqs2words(_send_job(contexts[i])))
            # Rewritten after every example so progress can be polled.
            with open(saveto + '_status.json', 'w') as f:
                json.dump({'current': i, 'total': total}, f)
            bar.next()
        bar.finish()
        with open(saveto, 'w') as f:
            f.write('\n'.join(caps) + '\n')
        print('Done')

    # Run the requested splits in the order given; tolerate 'dev, test'.
    for dd in (name.strip() for name in datasets.split(',')):
        if dd == 'dev':
            _caption_split('Development Set...', valid[1])
        if dd == 'test':
            _caption_split('Test Set...', test[1])
word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' # ## Creating the Theano Graph # In[42]: # build the sampling functions and model trng = RandomStreams(1234) use_noise = theano.shared(numpy.float32(0.), name='use_noise') params = capgen.init_params(options) params = capgen.load_params(model, params) tparams = capgen.init_tparams(params) # word index f_init, f_next = capgen.build_sampler(tparams, options, use_noise, trng) # In[43]: trng,use_noise,inps, alphas, alphas_samples,cost, opt_outs = capgen.build_model(tparams, options) # In[44]: # get the alphas and selector value [called \beta in the paper] # create update rules for the stochastic attention