def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        print('using preemb')
        params['Wemb'] = preemb

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])

    return params

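
# Hedged usage sketch (not part of the original code): the numpy parameters
# returned by init_params are typically wrapped in theano shared variables
# before being handed to build_model as `tparams`. The helper name below is
# hypothetical.
def init_tparams_sketch(params):
    tparams = OrderedDict()
    for name, value in params.items():
        tparams[name] = theano.shared(value, name=name)
    return tparams
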
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder', mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # Compute loss
    cost = contrastive_loss(options['margin'], images, sents)

    return trng, [x, mask, im], cost

def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        params['Wemb'] = preemb

    # init state
    params = get_layer('ff')[0](options, params, prefix='ff_state',
                                nin=options['dimctx'], nout=options['dim'])

    # Decoder
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    if options['doutput']:
        params = get_layer('ff')[0](options, params, prefix='ff_hid',
                                    nin=options['dim'], nout=options['dim_word'])
        params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                    nin=options['dim_word'], nout=options['n_words'])
    else:
        params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                    nin=options['dim'], nout=options['n_words'])

    return params

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Index into the word embedding matrix, shift it forward in time
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
    # make eos the first word (it has no prev word context),
    # throw out last word (it doesn't predict anything)
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    # Init state
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')

    # Decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder', mask=mask)

    # Compute word probabilities
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # Cost
    x_flat = x.flatten()
    p_flat = probs.flatten()
    cost = -tensor.log(p_flat[tensor.arange(x_flat.shape[0]) * probs.shape[1] + x_flat] + 1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0)
    cost = cost.sum()

    return trng, [x, mask, ctx], cost

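
# Hedged worked example (plain numpy, only to illustrate the flat-indexing
# trick in the cost above): probs has shape (n_rows, n_words), and the cost
# gathers probs[i, x_flat[i]] by indexing into the flattened probability array.
probs_demo = numpy.array([[0.1, 0.7, 0.2],
                          [0.3, 0.3, 0.4]])
x_demo = numpy.array([1, 2])  # target word id for each row
picked = probs_demo.flatten()[numpy.arange(x_demo.shape[0]) * probs_demo.shape[1] + x_demo]
# picked == [0.7, 0.4], i.e. probs_demo[i, x_demo[i]] for each i
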
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    # params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    # We load the embeddings that we saved earlier.
    if options['dataset'] == 'amazon':
        embeddings = numpy.load(
            '/home/shunan/Code/skip-thoughts/experiments/amazon/word2vec_embeds.npy')
    elif options['dataset'] == 'imdb':
        embeddings = numpy.load(
            '/home/shunan/Code/skip-thoughts/experiments/imdb/skip_thought_word2vec_embeds.npy')
    params['Wemb'] = embeddings.astype('float32')

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])

    return params

def init_params(options, rng=None):
    """
    Initialize all network parameters and constraints.
    All parameters and their corresponding constraints are stored in an OrderedDict.
    """
    params = OrderedDict()
    constraints = OrderedDict()

    input_size = 2  # number of player utilities
    n_hidden = [input_size] + options['hidden_units']
    for i in xrange(1, len(n_hidden)):
        params = get_layer('hid')[0](
            options, params, prefix='hidden%02d' % i,
            nin=n_hidden[i - 1] * (3 if options['pooling'] else 1),  # 3x parameters if pooling is used
            nout=n_hidden[i],
            rng=rng,
            b_offset=1.)
    params = get_layer('softmax')[0](options, params, nin=n_hidden[-1], rng=rng)

    ar_layers = options['ar_layers']
    for i in range(ar_layers):
        for p in range(2):
            if i == ar_layers - 1 and p == 1:
                # don't build an ar layer for player 2 in the last layer because it is not used
                continue
            params, constraints = get_layer('ar')[0](options, params,
                                                     prefix='p%d_ar%d' % (p, i),
                                                     nin=ar_layers, level=i, rng=rng,
                                                     constraints=constraints)
    params, constraints = get_layer('output')[0](options, params, constraints,
                                                 rng=rng, nin=ar_layers)

    return params, constraints

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Index into the word embedding matrix, shift it forward in time
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    # Init state
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')

    # Decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder', mask=mask)

    # Compute word probabilities
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # Cost
    x_flat = x.flatten()
    p_flat = probs.flatten()
    cost = -tensor.log(p_flat[tensor.arange(x_flat.shape[0]) * probs.shape[1] + x_flat] + 1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0)
    cost = cost.sum()

    return trng, [x, mask, ctx], cost

def build_sampler(tparams, options, trng):
    # x: 1 x 1
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:, None] < 0,
                        tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # apply one step of gru layer
    proj = get_layer(options['encoder'])[1](tparams, emb, options,
                                            prefix='encoder',
                                            mask=None,
                                            one_step=True,
                                            init_state=init_state)
    next_state = proj[0]

    # compute the output probability dist and sample
    logit_lstm = get_layer('ff')[1](tparams, next_state, options,
                                    prefix='ff_logit_lstm', activ='linear')
    logit_prev = get_layer('ff')[1](tparams, emb, options,
                                    prefix='ff_logit_prev', activ='linear')
    logit = tensor.tanh(logit_lstm + logit_prev)
    logit = get_layer('ff')[1](tparams, logit, options,
                               prefix='ff_logit', activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print 'Building f_next..',
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=profile)
    print 'Done'

    return f_next

def build_encoder(tparams, options):
    """
    Computation graph, encoder only
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]

    return trng, x, x_mask, ctx, emb

def build_image_encoder(tparams, options):
    """
    Encoder only, for images
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # image features
    im = tensor.matrix('im', dtype='float32')
    n_samples = im.shape[0]

    # Encode images
    images_mm = get_layer('ff')[1](tparams, im, options, prefix='ff_image_mm', activ='linear')
    if 'attention_type' not in options or options['attention_type'] == 'dot':
        images_mm = l2norm(images_mm)

    if options['use_dropout']:
        # use_noise and retain_probability_hidden are expected to come from the
        # enclosing module in the original source
        images_mm *= shared_dropout_layer((n_samples, options['dim_multimodal']),
                                          use_noise, trng, retain_probability_hidden)

    return trng, [im], images_mm

def init_mlp_params(params, dim=1, dim_v=4096, init='glorot', gain=1., **kwargs):
    """
    Initialize parameters for the scoring function, an MLP.

    :param params:
    :param dim:
    :param init: glorot initialization or uniform or normal
    :param gain: parameter for glorot initializer
    :param kwargs:
    :return:
    """
    logger.warn('MLP - init: {} gain: {}'.format(init, gain))

    dim_in = 2 * dim  # we concatenate the forward and backward RNN
    dim_out = dim_v

    # The first layer goes from the bi-directional concatenation
    # to the visual feature vector dimensionality, e.g. 2 x 1000 -> 4096
    params = get_layer('ff')[0](params, prefix='mlp', nin=dim_in, nout=dim_out,
                                ortho=False, init=init, gain=gain)

    return params

def build_sentence_encoder(tparams, options):
    """
    Encoder only, for sentences
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder', mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    return trng, [x, mask], sents

def build_encoder(tparams, options):
    """
    Computation graph, encoder only
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]

    return trng, x, x_mask, ctx, emb

def encode_images(tparams, options, im):
    im_emb = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')
    im_emb = l2norm(im_emb)
    if options['abs']:
        im_emb = abs(im_emb)
    return im_emb

def build_sentence_encoder(tparams, options):
    """
    Encoder only, for sentences
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder', mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    return trng, [x, mask], sents

def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    return params

def build_sampler(tparams, options, trng):
    """
    Forward sampling
    """
    ctx = tensor.matrix('ctx', dtype='float32')
    ctx0 = ctx

    print 'Building f_init...',
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')
    f_init = theano.function([ctx], init_state, name='f_init', profile=False)

    # x: 1 x 1
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:, None] < 0,
                        tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder', mask=None, one_step=True)
    next_state = proj[0]
    if next_state.ndim > 2:
        next_state = next_state[0]

    # output
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, next_state, options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, next_state, options, prefix='ff_logit', activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print 'Building f_next..',
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=False)
    print 'Done'

    return f_init, f_next

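
# Hedged usage sketch (not part of the original code): a greedy decoding loop
# on top of the f_init / f_next functions returned above. The function name,
# variable names, and the assumption that word id 0 marks <eos> are hypothetical.
def greedy_sample_sketch(f_init, f_next, ctx, maxlen=30):
    next_state = f_init(ctx)
    next_w = -1 * numpy.ones((1,)).astype('int64')  # -1 selects the all-zero first embedding
    sample = []
    for _ in range(maxlen):
        probs, _, next_state = f_next(next_w, next_state)
        next_w = probs.argmax(1).astype('int64')  # greedy choice instead of multinomial sampling
        sample.append(next_w[0])
        if next_w[0] == 0:  # assumed <eos> id
            break
    return sample
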
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Variational layer
    params = get_layer(options['vae'])[0](options, params, prefix='vae',
                                          nhid=options['vae_nhid'],
                                          nlatent=options['vae_nlatent'],
                                          ndim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit',
                                nin=options['dim'], nout=options['n_words'])

    return params

def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    if options['encoder'] != 'bow':
        params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                                  nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    return params

def build_sampler(tparams, options, trng):
    """
    Forward sampling
    """
    ctx = tensor.matrix('ctx', dtype='float32')
    ctx0 = ctx

    print 'Building f_init...',
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')
    f_init = theano.function([ctx], init_state, name='f_init', profile=False)

    # x: 1 x 1
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:, None] < 0,
                        tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder', mask=None, one_step=True)
    next_state = proj[0]

    # output
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, next_state, options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, next_state, options, prefix='ff_logit', activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print 'Building f_next..',
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=False)
    print 'Done'

    return f_init, f_next

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder', mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # Compute loss
    cost = contrastive_loss(options['margin'], images, sents)

    return trng, [x, mask, im], cost

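
# Hedged sketch (assumption): contrastive_loss itself is not shown in this
# collection. A common pairwise-ranking formulation over the minibatch, using
# dot-product similarity of the l2-normalized image and sentence embeddings,
# would look like this; the function name and details below are hypothetical.
def contrastive_loss_sketch(margin, im, s):
    # similarity matrix: scores[i, j] = <im_i, s_j>; matching pairs sit on the diagonal
    scores = tensor.dot(im, s.T)
    diagonal = scores.diagonal()
    # rank sentences given each image, and images given each sentence
    cost_s = tensor.maximum(0., margin - diagonal + scores)
    cost_im = tensor.maximum(0., margin - diagonal.reshape((-1, 1)) + scores)
    # zero out the diagonal (the matching pairs) and sum the margin violations
    off_diag = 1. - tensor.eye(scores.shape[0], scores.shape[1])
    return ((cost_s + cost_im) * off_diag).sum()
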
def encode_sentences_with_topicvector(tparams, options, x, mask, topics):
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    t2gru_emb = get_layer('ff')[1](tparams, topics, options,
                                   prefix='ff_topic_vector1_emb_gru', activ='linear')

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, t2gru_emb * 0.1, options,
                                            prefix='encoder', mask=mask)
    s = proj[0][-1]

    # if options['v_norm'] == 'l2':
    s = l2norm(s)
    # s = maxnorm2(s)

    if options['abs']:
        # s = abs(s)
        s = tensor.maximum(s, 0)

    return s

def encode_images(tparams, options, im):
    im_emb = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # if options['v_norm'] == 'l2':
    im_emb = l2norm(im_emb)
    # im_emb = maxnorm2(im_emb)

    if options['abs']:
        # im_emb = abs(im_emb)
        im_emb = tensor.maximum(im_emb, 0)

    return im_emb

def encode_topic_vector2(tparams, options, topics):
    t_emb = get_layer('ff')[1](tparams, topics, options,
                               prefix='ff_topic_vector2', activ='linear')
    t_emb = l2norm(t_emb)
    # t_emb = maxnorm2(t_emb)

    if options['abs']:
        # im_emb = abs(im_emb)
        t_emb = tensor.maximum(t_emb, 0)

    return t_emb

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')
    con = tensor.matrix('con', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=mask)
    sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # Compute loss
    cost, updates = theano.scan(_step,
                                sequences=con,
                                outputs_info=tensor.alloc(0.),
                                non_sequences=[sents, images, options['margin']],
                                n_steps=con.shape[0],
                                profile=False,
                                strict=True)
    cost = cost[-1]

    return trng, [x, mask, im, con], cost

def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # if using bidirectional RNN,
    # forward and backward embeddings are half the final MM embedding size because
    # they will be concatenated to form the sentence embedding
    sent_dim = int(options['dim']) // 2 if options['bidirectional_enc'] else int(options['dim'])

    langs = options['langs']
    for lang in langs:
        # word embeddings
        params['Wemb_%s' % lang] = norm_weight(options['n_words_%s' % lang], options['dim_word'])

        # encoder type (currently 'bow', 'gru' or 'lstm')
        if options['encoder_%s' % lang] != 'bow':
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                nin = options['dim_word'] if i == 0 else sent_dim
                params = get_layer(options['encoder_%s' % lang])[0](options, params,
                                                                    prefix=layer_name_prefix,
                                                                    nin=nin, dim=sent_dim)

            if options['bidirectional_enc']:
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                    nin = options['dim_word'] if i == 0 else sent_dim
                    params = get_layer(options['encoder_%s' % lang])[0](options, params,
                                                                        prefix=layer_name_prefix,
                                                                        nin=nin, dim=sent_dim)

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image',
                                nin=options['dim_image'], nout=options['dim'])

    return params

def encode_sentences(tparams, options, x, mask):
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=mask)
    s = l2norm(proj[0][-1])

    if options['abs']:
        s = abs(s)

    return s

def build_image_encoder(tparams, options):
    """
    Encoder only, for images
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # image features
    im = tensor.matrix('im', dtype='float32')

    # Encode images
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')
    images = l2norm(images)

    return trng, [im], images

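
# Hedged usage sketch (not part of the original code): once the sentence and
# image encoders above are compiled with theano.function, image-to-sentence
# retrieval reduces to a dot product, because both embeddings are l2-normalized.
# The function name and the *_batch arguments are hypothetical placeholders.
def rank_sentences_sketch(tparams, options, x_batch, mask_batch, im_batch):
    trng, [x, mask], sents = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, mask], sents, name='f_senc')
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    sent_vecs = f_senc(x_batch, mask_batch)    # (n_sentences, dim)
    img_vecs = f_ienc(im_batch)                # (n_images, dim)
    scores = numpy.dot(img_vecs, sent_vecs.T)  # cosine similarities (unit vectors)
    return scores.argsort(axis=1)[:, ::-1]     # best-matching sentence ids per image
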
def build_ar_layers(x, tparams, options, features, hiddens):
    u1, u2 = (x[:, 0, :, :], x[:, 1, :, :].transpose(0, 2, 1))
    h1, h2 = hiddens

    # concatenate the payoff matrix onto the final layer hidden units
    utility = (tensor.concatenate((u1.reshape((u1.shape[0], 1, u1.shape[1], u1.shape[2])), h1), axis=1),
               tensor.concatenate((u2.reshape((u2.shape[0], 1, u2.shape[1], u2.shape[2])), h2), axis=1))

    ar_layers = options['ar_layers']
    ar_lists = ([], [])
    opp = [None, None]
    weighted_feature_list = ([], [])
    br_list = ([], [])
    for i in range(ar_layers):
        for p in range(2):
            if i == (ar_layers - 1) and p == 1:
                continue  # don't build an ar layer for player 2 in the last layer
            feat = features[p]
            ar, weighted_features, br = get_layer('ar')[1](tparams, feat, options,
                                                           payoff=utility[p],
                                                           prefix='p%d_ar%d' % (p, i),
                                                           opposition=opp[p],
                                                           level=i)
            n, d = ar.shape
            ar = ar.reshape((n, 1, d))  # make space to concat ar layers
            weighted_feature_list[p].append(weighted_features)
            if i == 0:
                ar_lists[p].append(ar)
            else:
                ar_lists[p].append(tensor.concatenate((ar_lists[p][i - 1], ar), axis=1))
            br_list[p].append(br)

        # append each layer then update the opposition variable...
        if i < ar_layers - 1:
            for p in range(2):
                opp[1 - p] = ar_lists[p][i]

    # return ar_lists[0][ar_layers-1]
    return ar_lists, weighted_feature_list, br_list

def build_encoder_w2v(tparams, options):
    """
    Computation graph for the encoder, given pre-trained word embeddings
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, None, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]

    return trng, embedding, x_mask, ctx

def __init__(self, net_config, data_cache):
    # net_config_str = json.dumps(net_config, sort_keys=True, indent=4, separators=(',', ':'))
    # LOGGER.info("\n" + net_config_str)
    self.data_cache = data_cache
    self.inputs = net_config.get("inputs", [])
    self.check_net_config(net_config)
    self.outputs = net_config.get("outputs", [])

    # layers
    self.layers = []
    self.name2layer = {}
    model_disk_base = net_config.get("model_cache", {}).get("disk_base", None)
    for layer_config in net_config["layers"]:
        layer = get_layer(layer_config, self.data_cache)
        layer.model_disk_base = model_disk_base
        self.layers.append(layer)
        self.name2layer[layer.name] = layer

def build_mlp_predict(self, enc_states, dim_emb=0, dim=0, activation_mlp='relu', **kwargs):
    """
    Builds an MLP scoring function for use during prediction / test time.
    We want this to predict a single 4096d vector for each input.

    :param dim_emb:
    :param dim:
    :param activation_mlp:
    :param kwargs:
    :return:

    TODO: Redefine the predict function so it predicts the embedding of the
    sentence and the image embedding.
    """
    # set MLP activation function
    assert activation_mlp in ('relu', 'tanh'), \
        'MLP activation function must be tanh or relu'
    activation_mlp = 'lambda x: tensor.nnet.relu(x)' \
        if activation_mlp == 'relu' else 'lambda x: tensor.tanh(x)'
    logger.warn('Using MLP activation function: {}'.format(activation_mlp))

    theano_params = self.theano_params

    # The input to the MLP will be the mean value of the hidden states for
    # each instance in the minibatch.
    if kwargs['verbose']:
        logger.warn(enc_states.tag.test_value)

    # train a single layer MLP to do everything
    output = get_layer('ff')[1](theano_params, enc_states, prefix='mlp',
                                activ=activation_mlp)

    return output

def build_model(tparams, options, rng=None):
    """
    Computation graph for the model
    """
    if rng is None:
        rng = numpy.random.RandomState(123)
    trng = RandomStreams(rng.randint(1000000))
    use_noise = theano.shared(numpy.float32(0.))

    x = tensor.tensor4('x')
    own_features, hidden1 = build_features(x, tparams, options, use_noise, trng)
    opp_features, hidden2 = build_features(
        x.transpose((0, 1, 3, 2))[:, [1, 0], :, :],  # transpose to get the player 2 model
        tparams, options, use_noise, trng)
    ar, weighted_feature_list, br_list = build_ar_layers(
        x, tparams, options, (own_features, opp_features), (hidden1[-1], hidden2[-1]))

    ar_layers = options['ar_layers']
    out = get_layer('output')[1](tparams, ar[0][ar_layers - 1], options)

    intermediate_fns = {
        'ar': ar,
        'own_features': own_features,
        'opp_features': opp_features,
        'hidden1': hidden1,
        'hidden2': hidden2,
        'weighted_feature_list': weighted_feature_list,
        'br_list': br_list
    }
    if not options['debug']:
        return trng, use_noise, x, out
    else:
        return trng, use_noise, x, out, intermediate_fns

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    # x: current sentence
    # y: next sentence
    # z: previous sentence
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')
    z = tensor.matrix('z', dtype='int64')
    z_mask = tensor.matrix('z_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_timesteps_f = y.shape[0]
    n_timesteps_b = z.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # Word embedding (ahead)
    embf = tparams['Wemb'][y.flatten()].reshape([n_timesteps_f, n_samples, options['dim_word']])
    embf_shifted = tensor.zeros_like(embf)
    embf_shifted = tensor.set_subtensor(embf_shifted[1:], embf[:-1])
    embf = embf_shifted

    # Word embedding (behind)
    embb = tparams['Wemb'][z.flatten()].reshape([n_timesteps_b, n_samples, options['dim_word']])
    embb_shifted = tensor.zeros_like(embb)
    embb_shifted = tensor.set_subtensor(embb_shifted[1:], embb[:-1])
    embb = embb_shifted

    # decoder (ahead)
    projf = get_layer(options['decoder'])[1](tparams, embf, dec_ctx, options,
                                             prefix='decoder_f', mask=y_mask)

    # decoder (behind)
    projb = get_layer(options['decoder'])[1](tparams, embb, dec_ctx, options,
                                             prefix='decoder_b', mask=z_mask)

    # compute word probabilities (ahead)
    logit = get_layer('ff')[1](tparams, projf[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (ahead)
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words'] + y_flat
    costf = -tensor.log(probs.flatten()[y_flat_idx] + 1e-8)
    costf = costf.reshape([y.shape[0], y.shape[1]])
    costf = (costf * y_mask).sum(0)
    costf = costf.sum()

    # compute word probabilities (behind)
    logit = get_layer('ff')[1](tparams, projb[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (behind)
    z_flat = z.flatten()
    z_flat_idx = tensor.arange(z_flat.shape[0]) * options['n_words'] + z_flat
    costb = -tensor.log(probs.flatten()[z_flat_idx] + 1e-8)
    costb = costb.reshape([z.shape[0], z.shape[1]])
    costb = (costb * z_mask).sum(0)
    costb = costb.sum()

    # total cost
    cost = costf + costb

    return trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    # x: current sentence
    # y: next sentence
    # z: previous sentence
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')
    z = tensor.matrix('z', dtype='int64')
    z_mask = tensor.matrix('z_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_timesteps_f = y.shape[0]
    n_timesteps_b = z.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder', mask=x_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # Word embedding (ahead)
    embf = tparams['Wemb'][y.flatten()].reshape([n_timesteps_f, n_samples, options['dim_word']])
    embf_shifted = tensor.zeros_like(embf)
    embf_shifted = tensor.set_subtensor(embf_shifted[1:], embf[:-1])
    embf = embf_shifted

    # Word embedding (behind)
    embb = tparams['Wemb'][z.flatten()].reshape([n_timesteps_b, n_samples, options['dim_word']])
    embb_shifted = tensor.zeros_like(embb)
    embb_shifted = tensor.set_subtensor(embb_shifted[1:], embb[:-1])
    embb = embb_shifted

    # decoder (ahead)
    projf = get_layer(options['decoder'])[1](tparams, embf, dec_ctx, options,
                                             prefix='decoder_f', mask=y_mask)

    # decoder (behind)
    projb = get_layer(options['decoder'])[1](tparams, embb, dec_ctx, options,
                                             prefix='decoder_b', mask=z_mask)

    # compute word probabilities (ahead)
    logit = get_layer('ff')[1](tparams, projf[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (ahead)
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words'] + y_flat
    costf = -tensor.log(probs.flatten()[y_flat_idx] + 1e-8)
    costf = costf.reshape([y.shape[0], y.shape[1]])
    costf = (costf * y_mask).sum(0)
    costf = costf.sum()

    # compute word probabilities (behind)
    logit = get_layer('ff')[1](tparams, projb[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (behind)
    z_flat = z.flatten()
    z_flat_idx = tensor.arange(z_flat.shape[0]) * options['n_words'] + z_flat
    costb = -tensor.log(probs.flatten()[z_flat_idx] + 1e-8)
    costb = costb.reshape([z.shape[0], z.shape[1]])
    costb = (costb * z_mask).sum(0)
    costb = costb.sum()

    # total cost
    cost = costf + costb

    return trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost

def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # if using bidirectional RNN,
    # forward and backward embeddings are half the final MM embedding size because
    # they will be concatenated to form the sentence embedding
    # sent_dim = int(options['dim'])//2 if options['bidirectional_enc'] else int(options['dim'])
    sent_dim = options['dim']
    ctx_dim = options['dim']  # context vector in case of a mono RNN encoder

    langs = options['langs']
    for idx, lang in enumerate(langs):
        # word embeddings
        params['Wemb_%s' % lang] = norm_weight(options['n_words_%s' % lang], options['dim_word'])

        # encoder type (currently 'bow', 'gru' or 'lstm')
        if options['encoder_%s' % lang] != 'bow':
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                nin = options['dim_word'] if i == 0 else sent_dim
                params = get_layer(options['encoder_%s' % lang])[0](options, params,
                                                                    prefix=layer_name_prefix,
                                                                    nin=nin, dim=sent_dim)

            if options['bidirectional_enc']:
                ctx_dim = 2 * options['dim']  # context vector in case of a biRNN encoder
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                    nin = options['dim_word'] if i == 0 else sent_dim
                    params = get_layer(options['encoder_%s' % lang])[0](options, params,
                                                                        prefix=layer_name_prefix,
                                                                        nin=nin, dim=sent_dim)

        # if using general attention, create matrices for each possible
        # sentence-image and sentence-sentence pair
        if 'attention_type' in options and options['attention_type'] == 'general':
            # sentence_lang-image mapping
            params['image_sentence_%i_mapping' % idx] = \
                norm_weight(options['dim_multimodal'], options['dim_multimodal'], ortho=False)

            # sentence_lang1 - sentence_langN mappings
            for idx1, lang1 in enumerate(langs):
                if idx == idx1 or idx1 <= idx:
                    continue
                params['sentence_%i_sentence_%i_mapping' % (idx, idx1)] = \
                    norm_weight(options['dim_multimodal'], options['dim_multimodal'], ortho=False)

    # Sentence-multimodal projection
    params = get_layer('ff')[0](options, params, prefix='ff_sentence_mm',
                                nin=ctx_dim, nout=options['dim_multimodal'])

    # Image-multimodal projection
    params = get_layer('ff')[0](options, params, prefix='ff_image_mm',
                                nin=options['dim_image'], nout=options['dim_multimodal'])

    return params

def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    use_noise = theano.shared(numpy.asarray(1., dtype=theano.config.floatX))
    try:
        trng = RandomStreams(1234, use_cuda=True)
    except:
        print "Could not apply use_cuda=True in RandomStreams ..."
        trng = RandomStreams(1234)

    xs = []
    xmasks = []
    langs = options['langs']
    for lang in langs:
        # description string: #words x #samples
        x_lang = tensor.matrix('x_%s' % lang, dtype='int64')
        mask_lang = tensor.matrix('mask_%s' % lang, dtype='float32')
        xs.append(x_lang)
        xmasks.append(mask_lang)

    xs_r = []
    xmasks_r = []
    if options['bidirectional_enc']:
        for i, lang in enumerate(langs):
            x_lang = xs[i]
            mask_lang = xmasks[i]
            # reverse
            x_lang_r = x_lang[::-1]
            mask_lang_r = mask_lang[::-1]
            xs_r.append(x_lang_r)
            xmasks_r.append(mask_lang_r)

    sents_all = []
    im = tensor.matrix('im', dtype='float32')
    n_samples = im.shape[0]

    for i, lang in enumerate(langs):
        x_lang = xs[i]
        mask_lang = xmasks[i]

        n_timesteps_lang = x_lang.shape[0]
        n_samples_lang = x_lang.shape[1]

        if options['use_dropout']:
            # dropout probs for the word embeddings
            retain_probability_emb = 1 - options['dropout_embedding']
            # dropout probs for the RNN hidden states
            retain_probability_hidden = 1 - options['dropout_hidden']
            # dropout probs for the source words
            retain_probability_source = 1 - options['dropout_source']
            # hidden states
            rec_dropout = shared_dropout_layer(
                (2, n_samples_lang, options['dim']),
                use_noise, trng, retain_probability_hidden)
            rec_dropout_r = shared_dropout_layer(
                (2, n_samples_lang, options['dim']),
                use_noise, trng, retain_probability_hidden)
            # word embeddings
            emb_dropout = shared_dropout_layer(
                (2, n_samples_lang, options['dim_word']),
                use_noise, trng, retain_probability_emb)
            emb_dropout_r = shared_dropout_layer(
                (2, n_samples_lang, options['dim_word']),
                use_noise, trng, retain_probability_emb)
            # source words
            source_dropout = shared_dropout_layer(
                (n_timesteps_lang, n_samples_lang, 1),
                use_noise, trng, retain_probability_source)
            source_dropout = tensor.tile(source_dropout, (1, 1, options['dim_word']))
        else:
            # hidden states
            rec_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            # word embeddings
            emb_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(numpy.array([1.] * 2, dtype='float32'))

        # Word embedding (for a particular language `lang`)
        # forward
        emb_lang = tparams['Wemb_%s' % lang][x_lang.flatten()]
        emb_lang = emb_lang.reshape([n_timesteps_lang, n_samples_lang, options['dim_word']])
        if options['use_dropout']:
            emb_lang *= source_dropout

        if options['bidirectional_enc']:
            x_lang_r = xs_r[i]
            mask_lang_r = xmasks_r[i]
            # backward lang encoder
            emb_lang_r = tparams['Wemb_%s' % lang][x_lang_r.flatten()]
            emb_lang_r = emb_lang_r.reshape([n_timesteps_lang, n_samples_lang, options['dim_word']])
            if options['use_dropout']:
                emb_lang_r *= source_dropout[::-1]

        # Encode sentence in language `lang`
        if options['encoder_%s' % lang] == 'bow':
            sents_lang = (emb_lang * mask_lang[:, :, None]).sum(0)
        else:
            # iteratively push input from first hidden layer until the last
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # if first hidden layer use wembs, otherwise output of previous hidden layer
                layer_below = emb_lang if i == 0 else layer_below[0]

                # do not apply dropout on word embeddings layer
                # if options['use_dropout'] and i > 0:
                #     layer_below = dropout_layer(layer_below, use_noise, trng, prob=options['dropout_prob'])

                layer_below = get_layer(options['encoder_%s' % lang])[1](
                    tparams, layer_below, options, None,
                    prefix=layer_name_prefix, mask=mask_lang,
                    emb_dropout=emb_dropout, rec_dropout=rec_dropout)

                if i == int(options['n_enc_hidden_layers']) - 1:
                    # sentence embeddings (projections) are the output of the last hidden layer
                    proj_lang = layer_below

            # apply forward and backward steps and concatenate both
            if options['bidirectional_enc']:
                # concatenate forward and backward pass RNNs
                # iteratively push input from first hidden layer until the last
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # if first hidden layer use wembs, else output of prev hidden layer
                    layer_below = emb_lang_r if i == 0 else layer_below[0]

                    # do not apply dropout on word embeddings layer
                    # if options['use_dropout'] and i > 0:
                    #     layer_below = dropout_layer(layer_below, use_noise, trng, prob=options['dropout_prob'])

                    layer_below = get_layer(options['encoder_%s' % lang])[1](
                        tparams, layer_below, options, None,
                        prefix=layer_name_prefix, mask=mask_lang_r,
                        emb_dropout=emb_dropout_r, rec_dropout=rec_dropout_r)

                    if i == int(options['n_enc_hidden_layers']) - 1:
                        # sentence embeddings (projections) are the output of the last hidden layer
                        proj_lang_r = layer_below

                # use the last state of forward + backward encoder rnns
                sents_lang = concatenate([proj_lang[0][-1], proj_lang_r[0][-1]],
                                         axis=proj_lang[0].ndim - 2)
            else:
                sents_lang = proj_lang[0][-1]

        if options['use_dropout']:
            sents_lang *= shared_dropout_layer(
                (n_samples_lang, options['dim']),
                use_noise, trng, retain_probability_hidden)

        # project sentences into multimodal space
        sents_mm = get_layer('ff')[1](tparams, sents_lang, options,
                                      prefix='ff_sentence_mm', activ='linear')

        if options['attention_type'] == 'dot':
            sents_mm = l2norm(sents_mm)

        if options['use_dropout']:
            sents_mm *= shared_dropout_layer(
                (n_samples_lang, options['dim_multimodal']),
                use_noise, trng, retain_probability_hidden)

        sents_all.append(sents_mm)

    # Encode images
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image_mm', activ='linear')

    if options['attention_type'] == 'dot':
        images = l2norm(images)

    if options['use_dropout']:
        images *= shared_dropout_layer(
            (n_samples, options['dim_multimodal']),
            use_noise, trng, retain_probability_hidden)

    # Compute loss
    lambda_img_sent = options['lambda_img_sent']
    lambda_sent_sent = options['lambda_sent_sent']
    if options['use_all_costs']:
        cost = contrastive_loss_all(tparams, options, images, sents_all,
                                    lambda_img_sent, lambda_sent_sent)
    else:
        cost = contrastive_loss(tparams, options, images, sents_all)

    # return flattened inputs
    inps = []
    inps.extend(xs)
    inps.extend(xmasks)
    inps.append(im)

    return trng, inps, cost

def build_sentence_encoders(tparams, options):
    """
    Sentence encoder only, to be used at test time
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # xs, masks, sents_all = [], [], []
    in_outs = []

    langs = options['langs']
    for lang in langs:
        # description string: #words x #samples
        # forward
        x = tensor.matrix('x_%s' % lang, dtype='int64')
        mask = tensor.matrix('x_mask_%s' % lang, dtype='float32')

        n_timesteps = x.shape[0]
        n_samples = x.shape[1]

        # Word embedding (forward)
        emb = tparams['Wemb_%s' % lang][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])

        if options['bidirectional_enc']:
            # backward RNN
            x_r = x[::-1]
            mask_r = mask[::-1]
            emb_r = tparams['Wemb_%s' % lang][x_r.flatten()].reshape(
                [n_timesteps, n_samples, options['dim_word']])

        if options['use_dropout']:
            retain_probability_emb = 1 - options['dropout_embedding']
            retain_probability_hidden = 1 - options['dropout_hidden']
            retain_probability_source = 1 - options['dropout_source']
            rec_dropout = theano.shared(numpy.array([retain_probability_hidden] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(numpy.array([retain_probability_hidden] * 2, dtype='float32'))
            emb_dropout = theano.shared(numpy.array([retain_probability_emb] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(numpy.array([retain_probability_emb] * 2, dtype='float32'))
            source_dropout = theano.shared(numpy.float32(retain_probability_source))
            emb *= source_dropout
            if options['bidirectional_enc']:
                emb_r *= source_dropout
        else:
            rec_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            emb_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(numpy.array([1.] * 2, dtype='float32'))

        # Encode sentences
        if options['encoder_%s' % lang] == 'bow':
            sents = (emb * mask[:, :, None]).sum(0)
        else:
            # iteratively push input from first hidden layer until the last
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # if first layer, input are wembs; otherwise input is the output of the previous hidden layer
                layer_below = emb if i == 0 else layer_below[0]
                layer_below = get_layer(options['encoder_%s' % lang])[1](
                    tparams, layer_below, options, None,
                    prefix=layer_name_prefix, mask=mask,
                    emb_dropout=emb_dropout, rec_dropout=rec_dropout)

                if i == int(options['n_enc_hidden_layers']) - 1:
                    # sentence embeddings (projections) are the output of the last hidden layer
                    proj = layer_below

            if options['bidirectional_enc']:
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # if first layer, input are wembs; otherwise input is the output of the previous hidden layer
                    layer_below = emb_r if i == 0 else layer_below[0]
                    layer_below = get_layer(options['encoder_%s' % lang])[1](
                        tparams, layer_below, options, None,
                        prefix=layer_name_prefix, mask=mask_r,
                        emb_dropout=emb_dropout_r, rec_dropout=rec_dropout_r)

                    if i == int(options['n_enc_hidden_layers']) - 1:
                        # sentence embeddings (projections) are the output of the last hidden layer
                        proj_r = layer_below

                # use last hidden state of forward and backward RNNs
                sents = concatenate([proj[0][-1], proj_r[0][-1]], axis=proj[0].ndim - 2)
            else:
                sents = proj[0][-1]

        if options['use_dropout']:
            # use_noise is expected to come from the enclosing module in the original source
            sents *= shared_dropout_layer((n_samples, options['dim']),
                                          use_noise, trng, retain_probability_hidden)

        # project sentences into multimodal space
        sents_mm = get_layer('ff')[1](tparams, sents, options,
                                      prefix='ff_sentence_mm', activ='linear')

        if 'attention_type' not in options or options['attention_type'] == 'dot':
            sents_mm = l2norm(sents_mm)

        if options['use_dropout']:
            sents_mm *= shared_dropout_layer((n_samples, options['dim_multimodal']),
                                             use_noise, trng, retain_probability_hidden)

        # outputs per language
        in_outs.append(([x, mask], sents_mm))

    return trng, in_outs