def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        print('using preemb')
        params['Wemb'] = preemb

    # Encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_f',
                                              nin=options['dim_word'], dim=options['dim'])
    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder_b',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim'], nout=options['n_words'])

    return params
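
Note: norm_weight and get_layer are helpers defined elsewhere in these codebases. A minimal sketch of a typical norm_weight in this lineage, assuming a plain scaled-Gaussian initializer (the exact scale and orthogonal option vary by repo):

import numpy

def norm_weight(nin, nout=None, scale=0.01):
    # Sample a (nin, nout) float32 weight matrix from a scaled Gaussian.
    if nout is None:
        nout = nin
    return (scale * numpy.random.randn(nin, nout)).astype('float32')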
def build_model(tparams, options):                                                                                           
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    if options['encoder'] == 'bow':
        sents = (emb * mask[:,:,None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder',
                                                mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # Compute loss
    cost = contrastive_loss(options['margin'], images, sents)

    return trng, [x, mask, im], cost
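
The helpers l2norm and contrastive_loss are not shown in these examples. A hedged numpy sketch of what this call implies, a margin-based image-sentence ranking loss where matched pairs sit on the diagonal of the score matrix (a reconstruction, not necessarily the authors' exact code):

import numpy

def l2norm(X):
    # Row-wise L2 normalization.
    return X / numpy.linalg.norm(X, axis=1, keepdims=True)

def contrastive_loss(margin, im, s):
    # Pairwise ranking: push mismatched pairs at least `margin` below matches.
    scores = im.dot(s.T)                                          # (n, n) similarities
    diag = numpy.diag(scores)
    cost_s = numpy.maximum(0., margin - diag + scores)            # rank sentences
    cost_im = numpy.maximum(0., margin - diag[:, None] + scores)  # rank images
    cost = cost_s + cost_im
    numpy.fill_diagonal(cost, 0.)                                 # true pairs cost 0
    return cost.sum()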
Example #3
def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        params['Wemb'] = preemb

    # init state
    params = get_layer('ff')[0](options, params, prefix='ff_state', nin=options['dimctx'], nout=options['dim'])

    # Decoder
    params = get_layer(options['decoder'])[0](options, params, prefix='decoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Output layer
    if options['doutput']:
        params = get_layer('ff')[0](options, params, prefix='ff_hid', nin=options['dim'], nout=options['dim_word'])
        params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim_word'], nout=options['n_words'])
    else:
        params = get_layer('ff')[0](options, params, prefix='ff_logit', nin=options['dim'], nout=options['n_words'])

    return params
Example #4
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Index into the word embedding matrix, shift it forward in time
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
    # make eos the first word (it has no prev word context),
    # throw out last word (it doesn't predict anything)
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    # Init state
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')

    # Decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder',
                                            mask=mask)

    # Compute word probabilities
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1], logit_shp[2]]))

    # Cost
    x_flat = x.flatten()
    p_flat = probs.flatten()
    cost = -tensor.log(p_flat[tensor.arange(x_flat.shape[0])*probs.shape[1]+x_flat]+1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0)
    cost = cost.sum()

    return trng, [x, mask, ctx], cost
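
The cost block above flattens the (timesteps * samples, n_words) probability matrix and picks out each target word with a row * width + column offset. A small self-contained numpy check of that indexing (all names illustrative):

import numpy

probs = numpy.random.dirichlet(numpy.ones(5), size=6)  # (timesteps*samples, n_words)
x_flat = numpy.array([3, 0, 4, 1, 2, 2])               # flattened target word ids
picked = probs.flatten()[numpy.arange(x_flat.shape[0]) * probs.shape[1] + x_flat]
assert numpy.allclose(picked, probs[numpy.arange(6), x_flat])
nll = -numpy.log(picked + 1e-8)                        # per-token negative log-likelihood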
Example #5
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    # params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # We load the embeddings that we saved earlier.
    if options['dataset'] == 'amazon':
        embeddings = numpy.load(
            '/home/shunan/Code/skip-thoughts/experiments/amazon/word2vec_embeds.npy'
        )
    elif options['dataset'] == 'imdb':
        embeddings = numpy.load(
            '/home/shunan/Code/skip-thoughts/experiments/imdb/skip_thought_word2vec_embeds.npy'
        )
    params['Wemb'] = embeddings.astype('float32')

    # Encoder
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix='encoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_f',
                                              nin=options['dim_word'],
                                              dim=options['dim'])
    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_b',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit',
                                nin=options['dim'],
                                nout=options['n_words'])

    return params
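
The hard-coded .npy paths above are machine-specific. A hedged sketch of how such an embedding matrix could be produced for this loader (file name illustrative; in practice each row is copied from a trained word2vec model, indexed consistently with the corpus dictionary):

import numpy

n_words, dim_word = 20000, 620  # must match options['n_words'] and options['dim_word']
embeds = (0.01 * numpy.random.randn(n_words, dim_word)).astype('float32')
numpy.save('word2vec_embeds.npy', embeds)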
def init_params(options, rng=None):
    """
    Initialize all network parameters and constraints.

    All parameters and their corresponding constraints are stored in an OrderedDict.
    """
    params = OrderedDict()
    constraints = OrderedDict()

    input_size = 2  # number of player utilities

    n_hidden = [input_size] + options['hidden_units']
    for i in range(1, len(n_hidden)):
        params = get_layer('hid')[0](
            options,
            params,
            prefix='hidden%02d' % i,
            # 3x the input width if pooling is used
            nin=n_hidden[i - 1] * (3 if options['pooling'] else 1),
            nout=n_hidden[i],
            rng=rng,
            b_offset=1.)
    params = get_layer('softmax')[0](options,
                                     params,
                                     nin=n_hidden[-1],
                                     rng=rng)

    ar_layers = options['ar_layers']

    for i in range(ar_layers):
        for p in range(2):
            if i == ar_layers - 1 and p == 1:
                # don't build ar layer for pl 2 in the last layer because it is not used
                continue
            params, constraints = get_layer('ar')[0](options,
                                                     params,
                                                     prefix='p%d_ar%d' %
                                                     (p, i),
                                                     nin=ar_layers,
                                                     level=i,
                                                     rng=rng,
                                                     constraints=constraints)
    params, constraints = get_layer('output')[0](options,
                                                 params,
                                                 constraints,
                                                 rng=rng,
                                                 nin=ar_layers)
    return params, constraints
Example #7
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    ctx = tensor.matrix('ctx', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Index into the word embedding matrix, shift it forward in time
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])
    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    # Init state
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')

    # Decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder',
                                            mask=mask)

    # Compute word probabilities
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, proj[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1], logit_shp[2]]))

    # Cost
    x_flat = x.flatten()
    p_flat = probs.flatten()
    cost = -tensor.log(p_flat[tensor.arange(x_flat.shape[0])*probs.shape[1]+x_flat]+1e-8)
    cost = cost.reshape([x.shape[0], x.shape[1]])
    cost = (cost * mask).sum(0)
    cost = cost.sum()

    return trng, [x, mask, ctx], cost
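
Both conditional language-model examples shift the embeddings one step forward in time, so that position t sees the embedding of word t-1 and position 0 sees zeros. A numpy check of the shift (shapes illustrative):

import numpy

T, N, D = 3, 2, 4
emb = numpy.arange(T * N * D, dtype='float32').reshape(T, N, D)
emb_shifted = numpy.zeros_like(emb)
emb_shifted[1:] = emb[:-1]  # row 0 stays all-zero: no previous word
assert (emb_shifted[0] == 0).all() and (emb_shifted[1:] == emb[:-1]).all()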
Example #8
def build_sampler(tparams, options, trng):
    # y: previous word index for each sample (int64 vector)
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:, None] < 0,
                        tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # apply one step of gru layer
    proj = get_layer(options['encoder'])[1](tparams,
                                            emb,
                                            options,
                                            prefix='encoder',
                                            mask=None,
                                            one_step=True,
                                            init_state=init_state)
    next_state = proj[0]

    # compute the output probability dist and sample
    logit_lstm = get_layer('ff')[1](tparams,
                                    next_state,
                                    options,
                                    prefix='ff_logit_lstm',
                                    activ='linear')
    logit_prev = get_layer('ff')[1](tparams,
                                    emb,
                                    options,
                                    prefix='ff_logit_prev',
                                    activ='linear')
    logit = tensor.tanh(logit_lstm + logit_prev)
    logit = get_layer('ff')[1](tparams,
                               logit,
                               options,
                               prefix='ff_logit',
                               activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print('Building f_next..', end='')
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=profile)
    print('Done')

    return f_next
def build_encoder(tparams, options):
    """
    Computation graph, encoder only
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return trng, x, x_mask, ctx, emb
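
build_encoder returns symbolic variables rather than a compiled function. A hedged usage sketch of how a caller would typically compile and apply it (assumes tparams and options are already built, and x_batch / mask_batch are int64 and float32 arrays of shape (n_timesteps, n_samples)):

import theano

trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
f_enc = theano.function([x, x_mask], ctx, name='f_enc')
vectors = f_enc(x_batch, mask_batch)  # (n_samples, dim) sentence vectors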
Example #10
def build_image_encoder(tparams, options):
    """
    Encoder only, for images
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # image features
    im = tensor.matrix('im', dtype='float32')

    # Encode images
    images_mm = get_layer('ff')[1](tparams,
                                   im,
                                   options,
                                   prefix='ff_image_mm',
                                   activ='linear')
    if 'attention_type' not in options or options['attention_type'] == 'dot':
        images_mm = l2norm(images_mm)

    if options['use_dropout']:
        # n_samples, use_noise and retain_probability_hidden are not defined in
        # this snippet; plausible definitions, following the patterns used
        # elsewhere in this codebase, are:
        n_samples = im.shape[0]
        use_noise = theano.shared(numpy.float32(1.))
        retain_probability_hidden = 1 - options['dropout_hidden']
        images_mm *= shared_dropout_layer(
            (n_samples, options['dim_multimodal']), use_noise, trng,
            retain_probability_hidden)

    return trng, [im], images_mm
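
shared_dropout_layer is assumed to be defined elsewhere; a sketch consistent with the nematus-style helper this code resembles (binary mask at training time, constant retain probability at test time):

import numpy
import theano
import theano.tensor as tensor

def shared_dropout_layer(shape, use_noise, trng, value):
    # use_noise == 1: sample a binary dropout mask with retain probability `value`;
    # use_noise == 0: multiply by the constant retain probability instead.
    return tensor.switch(
        use_noise,
        trng.binomial(shape, p=value, n=1, dtype='float32'),
        theano.shared(numpy.float32(value)))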
Example #11
    def init_mlp_params(params,
                        dim=1,
                        dim_v=4096,
                        init='glorot',
                        gain=1.,
                        **kwargs):
        """
        Initialize parameters for the scoring function, an MLP.

        :param params:
        :param dim:
        :param init: glorot initialization or uniform or normal
        :param gain: parameter for glorot initializer
        :param kwargs:
        :return:
        """
        logger.warn('MLP - init: {} gain: {}'.format(init, gain))

        dim_in = 2 * dim  # we concatenate the forward and backward RNN
        dim_out = dim_v

        # The first layer goes from the bi-directional concatenation
        # to the visual feature vector dimensionality. e.g. 2 x 1000 -> 4,096
        params = get_layer('ff')[0](params,
                                    prefix='mlp',
                                    nin=dim_in,
                                    nout=dim_out,
                                    ortho=False,
                                    init=init,
                                    gain=gain)

        return params
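
The init and gain arguments suggest a Glorot-style initializer. A minimal numpy sketch of Glorot-uniform with gain (a generic reconstruction, not this repo's exact helper):

import numpy

def glorot_uniform(nin, nout, gain=1.):
    limit = gain * numpy.sqrt(6. / (nin + nout))
    return numpy.random.uniform(-limit, limit, size=(nin, nout)).astype('float32')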
Example #12
def build_sentence_encoder(tparams, options):
    """
    Encoder only, for sentences
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder',
                                                mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    return trng, [x, mask], sents
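
The 'bow' branch pools a sentence by a masked sum over time: broadcasting the (timesteps, samples) mask against the (timesteps, samples, dim) embeddings zeroes out padding. A small numpy check (shapes illustrative):

import numpy

T, N, D = 4, 2, 3
emb = numpy.random.randn(T, N, D).astype('float32')
mask = numpy.array([[1, 1], [1, 1], [1, 0], [0, 0]], dtype='float32')  # (T, N)
sents = (emb * mask[:, :, None]).sum(0)  # (N, D); padded timesteps contribute 0
assert sents.shape == (N, D)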
Example #13
def build_encoder(tparams, options):
    """
    Computation graph, encoder only
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams,
                                            emb,
                                            None,
                                            options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return trng, x, x_mask, ctx, emb
Example #14
def encode_images(tparams, options, im):
    im_emb = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')
    im_emb = l2norm(im_emb)
    if options['abs']:
        im_emb = abs(im_emb)

    return im_emb
def build_sentence_encoder(tparams, options):
    """
    Encoder only, for sentences
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('x_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences
    if options['encoder'] == 'bow':
        sents = (emb * mask[:,:,None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                                prefix='encoder',
                                                mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    return trng, [x, mask], sents
Example #17
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                              nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image', nin=options['dim_image'], nout=options['dim'])

    return params
Example #18
def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        params['Wemb'] = preemb

    # init state
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_state',
                                nin=options['dimctx'],
                                nout=options['dim'])

    # Decoder
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Output layer
    if options['doutput']:
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_hid',
                                    nin=options['dim'],
                                    nout=options['dim_word'])
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_logit',
                                    nin=options['dim_word'],
                                    nout=options['n_words'])
    else:
        params = get_layer('ff')[0](options,
                                    params,
                                    prefix='ff_logit',
                                    nin=options['dim'],
                                    nout=options['n_words'])

    return params
Example #19
def build_sampler(tparams, options, trng):
    """
    Forward sampling
    """
    ctx = tensor.matrix('ctx', dtype='float32')
    ctx0 = ctx

    print('Building f_init...', end='')
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')
    f_init = theano.function([ctx], init_state, name='f_init', profile=False)

    # y: previous word index for each sample (int64 vector)
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:,None] < 0, tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder',
                                            mask=None,
                                            one_step=True)
    next_state = proj[0]
    if next_state.ndim > 2:
        next_state = next_state[0]

    # output
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, next_state, options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, next_state, options, prefix='ff_logit', activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print('Building f_next..', end='')
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=False)
    print('Done')

    return f_init, f_next
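
A hedged sketch of the decoding loop these two functions support; y is seeded with -1 so the first step uses the all-zero embedding (ctx_vec and maxlen are illustrative names):

import numpy

def gen_sample(f_init, f_next, ctx_vec, maxlen=30):
    state = f_init(ctx_vec)
    y = -1 * numpy.ones((ctx_vec.shape[0],), dtype='int64')  # -1 -> zero embedding
    sample = []
    for _ in range(maxlen):
        probs, word, state = f_next(y, state)
        sample.append(word)
        y = word.astype('int64')
    return numpy.array(sample)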
Example #20
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Encoder
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix='encoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Variational layer
    params = get_layer(options['vae'])[0](options,
                                          params,
                                          prefix='vae',
                                          nhid=options['vae_nhid'],
                                          nlatent=options['vae_nlatent'],
                                          ndim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_f',
                                              nin=options['dim_word'],
                                              dim=options['dim'])
    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_b',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit',
                                nin=options['dim'],
                                nout=options['n_words'])

    return params
Example #21
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])

    # Sentence encoder
    if options['encoder'] != 'bow':
        params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
                                                  nin=options['dim_word'], dim=options['dim'])

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image', nin=options['dim_image'], nout=options['dim'])

    return params
Example #22
def build_sampler(tparams, options, trng):
    """
    Forward sampling
    """
    ctx = tensor.matrix('ctx', dtype='float32')
    ctx0 = ctx

    print('Building f_init...', end='')
    init_state = get_layer('ff')[1](tparams, ctx, options, prefix='ff_state', activ='tanh')
    f_init = theano.function([ctx], init_state, name='f_init', profile=False)

    # y: previous word index for each sample (int64 vector)
    y = tensor.vector('y_sampler', dtype='int64')
    init_state = tensor.matrix('init_state', dtype='float32')

    # if it's the first word, emb should be all zero
    emb = tensor.switch(y[:,None] < 0, tensor.alloc(0., 1, tparams['Wemb'].shape[1]),
                        tparams['Wemb'][y])

    # decoder
    proj = get_layer(options['decoder'])[1](tparams, emb, init_state, options,
                                            prefix='decoder',
                                            mask=None,
                                            one_step=True)
    next_state = proj[0]

    # output
    if options['doutput']:
        hid = get_layer('ff')[1](tparams, next_state, options, prefix='ff_hid', activ='tanh')
        logit = get_layer('ff')[1](tparams, hid, options, prefix='ff_logit', activ='linear')
    else:
        logit = get_layer('ff')[1](tparams, next_state, options, prefix='ff_logit', activ='linear')
    next_probs = tensor.nnet.softmax(logit)
    next_sample = trng.multinomial(pvals=next_probs).argmax(1)

    # next word probability
    print('Building f_next..', end='')
    inps = [y, init_state]
    outs = [next_probs, next_sample, next_state]
    f_next = theano.function(inps, outs, name='f_next', profile=False)
    print('Done')

    return f_init, f_next
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    if options['encoder'] == 'bow':
        sents = (emb * mask[:, :, None]).sum(0)
    else:
        proj = get_layer(options['encoder'])[1](tparams,
                                                emb,
                                                None,
                                                options,
                                                prefix='encoder',
                                                mask=mask)
        sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams,
                                im,
                                options,
                                prefix='ff_image',
                                activ='linear')

    # Compute loss
    cost = contrastive_loss(options['margin'], images, sents)

    return trng, [x, mask, im], cost
def encode_sentences_with_topicvector(tparams, options, x, mask, topics):
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]
    t2gru_emb = get_layer('ff')[1](tparams, topics, options, prefix='ff_topic_vector1_emb_gru', activ='linear')
    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, t2gru_emb * 0.1, options,
                                            prefix='encoder',
                                            mask=mask)
    s = proj[0][-1]
    #if options['v_norm'] == 'l2' :
    s = l2norm(s)
    #s = maxnorm2(s)
    if options['abs']:
        #s = abs(s)
        s = tensor.maximum(s, 0)
    return s
def encode_images(tparams, options, im):
    im_emb = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')
    #if options['v_norm'] == 'l2' :
    im_emb = l2norm(im_emb)
    #im_emb = maxnorm2(im_emb)
    if options['abs']:
        #im_emb = abs(im_emb)
        im_emb = tensor.maximum(im_emb, 0)
        
    return im_emb
def encode_topic_vector2(tparams, options, topics):
    t_emb = get_layer('ff')[1](tparams, topics, options, prefix='ff_topic_vector2', activ='linear')
    t_emb = l2norm(t_emb)
    #t_emb = maxnorm2(t_emb)

    if options['abs']:
        #im_emb = abs(im_emb)
        t_emb = tensor.maximum(t_emb, 0)
        
    return t_emb
Example #27
def init_params(options, preemb=None):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # Word embedding
    if preemb is None:
        params['Wemb'] = norm_weight(options['n_words'], options['dim_word'])
    else:
        print('using preemb')
        params['Wemb'] = preemb

    # Encoder
    params = get_layer(options['encoder'])[0](options,
                                              params,
                                              prefix='encoder',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Decoder: next sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_f',
                                              nin=options['dim_word'],
                                              dim=options['dim'])
    # Decoder: previous sentence
    params = get_layer(options['decoder'])[0](options,
                                              params,
                                              prefix='decoder_b',
                                              nin=options['dim_word'],
                                              dim=options['dim'])

    # Output layer
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_logit',
                                nin=options['dim'],
                                nout=options['n_words'])

    return params
def build_model(tparams, options):                                                                                           
    """
    Computation graph for the model
    """
    opt_ret = dict()
    trng = RandomStreams(1234)

    # description string: #words x #samples
    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype='float32')
    im = tensor.matrix('im', dtype='float32')
    con = tensor.matrix('con', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder',
                                            mask=mask)
    sents = proj[0][-1]
    sents = l2norm(sents)

    # Encode images (source)
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')

    # Compute loss
    cost, updates = theano.scan(_step,
                                sequences=con,
                                outputs_info=tensor.alloc(0.),
                                non_sequences=[sents, images, options['margin']],
                                n_steps=con.shape[0],
                                profile=False,
                                strict=True)
    cost = cost[-1]
                               
    return trng, [x, mask, im, con], cost
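
_step is assumed to be defined elsewhere in the module; given the non_sequences it presumably accumulates a contrastive loss over the rows of con. A minimal self-contained theano.scan sketch showing the same running-sum accumulation pattern (outputs_info carries the accumulator):

import numpy
import theano
import theano.tensor as tensor

seq = tensor.vector('seq', dtype='float32')

def _step(item, acc):
    return acc + item  # carry a running sum through the loop

acc, _ = theano.scan(_step,
                     sequences=seq,
                     outputs_info=tensor.alloc(numpy.float32(0.)))
f = theano.function([seq], acc[-1])
print(f(numpy.arange(5, dtype='float32')))  # 10.0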
Example #29
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # if using bidirectional RNN,
    # forward and backward embeddings are half the final MM embedding size because
    # they will be concatenated to form the sentence embedding
    sent_dim = int(options['dim'])//2 if options['bidirectional_enc'] else int(options['dim'])

    langs = options['langs']
    for lang in langs:
        # word embeddings
        params['Wemb_%s'%lang] = norm_weight(options['n_words_%s'%lang], options['dim_word'])

        # encoder type (currently 'bow', 'gru' or 'lstm')
        if options['encoder_%s'%lang] != 'bow':
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix='encoder_%s_%i'%(lang,i)
                # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                nin=options['dim_word'] if i==0 else sent_dim
                params = get_layer(options['encoder_%s'%lang])[0](options, params, prefix=layer_name_prefix,
                                                                  nin=nin, dim=sent_dim)
            if options['bidirectional_enc']:
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix='encoder_%s_r_%i'%(lang,i)
                    # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                    nin=options['dim_word'] if i==0 else sent_dim
                    params = get_layer(options['encoder_%s'%lang])[0](options, params, prefix=layer_name_prefix,
                                                                      nin=nin, dim=sent_dim)

    # Image encoder
    params = get_layer('ff')[0](options, params, prefix='ff_image', nin=options['dim_image'], nout=options['dim'])

    return params
Example #30
def encode_sentences(tparams, options, x, mask):
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # Encode sentences (source)
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder',
                                            mask=mask)
    s = l2norm(proj[0][-1])
    if options['abs']:
        s = abs(s)

    return s
Example #32
def build_image_encoder(tparams, options):
    """
    Encoder only, for images
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # image features
    im = tensor.matrix('im', dtype='float32')

    # Encode images
    images = get_layer('ff')[1](tparams, im, options, prefix='ff_image', activ='linear')
    images = l2norm(images)
    
    return trng, [im], images
def build_ar_layers(x, tparams, options, features, hiddens):
    u1, u2 = (x[:, 0, :, :], x[:, 1, :, :].transpose(0, 2, 1))

    h1, h2 = hiddens
    # concatenate the payoff matrix onto the final layer hidden units
    utility = (tensor.concatenate((u1.reshape(
        (u1.shape[0], 1, u1.shape[1], u1.shape[2])), h1),
                                  axis=1),
               tensor.concatenate((u2.reshape(
                   (u2.shape[0], 1, u2.shape[1], u2.shape[2])), h2),
                                  axis=1))

    ar_layers = options['ar_layers']

    ar_lists = ([], [])
    opp = [None, None]
    weighted_feature_list = ([], [])
    br_list = ([], [])
    for i in range(ar_layers):
        for p in range(2):
            if i == (ar_layers - 1) and p == 1:
                continue  # don't build ar layer for pl 2 in the last layer
            feat = features[p]
            ar, weighted_features, br = get_layer('ar')[1](tparams,
                                                           feat,
                                                           options,
                                                           payoff=utility[p],
                                                           prefix='p%d_ar%d' %
                                                           (p, i),
                                                           opposition=opp[p],
                                                           level=i)
            n, d = ar.shape
            ar = ar.reshape((n, 1, d))  # make space to concat ar layers
            weighted_feature_list[p].append(weighted_features)
            if i == 0:
                ar_lists[p].append(ar)
            else:
                ar_lists[p].append(
                    tensor.concatenate((ar_lists[p][i - 1], ar), axis=1))
                br_list[p].append(br)

        # append each layer then update the opposition variable...
        if i < ar_layers - 1:
            for p in range(2):
                opp[1 - p] = ar_lists[p][i]
    # return ar_lists[0][ar_layers-1]
    return ar_lists, weighted_feature_list, br_list
Example #35
def build_encoder_w2v(tparams, options):
    """
    Computation graph for encoder, given pre-trained word embeddings
    """
    opt_ret = dict()
    trng = RandomStreams(1234)
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    # encoder
    proj = get_layer(options['encoder'])[1](tparams,
                                            embedding,
                                            None,
                                            options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]
    return trng, embedding, x_mask, ctx
Example #36
    def __init__(self, net_config, data_cache):
        #net_config_str = json.dumps(net_config, sort_keys=True, indent=4, separators=(',', ':'))
        #LOGGER.info("\n" + net_config_str)
        self.data_cache = data_cache
        self.inputs = net_config.get("inputs", [])
        self.check_net_config(net_config)
        self.outputs = net_config.get("outputs", [])

        # layers
        self.layers = []
        self.name2layer = {}
        model_disk_base = net_config.get("model_cache",
                                         {}).get("disk_base", None)
        for layer_config in net_config["layers"]:
            layer = get_layer(layer_config, self.data_cache)
            layer.model_disk_base = model_disk_base
            self.layers.append(layer)
            self.name2layer[layer.name] = layer
Example #38
    def build_mlp_predict(self,
                          enc_states,
                          dim_emb=0,
                          dim=0,
                          activation_mlp='relu',
                          **kwargs):
        """
        Builds an MLP scoring function for use during prediction / test time.

        We want this to predict a single 4096d vector for each input.

        :param dim_emb:
        :param dim:
        :param activation_mlp:
        :param kwargs:
        :return:

        TODO: Redefine the predict function so it predicts the embedding of
        the sentence and the image embedding.
        """

        # set MLP activation function
        assert activation_mlp in ('relu', 'tanh'), \
            'MLP activation function must be tanh or relu'
        activation_mlp = 'lambda x: tensor.nnet.relu(x)' \
            if activation_mlp == 'relu' else 'lambda x: tensor.tanh(x)'
        logger.warn('Using MLP activation function: {}'.format(activation_mlp))

        theano_params = self.theano_params
        # The input to the MLP will be the mean value of the hidden states for
        # each instance in the minibatch.
        if kwargs['verbose']:
            # `states_mean` was undefined in the original; log the MLP input instead
            logger.warn(enc_states.tag.test_value)

        # train a single layer MLP to do everything
        output = get_layer('ff')[1](theano_params,
                                    enc_states,
                                    prefix='mlp',
                                    activ=activation_mlp)

        return output
def build_model(tparams, options, rng=None):
    """
    Computation graph for the model
    """
    if rng is None:
        rng = numpy.random.RandomState(123)
    trng = RandomStreams(rng.randint(1000000))
    use_noise = theano.shared(numpy.float32(0.))
    x = tensor.tensor4('x')

    own_features, hidden1 = build_features(x, tparams, options, use_noise,
                                           trng)
    opp_features, hidden2 = build_features(
        x.transpose(
            (0, 1, 3, 2))[:, [1, 0], :, :],  # transpose to get player 2 model
        tparams,
        options,
        use_noise,
        trng)

    ar, weighted_feature_list, br_list = build_ar_layers(
        x, tparams, options, (own_features, opp_features),
        (hidden1[-1], hidden2[-1]))
    ar_layers = options['ar_layers']
    out = get_layer('output')[1](tparams, ar[0][ar_layers - 1], options)

    intermediate_fns = {
        'ar': ar,
        'own_features': own_features,
        'opp_features': opp_features,
        'hidden1': hidden1,
        'hidden2': hidden2,
        'weighted_feature_list': weighted_feature_list,
        'br_list': br_list
    }
    if not options['debug']:
        return trng, use_noise, x, out
    else:
        return trng, use_noise, x, out, intermediate_fns
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    # x: current sentence
    # y: next sentence
    # z: previous sentence
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')
    z = tensor.matrix('z', dtype='int64')
    z_mask = tensor.matrix('z_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_timesteps_f = y.shape[0]
    n_timesteps_b = z.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, emb, None, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # Word embedding (ahead)
    embf = tparams['Wemb'][y.flatten()].reshape([n_timesteps_f, n_samples, options['dim_word']])
    embf_shifted = tensor.zeros_like(embf)
    embf_shifted = tensor.set_subtensor(embf_shifted[1:], embf[:-1])
    embf = embf_shifted

    # Word embedding (behind)
    embb = tparams['Wemb'][z.flatten()].reshape([n_timesteps_b, n_samples, options['dim_word']])
    embb_shifted = tensor.zeros_like(embb)
    embb_shifted = tensor.set_subtensor(embb_shifted[1:], embb[:-1])
    embb = embb_shifted

    # decoder (ahead)
    projf = get_layer(options['decoder'])[1](tparams, embf, dec_ctx, options,
                                             prefix='decoder_f',
                                             mask=y_mask)

    # decoder (behind)
    projb = get_layer(options['decoder'])[1](tparams, embb, dec_ctx, options,
                                             prefix='decoder_b',
                                             mask=z_mask)

    # compute word probabilities (ahead)
    logit = get_layer('ff')[1](tparams, projf[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1], logit_shp[2]]))

    # cost (ahead)
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words'] + y_flat
    costf = -tensor.log(probs.flatten()[y_flat_idx]+1e-8)
    costf = costf.reshape([y.shape[0],y.shape[1]])
    costf = (costf * y_mask).sum(0)
    costf = costf.sum()

    # compute word probabilities (behind)
    logit = get_layer('ff')[1](tparams, projb[0], options, prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1], logit_shp[2]]))

    # cost (behind)
    z_flat = z.flatten()
    z_flat_idx = tensor.arange(z_flat.shape[0]) * options['n_words'] + z_flat
    costb = -tensor.log(probs.flatten()[z_flat_idx]+1e-8)
    costb = costb.reshape([z.shape[0],z.shape[1]])
    costb = (costb * z_mask).sum(0)
    costb = costb.sum()

    # total cost
    cost = costf + costb

    return trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost
Example #41
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()

    trng = RandomStreams(1234)

    # description string: #words x #samples
    # x: current sentence
    # y: next sentence
    # z: previous sentence
    x = tensor.matrix('x', dtype='int64')
    x_mask = tensor.matrix('x_mask', dtype='float32')
    y = tensor.matrix('y', dtype='int64')
    y_mask = tensor.matrix('y_mask', dtype='float32')
    z = tensor.matrix('z', dtype='int64')
    z_mask = tensor.matrix('z_mask', dtype='float32')

    n_timesteps = x.shape[0]
    n_timesteps_f = y.shape[0]
    n_timesteps_b = z.shape[0]
    n_samples = x.shape[1]

    # Word embedding (source)
    emb = tparams['Wemb'][x.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_word']])

    # encoder
    proj = get_layer(options['encoder'])[1](tparams,
                                            emb,
                                            None,
                                            options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # Word embedding (ahead)
    embf = tparams['Wemb'][y.flatten()].reshape(
        [n_timesteps_f, n_samples, options['dim_word']])
    embf_shifted = tensor.zeros_like(embf)
    embf_shifted = tensor.set_subtensor(embf_shifted[1:], embf[:-1])
    embf = embf_shifted

    # Word embedding (behind)
    embb = tparams['Wemb'][z.flatten()].reshape(
        [n_timesteps_b, n_samples, options['dim_word']])
    embb_shifted = tensor.zeros_like(embb)
    embb_shifted = tensor.set_subtensor(embb_shifted[1:], embb[:-1])
    embb = embb_shifted

    # decoder (ahead)
    projf = get_layer(options['decoder'])[1](tparams,
                                             embf,
                                             dec_ctx,
                                             options,
                                             prefix='decoder_f',
                                             mask=y_mask)

    # decoder (behind)
    projb = get_layer(options['decoder'])[1](tparams,
                                             embb,
                                             dec_ctx,
                                             options,
                                             prefix='decoder_b',
                                             mask=z_mask)

    # compute word probabilities (ahead)
    logit = get_layer('ff')[1](tparams,
                               projf[0],
                               options,
                               prefix='ff_logit',
                               activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(
        logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (ahead)
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words'] + y_flat
    costf = -tensor.log(probs.flatten()[y_flat_idx] + 1e-8)
    costf = costf.reshape([y.shape[0], y.shape[1]])
    costf = (costf * y_mask).sum(0)
    costf = costf.sum()

    # compute word probabilities (behind)
    logit = get_layer('ff')[1](tparams,
                               projb[0],
                               options,
                               prefix='ff_logit',
                               activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(
        logit.reshape([logit_shp[0] * logit_shp[1], logit_shp[2]]))

    # cost (behind)
    z_flat = z.flatten()
    z_flat_idx = tensor.arange(z_flat.shape[0]) * options['n_words'] + z_flat
    costb = -tensor.log(probs.flatten()[z_flat_idx] + 1e-8)
    costb = costb.reshape([z.shape[0], z.shape[1]])
    costb = (costb * z_mask).sum(0)
    costb = costb.sum()

    # total cost
    cost = costf + costb

    return trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost
Example #42
def init_params(options):
    """
    Initialize all parameters
    """
    params = OrderedDict()

    # if using bidirectional RNN,
    # forward and backward embeddings are half the final MM embedding size because
    # they will be concatenated to form the sentence embedding
    #sent_dim = int(options['dim'])//2 if options['bidirectional_enc'] else int(options['dim'])
    sent_dim = options['dim']
    ctx_dim = options['dim']  # context vector in case of mono RNN encoder

    langs = options['langs']
    for idx, lang in enumerate(langs):
        # word embeddings
        params['Wemb_%s' % lang] = norm_weight(options['n_words_%s' % lang],
                                               options['dim_word'])

        # encoder type (currently 'bow', 'gru' or 'lstm')
        if options['encoder_%s' % lang] != 'bow':
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                nin = options['dim_word'] if i == 0 else sent_dim
                params = get_layer(options['encoder_%s' % lang])[0](
                    options,
                    params,
                    prefix=layer_name_prefix,
                    nin=nin,
                    dim=sent_dim)
            if options['bidirectional_enc']:
                # context vector in case of biRNN encoder
                ctx_dim = 2 * options['dim']

                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # first hidden layer has input word embeddings, next layers have input (hidden) sentence embeddings
                    nin = options['dim_word'] if i == 0 else sent_dim
                    params = get_layer(options['encoder_%s' % lang])[0](
                        options,
                        params,
                        prefix=layer_name_prefix,
                        nin=nin,
                        dim=sent_dim)

        # if using general attention, create matrices for each possible
        # sentence-image and sentence-sentence pairs
        if options.get('attention_type') == 'general':
            # sentence_lang-image mapping
            params['image_sentence_%i_mapping'%idx] = \
                norm_weight(options['dim_multimodal'], options['dim_multimodal'], ortho=False)

            # sentence_lang1 - sentence_langN mappings
            for idx1, lang1 in enumerate(langs):
                if idx1 <= idx:
                    continue  # handle each unordered language pair once
                params['sentence_%i_sentence_%i_mapping'%(idx,idx1)] = \
                    norm_weight(options['dim_multimodal'], options['dim_multimodal'], ortho=False)

    # Sentence-multimodal projection
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_sentence_mm',
                                nin=ctx_dim,
                                nout=options['dim_multimodal'])

    # Image-multimodal projection
    params = get_layer('ff')[0](options,
                                params,
                                prefix='ff_image_mm',
                                nin=options['dim_image'],
                                nout=options['dim_multimodal'])

    return params
Example #43
def build_model(tparams, options):
    """
    Computation graph for the model
    """
    opt_ret = dict()
    use_noise = theano.shared(numpy.asarray(1., dtype=theano.config.floatX))
    try:
        trng = RandomStreams(1234, use_cuda=True)
    except Exception:
        print('Could not apply use_cuda=True in RandomStreams ...')
        trng = RandomStreams(1234)

    xs = []
    xmasks = []

    langs = options['langs']
    for lang in langs:
        # description string: #words x #samples
        x_lang = tensor.matrix('x_%s' % lang, dtype='int64')
        mask_lang = tensor.matrix('mask_%s' % lang, dtype='float32')
        xs.append(x_lang)
        xmasks.append(mask_lang)

    xs_r = []
    xmasks_r = []
    if options['bidirectional_enc']:
        for i, lang in enumerate(langs):
            x_lang = xs[i]
            mask_lang = xmasks[i]
            # reverse
            x_lang_r = x_lang[::-1]
            mask_lang_r = mask_lang[::-1]

            xs_r.append(x_lang_r)
            xmasks_r.append(mask_lang_r)

    sents_all = []
    im = tensor.matrix('im', dtype='float32')
    n_samples = im.shape[0]

    for i, lang in enumerate(langs):
        x_lang = xs[i]
        mask_lang = xmasks[i]

        n_timesteps_lang = x_lang.shape[0]
        n_samples_lang = x_lang.shape[1]

        if options['use_dropout']:
            # dropout probs for the word embeddings
            retain_probability_emb = 1 - options['dropout_embedding']
            # dropout probs for the RNN hidden states
            retain_probability_hidden = 1 - options['dropout_hidden']
            # dropout probs for the source words
            retain_probability_source = 1 - options['dropout_source']
            # hidden states
            rec_dropout = shared_dropout_layer(
                (2, n_samples_lang, options['dim']), use_noise, trng,
                retain_probability_hidden)
            rec_dropout_r = shared_dropout_layer(
                (2, n_samples_lang, options['dim']), use_noise, trng,
                retain_probability_hidden)
            # word embeddings
            emb_dropout = shared_dropout_layer(
                (2, n_samples_lang, options['dim_word']), use_noise, trng,
                retain_probability_emb)
            emb_dropout_r = shared_dropout_layer(
                (2, n_samples_lang, options['dim_word']), use_noise, trng,
                retain_probability_emb)
            # source words
            source_dropout = shared_dropout_layer(
                (n_timesteps_lang, n_samples_lang, 1), use_noise, trng,
                retain_probability_source)
            source_dropout = tensor.tile(source_dropout,
                                         (1, 1, options['dim_word']))
        else:
            # dropout disabled: all-ones masks, inputs pass through unchanged
            rec_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(
                numpy.array([1.] * 2, dtype='float32'))
            emb_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(
                numpy.array([1.] * 2, dtype='float32'))

        # Word embedding (for a particular language `lang`)
        # forward
        emb_lang = tparams['Wemb_%s' % lang][x_lang.flatten()]
        emb_lang = emb_lang.reshape(
            [n_timesteps_lang, n_samples_lang, options['dim_word']])

        if options['use_dropout']:
            emb_lang *= source_dropout

        if options['bidirectional_enc']:
            x_lang_r = xs_r[i]
            mask_lang_r = xmasks_r[i]

            # backward lang encoder
            emb_lang_r = tparams['Wemb_%s' % lang][x_lang_r.flatten()]
            emb_lang_r = emb_lang_r.reshape(
                [n_timesteps_lang, n_samples_lang, options['dim_word']])

            if options['use_dropout']:
                emb_lang_r *= source_dropout[::-1]

        # Encode sentence in language `lang`
        if options['encoder_%s' % lang] == 'bow':
            sents_lang = (emb_lang * mask_lang[:, :, None]).sum(0)
        else:
            # iteratively push input from the first hidden layer up to the last
            for li in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, li)
                # the first hidden layer reads the word embeddings;
                # subsequent layers read the output of the layer below
                layer_below = emb_lang if li == 0 else layer_below[0]

                # (extra dropout on the layer input is intentionally not
                # applied here; emb_dropout/rec_dropout are handled inside
                # the recurrent layer itself)
                layer_below = get_layer(options['encoder_%s' % lang])[1](
                    tparams,
                    layer_below,
                    options,
                    None,
                    prefix=layer_name_prefix,
                    mask=mask_lang,
                    emb_dropout=emb_dropout,
                    rec_dropout=rec_dropout)

                if li == int(options['n_enc_hidden_layers']) - 1:
                    # sentence embeddings are the output of the last hidden layer
                    proj_lang = layer_below

            # apply forward and backward steps and concatenate both
            if options['bidirectional_enc']:
                # concatenate forward and backward pass RNNs:
                # iteratively push input from the first hidden layer up to the last
                for li in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, li)
                    # the first hidden layer reads the reversed word embeddings;
                    # subsequent layers read the output of the layer below
                    layer_below = emb_lang_r if li == 0 else layer_below[0]

                    # (extra dropout on the layer input is intentionally not applied here)
                    layer_below = get_layer(options['encoder_%s' % lang])[1](
                        tparams,
                        layer_below,
                        options,
                        None,
                        prefix=layer_name_prefix,
                        mask=mask_lang_r,
                        emb_dropout=emb_dropout_r,
                        rec_dropout=rec_dropout_r)

                    if li == int(options['n_enc_hidden_layers']) - 1:
                        # sentence embeddings are the output of the last hidden layer
                        proj_lang_r = layer_below

                # use the last state of forward + backward encoder rnns
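                # (proj_lang[0] is (#timesteps, #samples, dim); its last state
                # is (#samples, dim), so axis = ndim - 2 = 1 concatenates the
                # forward and backward states along the feature axis, 2*dim)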
                sents_lang = concatenate(
                    [proj_lang[0][-1], proj_lang_r[0][-1]],
                    axis=proj_lang[0].ndim - 2)
            else:
                sents_lang = proj_lang[0][-1]

        if options['use_dropout']:
            sents_lang *= shared_dropout_layer(
                (n_samples_lang, options['dim']), use_noise, trng,
                retain_probability_hidden)

        # project sentences into multimodal space
        sents_mm = get_layer('ff')[1](tparams,
                                      sents_lang,
                                      options,
                                      prefix='ff_sentence_mm',
                                      activ='linear')

        if options.get('attention_type', 'dot') == 'dot':
            sents_mm = l2norm(sents_mm)

        if options['use_dropout']:
            sents_mm *= shared_dropout_layer(
                (n_samples_lang, options['dim_multimodal']), use_noise, trng,
                retain_probability_hidden)

        sents_all.append(sents_mm)

    # Encode images
    images = get_layer('ff')[1](tparams,
                                im,
                                options,
                                prefix='ff_image_mm',
                                activ='linear')

    if options.get('attention_type', 'dot') == 'dot':
        images = l2norm(images)

    if options['use_dropout']:
        images *= shared_dropout_layer((n_samples, options['dim_multimodal']),
                                       use_noise, trng,
                                       retain_probability_hidden)

    # Compute loss
    lambda_img_sent = options['lambda_img_sent']
    lambda_sent_sent = options['lambda_sent_sent']
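    # contrastive_loss_all presumably adds sentence-sentence ranking terms
    # (weighted by lambda_sent_sent) on top of the image-sentence terms
    # (weighted by lambda_img_sent); neither helper is shown in this listing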
    if options['use_all_costs']:
        cost = contrastive_loss_all(tparams, options, images, sents_all,
                                    lambda_img_sent, lambda_sent_sent)
    else:
        cost = contrastive_loss(tparams, options, images, sents_all)

    # return flattened inputs
    inps = []
    inps.extend(xs)
    inps.extend(xmasks)
    inps.append(im)

    return trng, inps, cost
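
Two helpers used above but not defined in this listing: shared_dropout_layer and l2norm. In Nematus-style Theano code the former switches between binomial noise at training time (use_noise = 1) and a deterministic retain-probability scaling at test time (use_noise = 0), while the latter row-normalizes a matrix. A minimal sketch under those assumptions:

import numpy
import theano
import theano.tensor as tensor

def shared_dropout_layer(shape, use_noise, trng, value):
    # training: sample a binary mask with retain probability `value`;
    # test time: scale deterministically by `value`
    return tensor.switch(
        use_noise,
        trng.binomial(shape, p=value, n=1, dtype='float32'),
        theano.shared(numpy.float32(value)))

def l2norm(X):
    # scale each row of X to have unit L2 norm
    norm = tensor.sqrt(tensor.pow(X, 2).sum(1))
    return X / norm[:, None]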
Example #44
def build_sentence_encoders(tparams, options):
    """
    Sentence encoder only to be used at test time
    """
    opt_ret = dict()
    trng = RandomStreams(1234)
    # test time: noise off, so dropout layers scale by the retain probability
    use_noise = theano.shared(numpy.asarray(0., dtype=theano.config.floatX))

    in_outs = []

    langs = options['langs']
    for lang in langs:
        # description string: #words x #samples
        # forward
        x = tensor.matrix('x_%s' % lang, dtype='int64')
        mask = tensor.matrix('x_mask_%s' % lang, dtype='float32')

        n_timesteps = x.shape[0]
        n_samples = x.shape[1]

        # Word embedding (forward)
        emb = tparams['Wemb_%s' % lang][x.flatten()].reshape(
            [n_timesteps, n_samples, options['dim_word']])

        if options['bidirectional_enc']:
            # backward RNN
            x_r = x[::-1]
            mask_r = mask[::-1]
            emb_r = tparams['Wemb_%s' % lang][x_r.flatten()].reshape(
                [n_timesteps, n_samples, options['dim_word']])

        if options['use_dropout']:
            retain_probability_emb = 1 - options['dropout_embedding']
            retain_probability_hidden = 1 - options['dropout_hidden']
            retain_probability_source = 1 - options['dropout_source']
            rec_dropout = theano.shared(
                numpy.array([retain_probability_hidden] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(
                numpy.array([retain_probability_hidden] * 2, dtype='float32'))
            emb_dropout = theano.shared(
                numpy.array([retain_probability_emb] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(
                numpy.array([retain_probability_emb] * 2, dtype='float32'))
            source_dropout = theano.shared(
                numpy.float32(retain_probability_source))
            emb *= source_dropout
            if options['bidirectional_enc']:
                emb_r *= source_dropout
        else:
            rec_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            rec_dropout_r = theano.shared(
                numpy.array([1.] * 2, dtype='float32'))
            emb_dropout = theano.shared(numpy.array([1.] * 2, dtype='float32'))
            emb_dropout_r = theano.shared(
                numpy.array([1.] * 2, dtype='float32'))

        # Encode sentences
        if options['encoder_%s' % lang] == 'bow':
            sents = (emb * mask[:, :, None]).sum(0)
        else:
            # iteratively push input from the first hidden layer up to the last
            for i in range(int(options['n_enc_hidden_layers'])):
                layer_name_prefix = 'encoder_%s_%i' % (lang, i)
                # first layer reads the word embeddings; later layers read the previous layer's output
                layer_below = emb if i == 0 else layer_below[0]
                layer_below = get_layer(options['encoder_%s' % lang])[1](
                    tparams,
                    layer_below,
                    options,
                    None,
                    prefix=layer_name_prefix,
                    mask=mask,
                    emb_dropout=emb_dropout,
                    rec_dropout=rec_dropout)

                if i == int(options['n_enc_hidden_layers']) - 1:
                    # sentence embeddings (projections) are the output of the last hidden layer
                    proj = layer_below

            if options['bidirectional_enc']:
                for i in range(int(options['n_enc_hidden_layers'])):
                    layer_name_prefix = 'encoder_%s_r_%i' % (lang, i)
                    # first layer reads the reversed word embeddings; later layers read the previous layer's output
                    layer_below = emb_r if i == 0 else layer_below[0]
                    layer_below = get_layer(options['encoder_%s' % lang])[1](
                        tparams,
                        layer_below,
                        options,
                        None,
                        prefix=layer_name_prefix,
                        mask=mask_r,
                        emb_dropout=emb_dropout_r,
                        rec_dropout=rec_dropout_r)

                    if i == int(options['n_enc_hidden_layers']) - 1:
                        # sentence embeddings (projections) are the output of the last hidden layer
                        proj_r = layer_below

                # use last hidden state of forward and backward RNNs
                sents = concatenate([proj[0][-1], proj_r[0][-1]],
                                    axis=proj[0].ndim - 2)
            else:
                sents = proj[0][-1]

        if options['use_dropout']:
            sents *= shared_dropout_layer((n_samples, options['dim']),
                                          use_noise, trng,
                                          retain_probability_hidden)

        # project sentences into multimodal space
        sents_mm = get_layer('ff')[1](tparams,
                                      sents,
                                      options,
                                      prefix='ff_sentence_mm',
                                      activ='linear')
        if options.get('attention_type', 'dot') == 'dot':
            sents_mm = l2norm(sents_mm)

        if options['use_dropout']:
            sents_mm *= shared_dropout_layer(
                (n_samples, options['dim_multimodal']), use_noise, trng,
                retain_probability_hidden)

        # outputs per language
        in_outs.append(([x, mask], sents_mm))

    return trng, in_outs
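
A hypothetical usage sketch for the encoders returned above (tparams and model_options are assumed to come from the surrounding training script):

# compile one encoding function per language, ordered as model_options['langs']
trng, in_outs = build_sentence_encoders(tparams, model_options)
f_sencs = [theano.function(inps, out, name='f_senc_%d' % i)
           for i, (inps, out) in enumerate(in_outs)]

# embed a batch for the first language; x is a (#words x #samples) int64
# matrix and x_mask its float32 mask, as in the graphs above:
# sents_mm = f_sencs[0](x, x_mask)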