コード例 #1
0
    def step(self, y_prev, mask, state, *args):
        """Run one decoding step that attends over ``self.n_src`` sources.

        The step has the three stages marked by the inline comments: a first
        GRU cell turns the previous state into an intermediate state, that
        intermediate state queries every source through attention, and a
        second GRU cell consumes the combined context.

        The per-source contexts are combined according to ``self.method``:

        * ``"attn"``   -- a second attention (scope ``"beta"``) weights the
          per-source contexts and sums them;
        * ``"concat"`` -- the per-source contexts are concatenated along the
          last axis.

        Args:
            y_prev: input fed to the first GRU cell.
            mask: step mask; it is expanded with ``mask[:, None]`` and, where
                it is 0, the incoming ``state`` is carried over unchanged.
            state: decoder state from the previous step.
            *args: exactly ``3 * self.n_src`` tensors laid out as all keys,
                then all values, then all masks (``n_src`` of each).

        Returns:
            Tuple ``(next_state, context)``.

        Raises:
            AssertionError: if ``args`` does not hold ``3 * n_src`` items.
            ValueError: if ``self.method`` is neither ``"attn"`` nor
                ``"concat"``.
        """
        n_src = self.n_src
        assert len(args) == n_src * 3, (
            "step() expects %d source tensors, got %d" % (n_src * 3,
                                                          len(args)))
        # Previously an unknown method fell through both branches below and
        # silently used the context of the *last* source only.
        if self.method not in ("attn", "concat"):
            raise ValueError(
                "unknown context combination method: %r" % (self.method,))

        src_keys = args[:n_src]
        src_values = args[n_src:2 * n_src]
        src_masks = args[2 * n_src:]

        mask = mask[:, None]
        # s_j^{\prime} = GRU^1(y_{j-1}, s_{j-1})
        _, state_prime = self.cell1(y_prev, state, scope="gru1")
        state_prime = (1.0 - mask) * state + mask * state_prime

        # c_j = att(H, s_j^{\prime}), computed separately for every source
        contexts = []
        for i, (_key, _val, _mask) in enumerate(
                zip(src_keys, src_values, src_masks)):
            alpha = attention(state_prime,
                              _key,
                              _mask,
                              self.dim_hid,
                              self.dim_key,
                              scope='attn_alpha_%d' % i)
            # attention-weighted sum of the values over the leading axis
            contexts.append(theano.tensor.sum(alpha[:, :, None] * _val, 0))

        if self.method == "attn":
            # Stack the per-source contexts on a new leading axis of length
            # n_src so they can be attended over like a sequence.
            contexts = T.reshape(T.concatenate(contexts, 0),
                                 [n_src] + list(contexts[0].shape))
            with ops.variable_scope("beta"):
                beta_keys = map_key(contexts, self.dim_value, self.dim_key)

            # All-ones mask: no source is ever masked out at this level.
            beta = attention(state_prime,
                             beta_keys,
                             T.ones(contexts.shape[:2]),
                             self.dim_hid,
                             self.dim_key,
                             scope='beta')
            context = T.sum(beta[:, :, None] * contexts, 0)
        else:
            # self.method == "concat" (validated above)
            context = T.concatenate(contexts, -1)

        # s_j = GRU^2(c_j, s_j^{\prime})
        _, next_state = self.cell2(context, state_prime, scope="gru2")
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state, context
コード例 #2
0
    def __init__(self, **option):
        """Build the training and decoding graphs of the two-decoder model.

        A ``softdec.SoftDecoder`` first runs over the source annotation and
        produces ``soft_states``; a ``decoder2.DecoderGruCond`` with two
        sources then attends over both the source annotation and those soft
        states. The training cost is
        ``lambda * soft_cost + (1 - lambda) * mean(sentence_cost)``.

        Option keys read here: ``embdim``, ``hidden``, ``maxhid``,
        ``maxpart``, ``deephid``, ``vocabulary``, ``method``, ``eosid``,
        ``softk``, ``lambda``, ``decoder`` and the optional ``scope``,
        ``initializer``, ``regularizer``, ``keep_prob`` (defaults are written
        back into ``option``).

        Attributes set: ``cost``, ``inputs``, ``outputs``, ``updates``,
        ``align``, ``sample``, ``encode``, ``get_snt_cost``, ``option`` and,
        for the ``"GruCond"`` decoder, ``predict``.

        Raises:
            ValueError: if ``option["decoder"]`` is ``"GruSimple"``.
        """

        # source and target embedding dim
        sedim, tedim = option["embdim"]
        # source, target and attention hidden dim
        shdim, thdim, ahdim = option["hidden"]
        # maxout hidden dim / maxout part: not used by the decoders built
        # below, but still read so that a missing key fails here as before
        maxdim = option["maxhid"]
        maxpart = option["maxpart"]
        # deepout hidden dim
        deephid = option["deephid"]
        svocab, tvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab
        # source and target vocabulary size
        svsize, tvsize = len(sid2w), len(tid2w)

        # fill in defaults for the optional settings
        if "scope" not in option or option["scope"] is None:
            option["scope"] = "rnnsearch"

        if "initializer" not in option:
            option["initializer"] = None

        if "regularizer" not in option:
            option["regularizer"] = None

        if "keep_prob" not in option:
            option["keep_prob"] = 1.0

        dtype = theano.config.floatX
        initializer = option["initializer"]
        regularizer = option["regularizer"]
        # a falsy keep_prob (None, 0) means "no dropout"
        keep_prob = option["keep_prob"] or 1.0

        scope = option["scope"]
        decoder_scope = "decoder2"

        encoder = Encoder(sedim, shdim)
        import decoder2
        # two attended sources: the source annotation and the soft states
        decoder = decoder2.DecoderGruCond(2,
                                          option['method'],
                                          tedim,
                                          thdim,
                                          ahdim,
                                          2 * shdim + thdim,
                                          dim_readout=deephid,
                                          n_y_vocab=tvsize)

        # training graph
        with ops.variable_scope(scope,
                                initializer=initializer,
                                regularizer=regularizer,
                                dtype=dtype):
            src_seq = T.imatrix("source_sequence")
            src_mask = T.matrix("source_sequence_mask")
            tgt_seq = T.imatrix("target_sequence")
            tgt_mask = T.matrix("target_sequence_mask")
            byseq = T.imatrix("backward_target_sequence")

            with ops.variable_scope("source_embedding"):
                source_embedding = ops.get_variable("embedding",
                                                    [svsize, sedim])
                source_bias = ops.get_variable("bias", [sedim])

            with ops.variable_scope("target_embedding"):
                target_embedding = ops.get_variable("embedding",
                                                    [tvsize, tedim])
                target_bias = ops.get_variable("bias", [tedim])

            source_inputs = nn.embedding_lookup(source_embedding,
                                                src_seq) + source_bias
            target_inputs = nn.embedding_lookup(target_embedding,
                                                tgt_seq) + target_bias
            # byseq shares the target embedding table and bias
            by_inputs = nn.embedding_lookup(target_embedding,
                                            byseq) + target_bias

            if keep_prob < 1.0:
                source_inputs = nn.dropout(source_inputs, keep_prob=keep_prob)
                target_inputs = nn.dropout(target_inputs, keep_prob=keep_prob)
                by_inputs = nn.dropout(by_inputs, keep_prob=keep_prob)

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            annotation = nn.dropout(annotation, keep_prob=keep_prob)

            import softdec
            soft_decoder = softdec.SoftDecoder(option["eosid"],
                                               option["softk"],
                                               tedim,
                                               thdim,
                                               ahdim,
                                               2 * shdim,
                                               dim_readout=deephid,
                                               n_y_vocab=tvsize)
            with ops.variable_scope('soft_decoder'):
                # the soft decoder starts from the last forward encoder state
                initial_state = nn.feedforward(states[-1], [shdim, thdim],
                                               True,
                                               scope='initial',
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)
                soft_states, _, _, soft_mask = soft_decoder.infer(
                    mapped_keys, src_mask, annotation, initial_state,
                    target_embedding, target_bias, keep_prob)

            # same variables (reuse=True), now scored against byseq
            with ops.variable_scope('soft_decoder', reuse=True):
                _, _, soft_cost, _ = soft_decoder.forward(
                    byseq, by_inputs, tgt_mask, mapped_keys, src_mask,
                    annotation, initial_state, keep_prob)

            # compute initial state for decoder
            # first state of backward encoder
            # initialize with only encoder state
            final_state = r_states[0]

            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                # keys for query
                with ops.variable_scope('map-key-src'):
                    mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
                with ops.variable_scope('map-key-soft'):
                    mapped_keys_soft = map_key(soft_states, thdim, ahdim)

                _, _, _, snt_cost = decoder.forward(
                    tgt_seq, target_inputs, tgt_mask,
                    [mapped_keys_src, mapped_keys_soft], [src_mask, soft_mask],
                    [annotation, soft_states], initial_state, keep_prob)

            ce = snt_cost
            true_cost = T.mean(ce)
            # interpolation weight between the soft and the true cost
            lamb = theano.shared(numpy.asarray(option['lambda'], dtype),
                                 'lambda')
            cost = lamb * soft_cost + (1 - lamb) * true_cost

        training_inputs = [src_seq, src_mask, tgt_seq, tgt_mask, byseq]
        training_outputs = [cost, soft_cost, true_cost]

        # no per-sentence cost function is compiled for this model
        get_snt_cost = None

        # decoding graph
        with ops.variable_scope(scope, reuse=True):
            prev_words = T.ivector("prev_words")

            # disable dropout
            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            source_inputs = source_inputs + source_bias
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)
            target_inputs = target_inputs + target_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            with ops.variable_scope('soft_decoder'):
                initial_state = nn.feedforward(states[-1], [shdim, thdim],
                                               True,
                                               scope='initial',
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)
                soft_states, soft_contexts, soft_probs, soft_mask = soft_decoder.infer(
                    mapped_keys, src_mask, annotation, initial_state,
                    target_embedding, target_bias, 1.0)

            # decoder
            final_state = r_states[0]
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                # keys for query
                with ops.variable_scope('map-key-src'):
                    mapped_keys_src = map_key(annotation, 2 * shdim, ahdim)
                with ops.variable_scope('map-key-soft'):
                    mapped_keys_soft = map_key(soft_states, thdim, ahdim)

            prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
            prev_inputs = prev_inputs + target_bias

            cond = T.neq(prev_words, 0)
            # zeros out embedding if y is 0, which indicates <s>
            prev_inputs = prev_inputs * cond[:, None]

            with ops.variable_scope(decoder_scope):
                mask = T.ones_like(prev_words, dtype=dtype)
                # step() takes the sources as keys..., values..., masks...
                next_state, context = decoder.step(
                    prev_inputs, mask, initial_state, *[
                        mapped_keys_src, mapped_keys_soft, annotation,
                        soft_states, src_mask, soft_mask
                    ])
                probs = decoder.prediction(prev_inputs, next_state, context)

        # encoding
        encoding_inputs = [src_seq, src_mask]
        encoding_outputs = [
            initial_state, annotation, soft_states, mapped_keys_src,
            mapped_keys_soft, soft_mask
        ]
        encode = theano.function(encoding_inputs, encoding_outputs)

        if option["decoder"] == "GruSimple":
            # The decoder built above is always decoder2.DecoderGruCond, so
            # there is no GruSimple predict/generate pair to compile.
            raise ValueError(
                "decoder 'GruSimple' is not supported by this model")
        elif option["decoder"] == "GruCond":
            prediction_inputs = [
                prev_words, initial_state, annotation, mapped_keys_src,
                src_mask, soft_states, mapped_keys_soft, soft_mask
            ]
            prediction_outputs = [probs, next_state]
            predict = theano.function(prediction_inputs, prediction_outputs)
            self.predict = predict

        self.cost = cost
        self.inputs = training_inputs
        self.outputs = training_outputs
        self.updates = []
        self.align = None
        self.sample = None
        self.encode = encode

        self.get_snt_cost = get_snt_cost
        self.option = option
コード例 #3
0
ファイル: rnnsearch.py プロジェクト: middlekisser/PEA-NMT
    def __init__(self, **option):
        """Build the training and decoding graphs of the attention model.

        Option keys read here: ``embdim``, ``hidden``, ``maxhid``,
        ``maxpart``, ``deephid``, ``vocabulary``, ``decoder`` and the
        optional ``scope``, ``initializer``, ``regularizer``, ``keep_prob``
        (defaults are written back into ``option``).

        Attributes set: ``cost``, ``inputs``, ``outputs``, ``updates``,
        ``encode``, ``option``; ``predict`` for the ``"GruSimple"`` and
        ``"GruCond"`` decoders, and additionally ``generate`` for
        ``"GruSimple"``.
        """
        # source and target embedding dim
        sedim, tedim = option["embdim"]
        # source, target and attention hidden dim
        shdim, thdim, ahdim = option["hidden"]
        # maxout hidden dim
        maxdim = option["maxhid"]
        # maxout part
        maxpart = option["maxpart"]
        # deepout hidden dim
        deephid = option["deephid"]
        svocab, tvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab
        # source and target vocabulary size
        svsize, tvsize = len(sid2w), len(tid2w)

        # fill in defaults for the optional settings
        if "scope" not in option or option["scope"] is None:
            option["scope"] = "rnnsearch"

        if "initializer" not in option:
            option["initializer"] = None

        if "regularizer" not in option:
            option["regularizer"] = None

        if "keep_prob" not in option:
            option["keep_prob"] = 1.0

        dtype = theano.config.floatX
        initializer = option["initializer"]
        regularizer = option["regularizer"]
        # a falsy keep_prob (None, 0) means "no dropout"
        keep_prob = option["keep_prob"] or 1.0

        scope = option["scope"]
        decoder_scope = "decoder"

        encoder = Encoder(sedim, shdim)
        # Resolves the class named "Decoder<option['decoder']>" by name.
        # NOTE(review): eval() executes whatever string the option holds; if
        # options can come from an untrusted source, an explicit
        # name -> class mapping would be safer.
        decoderType = eval("Decoder{}".format(option["decoder"]))
        decoder = decoderType(tedim, thdim, ahdim, 2 * shdim, dim_maxout=maxdim, max_part=maxpart, dim_readout=deephid,
                              n_y_vocab=tvsize)

        # training graph
        with ops.variable_scope(scope, initializer=initializer,
                                regularizer=regularizer, dtype=dtype):
            src_seq = T.imatrix("source_sequence")
            src_mask = T.matrix("source_sequence_mask")
            tgt_seq = T.imatrix("target_sequence")
            tgt_mask = T.matrix("target_sequence_mask")

            with ops.variable_scope("source_embedding"):
                source_embedding = ops.get_variable("embedding",
                                                    [svsize, sedim])
                source_bias = ops.get_variable("bias", [sedim])

            with ops.variable_scope("target_embedding") as tgtembscope:
                target_embedding = ops.get_variable("embedding",
                                                    [tvsize, tedim])
                # target_bias = ops.get_variable("bias", [tedim])
                # Hands the target-embedding scope to the decoder;
                # presumably so it can reuse (tie) the embedding matrix --
                # TODO confirm against the decoder implementation.
                decoder.tiescope = tgtembscope

            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)

            # only the source side has an embedding bias in this variant
            source_inputs = source_inputs + source_bias

            if keep_prob < 1.0:
                source_inputs = nn.dropout(source_inputs, keep_prob=keep_prob)
                target_inputs = nn.dropout(target_inputs, keep_prob=keep_prob)

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            # compute initial state for decoder
            # first state of backward encoder
            final_state = r_states[0]
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True, scope="initial",
                                               activation=T.tanh)
                # keys for query
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)

                # only the third value returned by forward() is used, as the
                # training cost
                _, _, cost,_  = decoder.forward(tgt_seq, target_inputs, tgt_mask, mapped_keys, src_mask,
                                                    annotation, initial_state, keep_prob)


        training_inputs = [src_seq, src_mask, tgt_seq, tgt_mask]
        training_outputs = [cost]

        # decoding graph: rebuilt under reuse=True so it shares the variables
        # created by the training graph above
        with ops.variable_scope(scope, reuse=True):
            prev_words = T.ivector("prev_words")

            # disable dropout
            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            source_inputs = source_inputs + source_bias
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)
            # target_inputs = target_inputs + target_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            # decoder
            final_state = r_states[0]
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True, scope="initial",
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)

            prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
            # prev_inputs = prev_inputs + target_bias

            cond = T.neq(prev_words, 0)
            # zeros out embedding if y is 0, which indicates <s>
            prev_inputs = prev_inputs * cond[:, None]

            with ops.variable_scope(decoder_scope):
                # single decoding step with an all-ones mask
                mask = T.ones_like(prev_words, dtype=dtype)
                next_state, context = decoder.step(prev_inputs, mask, initial_state, mapped_keys, annotation, src_mask)
                # GruSimple predicts from the incoming state, GruCond from
                # the state produced by this step
                if option["decoder"] == "GruSimple":
                    probs = decoder.prediction(prev_inputs, initial_state, context)
                elif option["decoder"] == "GruCond":
                    probs = decoder.prediction(prev_inputs, next_state, context)

        # encoding
        encoding_inputs = [src_seq, src_mask]
        encoding_outputs = [annotation, initial_state, mapped_keys]
        encode = theano.function(encoding_inputs, encoding_outputs)

        # NOTE(review): for any other decoder name neither self.predict nor
        # self.generate is set.
        if option["decoder"] == "GruSimple":
            # GruSimple decodes in two compiled calls: predict the word
            # distribution (and context), then generate the next state
            prediction_inputs = [prev_words, initial_state, annotation,
                                 mapped_keys, src_mask]
            prediction_outputs = [probs, context]
            predict = theano.function(prediction_inputs, prediction_outputs)

            generation_inputs = [prev_words, initial_state, context]
            generation_outputs = next_state
            generate = theano.function(generation_inputs, generation_outputs)

            self.predict = predict
            self.generate = generate
        elif option["decoder"] == "GruCond":
            # GruCond returns probabilities and next state from one call
            prediction_inputs = [prev_words, initial_state, annotation,
                                 mapped_keys, src_mask]
            prediction_outputs = [probs, next_state]
            predict = theano.function(prediction_inputs, prediction_outputs)
            self.predict = predict

        # optional graph (disabled: the block below is a bare string literal,
        # not executed code)
        '''
        with ops.variable_scope(scope, reuse=True):
            sample = decoder.build_sampling(src_seq, src_mask, target_embedding, target_bias, mapped_keys,
                                            annotation, initial_state)
            align = decoder.build_attention(src_seq, src_mask, target_inputs, tgt_seq, tgt_mask, mapped_keys,
                                            annotation, initial_state)
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim, thdim],
                                               True, scope="initial",
                                               activation=T.tanh)
                # keys for query
                mapped_keys = map_key(annotation, 2 * shdim, ahdim)

                _, _, _,snt_cost  = decoder.forward(tgt_seq, target_inputs, tgt_mask, mapped_keys, src_mask,
                                                    annotation, initial_state, 1.0)
            get_snt_cost = theano.function(training_inputs, snt_cost)
        '''
        self.cost = cost
        self.inputs = training_inputs
        self.outputs = training_outputs
        self.updates = []
        # self.align = align
        # self.sample = sample
        self.encode = encode

        # self.get_snt_cost = get_snt_cost
        self.option = option
コード例 #4
0
    def __init__(self, **option):
        """Build the graphs of the joint translation / POS-tagging model.

        On top of the encoder annotation a source POS tagger predicts a tag
        distribution per source position; the expected tag embedding under
        that distribution is appended to the annotation. Four key
        projections are computed from the annotation and the tagger
        features and handed to the decoder. The training cost is
        ``transcost + lambda[0] * srcpos_cost + lambda[1] * tgtpos_cost``.

        Option keys read here: ``embdim``, ``hidden``, ``maxhid``,
        ``maxpart``, ``deephid``, ``vocabulary``, ``decoder``, ``lambda``
        and the optional ``scope``, ``initializer``, ``regularizer``,
        ``keep_prob`` (defaults are written back into ``option``).

        Attributes set: ``cost``, ``inputs``, ``outputs``, ``updates``,
        ``encode``, ``option``; ``predict`` for the ``"GruSimple"`` and
        ``"GruCond"`` decoders, and additionally ``generate`` for
        ``"GruSimple"``.
        """
        # source and target embedding dim
        sedim, tedim, xposhdim, yposhdim = option["embdim"]
        # source, target and attention hidden dim
        shdim, thdim, ahdim, xposnn, yposnn, word2pos, pos2word, pos2pos = option[
            "hidden"]
        # maxout hidden dim
        maxdim = option["maxhid"]
        # maxout part
        maxpart = option["maxpart"]
        # deepout hidden dim
        deephid = option["deephid"]
        svocab, tvocab, tagvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab

        stag2id, ttag2id = tagvocab
        # source and target vocabulary size
        svsize, tvsize = len(sid2w), len(tid2w)
        stagsize, ttagsize = len(stag2id), len(ttag2id)

        # fill in defaults for the optional settings
        if "scope" not in option or option["scope"] is None:
            option["scope"] = "rnnsearch"

        if "initializer" not in option:
            option["initializer"] = None

        if "regularizer" not in option:
            option["regularizer"] = None

        if "keep_prob" not in option:
            option["keep_prob"] = 1.0

        dtype = theano.config.floatX
        initializer = option["initializer"]
        regularizer = option["regularizer"]
        # a falsy keep_prob (None, 0) means "no dropout"
        keep_prob = option["keep_prob"] or 1.0

        scope = option["scope"]
        decoder_scope = "decoder"

        encoder = Encoder(sedim, shdim)
        # Resolves the class named "Decoder<option['decoder']>" by name.
        # NOTE(review): eval() executes whatever string the option holds; if
        # options can come from an untrusted source, an explicit
        # name -> class mapping would be safer.
        decoderType = eval("Decoder{}".format(option["decoder"]))
        decoder = decoderType(tedim,
                              thdim,
                              ahdim,
                              2 * shdim + xposhdim,
                              dim_maxout=maxdim,
                              max_part=maxpart,
                              dim_readout=deephid,
                              n_y_vocab=tvsize,
                              n_y_tagvocab=ttagsize,
                              poshdim=yposhdim,
                              posnndim=yposnn,
                              word2pos=word2pos,
                              pos2word=pos2word,
                              pos2pos=pos2pos)

        # training graph
        with ops.variable_scope(scope,
                                initializer=initializer,
                                regularizer=regularizer,
                                dtype=dtype):
            src_seq = T.imatrix("source_sequence")
            src_mask = T.matrix("source_sequence_mask")
            tgt_seq = T.imatrix("target_sequence")
            tgt_mask = T.matrix("target_sequence_mask")
            src_pos = T.imatrix("source_postag")
            tgt_pos = T.imatrix("target_postag")

            with ops.variable_scope("source_embedding"):
                source_embedding = ops.get_variable("embedding",
                                                    [svsize, sedim])
                source_bias = ops.get_variable("bias", [sedim])

            with ops.variable_scope("target_embedding"):
                target_embedding = ops.get_variable("embedding",
                                                    [tvsize, tedim])
                target_bias = ops.get_variable("bias", [tedim])

            with ops.variable_scope("srctag_embedding"):
                srctag_embedding = ops.get_variable("embedding",
                                                    [stagsize, xposhdim])
                srctag_bias = ops.get_variable("bias", [xposhdim])

            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)

            source_inputs = source_inputs + source_bias
            target_inputs = target_inputs + target_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            with ops.variable_scope("srcpostagger"):
                tempstates = nn.feedforward(annotation, [shdim * 2, xposnn],
                                            True,
                                            scope="staggerstates",
                                            activation=T.nnet.relu)
                scores = nn.linear(tempstates, [xposnn, stagsize],
                                   True,
                                   scope="staggerscores")

                # flatten the first two axes so softmax runs row-wise
                new_shape = [scores.shape[0] * scores.shape[1], -1]
                scores = scores.reshape(new_shape)
                srcposprobs = T.nnet.softmax(scores)

                # expected tag embedding under the predicted distribution,
                # reshaped back to the annotation's first two axes
                srctaggerstates = T.dot(srcposprobs,
                                        srctag_embedding) + srctag_bias
                srctaggerstates = srctaggerstates.reshape(
                    [annotation.shape[0], annotation.shape[1], -1])

                # masked cross-entropy of the gold source tags, summed over
                # the leading axis and averaged over the remaining one
                idx = T.arange(src_pos.flatten().shape[0])
                ce = -T.log(srcposprobs[idx, src_pos.flatten()])
                ce = ce.reshape(src_pos.shape)
                ce = T.sum(ce * src_mask, 0)
                srcpos_cost = T.mean(ce)

            tempposkeys = T.concatenate([srctaggerstates, tempstates], -1)

            # keys for query: two projections of the word annotation and two
            # of the tagger features
            src_words_keys = map_key(annotation, 2 * shdim, ahdim,
                                     "srcwordkeys")
            src_pos_keys = map_key(tempposkeys, xposnn + xposhdim, word2pos,
                                   "srcposkeys")

            pos_words_keys = map_key(annotation, 2 * shdim, pos2word,
                                     "pos2wordkeys")
            pos_pos_keys = map_key(tempposkeys, xposnn + xposhdim, pos2pos,
                                   "pos2poskeys")

            # the decoder attends over annotation + tag features
            annotation = T.concatenate([annotation, srctaggerstates], -1)

            # compute initial state for decoder
            # first state of backward encoder
            final_state = T.concatenate([r_states[0], srctaggerstates[0]], -1)
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state,
                                               [shdim + xposhdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)

                _, _, transcost, _, tgtpos_cost = decoder.forward(
                    tgt_seq, target_inputs, tgt_mask, src_words_keys,
                    src_pos_keys, pos_words_keys, pos_pos_keys, src_mask,
                    annotation, initial_state, tgt_pos, keep_prob)

        # weights of the source / target tagging costs
        lambx = theano.shared(numpy.asarray(option["lambda"][0], dtype),
                              "lambdax")
        lamby = theano.shared(numpy.asarray(option["lambda"][1], dtype),
                              "lambday")

        totalcost = transcost + lambx * srcpos_cost + lamby * tgtpos_cost
        training_inputs = [
            src_seq, src_mask, tgt_seq, tgt_mask, src_pos, tgt_pos
        ]
        training_outputs = [srcpos_cost, tgtpos_cost, transcost, totalcost]

        # decoding graph: rebuilt under reuse=True so it shares the variables
        # created by the training graph above
        with ops.variable_scope(scope, reuse=True):
            prev_words = T.ivector("prev_words")

            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            source_inputs = source_inputs + source_bias
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)
            target_inputs = target_inputs + target_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            with ops.variable_scope("srcpostagger"):
                tempstates = nn.feedforward(annotation, [shdim * 2, xposnn],
                                            True,
                                            scope="staggerstates",
                                            activation=T.nnet.relu)
                scores = nn.linear(tempstates, [xposnn, stagsize],
                                   True,
                                   scope="staggerscores")

                new_shape = [scores.shape[0] * scores.shape[1], -1]
                scores = scores.reshape(new_shape)
                srcposprobs = T.nnet.softmax(scores)

                srctaggerstates = T.dot(srcposprobs,
                                        srctag_embedding) + srctag_bias
                srctaggerstates = srctaggerstates.reshape(
                    [annotation.shape[0], annotation.shape[1], -1])

            tempposkeys = T.concatenate([srctaggerstates, tempstates], -1)

            src_words_keys = map_key(annotation, 2 * shdim, ahdim,
                                     "srcwordkeys")
            src_pos_keys = map_key(tempposkeys, xposnn + xposhdim, word2pos,
                                   "srcposkeys")

            pos_words_keys = map_key(annotation, 2 * shdim, pos2word,
                                     "pos2wordkeys")
            pos_pos_keys = map_key(tempposkeys, xposnn + xposhdim, pos2pos,
                                   "pos2poskeys")

            annotation = T.concatenate([annotation, srctaggerstates], -1)

            # decoder
            final_state = T.concatenate([r_states[0], srctaggerstates[0]], -1)
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state,
                                               [shdim + xposhdim, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)

            prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
            prev_inputs = prev_inputs + target_bias

            cond = T.neq(prev_words, 0)
            # zeros out embedding if y is 0, which indicates <s>
            prev_inputs = prev_inputs * cond[:, None]

            with ops.variable_scope(decoder_scope):
                # single decoding step with an all-ones mask
                mask = T.ones_like(prev_words, dtype=dtype)
                next_state, context, next_pos, tgtposprob = decoder.step(
                    prev_inputs, mask, initial_state, src_words_keys,
                    src_pos_keys, pos_words_keys, pos_pos_keys, annotation,
                    src_mask)
                if option["decoder"] == "GruSimple":
                    probs = decoder.prediction(prev_inputs, initial_state,
                                               context)
                elif option["decoder"] == "GruCond":
                    probs = decoder.prediction(prev_inputs, next_state,
                                               context, next_pos)

        # encoding
        encoding_inputs = [src_seq, src_mask]
        encoding_outputs = [
            annotation, initial_state, src_words_keys, src_pos_keys,
            pos_words_keys, pos_pos_keys, srcposprobs
        ]
        encode = theano.function(encoding_inputs, encoding_outputs)

        # Both prediction functions are fed the same tensors that were passed
        # to decoder.step above.
        step_inputs = [
            prev_words, initial_state, annotation, src_words_keys,
            src_pos_keys, pos_words_keys, pos_pos_keys, src_mask
        ]

        if option["decoder"] == "GruSimple":
            # This branch used to list an undefined name ``mapped_keys`` and
            # failed with NameError; it now uses the four key tensors.
            prediction_inputs = list(step_inputs)
            prediction_outputs = [probs, context]
            predict = theano.function(prediction_inputs,
                                      prediction_outputs,
                                      on_unused_input='warn')

            # NOTE(review): unchanged from the original; confirm that
            # next_state can be computed from these three inputs alone for
            # the GruSimple decoder.
            generation_inputs = [prev_words, initial_state, context]
            generation_outputs = next_state
            generate = theano.function(generation_inputs, generation_outputs)

            self.predict = predict
            self.generate = generate
        elif option["decoder"] == "GruCond":
            prediction_inputs = list(step_inputs)
            prediction_outputs = [probs, next_state, tgtposprob]
            predict = theano.function(prediction_inputs,
                                      prediction_outputs,
                                      on_unused_input='warn')
            self.predict = predict

        self.cost = totalcost
        self.inputs = training_inputs
        self.outputs = training_outputs
        self.updates = []
        self.encode = encode
        self.option = option
コード例 #5
0
    def __init__(self, **option):
        """Build the training and decoding graphs and compile Theano functions.

        Options read from ``option``:
            embdim: pair of source / target embedding sizes.
            hidden: five sizes, unpacked as ``shdim, thdim, ahdim, domaindim,
                feadim``.
            maxhid, maxpart, deephid: forwarded to the decoder constructor as
                ``dim_maxout``, ``max_part`` and ``dim_readout``.
            vocabulary: ``((sw2id, sid2w), (tw2id, tid2w))``; only the lengths
                of ``sid2w`` and ``tid2w`` are used here.
            dnum: output size of the two tag classifiers; also forwarded to
                the decoder.
            decoder: suffix of the decoder class name (``"Decoder" + value``);
                ``"GruSimple"`` and ``"GruCond"`` are the values handled below.
            lambda: weight applied to the adversarial term in the final cost.
            scope, initializer, regularizer, keep_prob: optional; filled with
                ``"rnnsearch"``, ``None``, ``None`` and ``1.0`` when missing.

        Attributes set:
            tag_predict, tgt_tag_predict, encode, predict (and ``generate``
            for ``"GruSimple"``): compiled Theano functions.
            cost, inputs, outputs, updates: training cost, its symbolic
            inputs, the reported cost terms, and an empty update list.
            cost_cla, inputs_cla, outputs_cla: cost and inputs of the
            "Shared" classifier alone.
            option: the option dict, including the defaults filled in here.
        """
        # source and target embedding dim
        sedim, tedim = option["embdim"]
        # source, target and attention hidden dim, followed by two more sizes
        # named here as the domain dim and the feature dim
        shdim, thdim, ahdim, domaindim, feadim = option["hidden"]
        # maxout hidden dim
        maxdim = option["maxhid"]
        # maxout part
        maxpart = option["maxpart"]
        # deepout hidden dim
        deephid = option["deephid"]
        svocab, tvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab
        # source and target vocabulary size
        svsize, tvsize = len(sid2w), len(tid2w)
        # output size of the tag classifiers below (presumably the number of
        # domains — TODO confirm)
        dnum = option['dnum']

        # Fill in defaults for the optional settings; a scope of None is
        # treated the same as a missing scope.
        if "scope" not in option or option["scope"] is None:
            option["scope"] = "rnnsearch"

        if "initializer" not in option:
            option["initializer"] = None

        if "regularizer" not in option:
            option["regularizer"] = None

        if "keep_prob" not in option:
            option["keep_prob"] = 1.0

        dtype = theano.config.floatX
        initializer = option["initializer"]
        regularizer = option["regularizer"]
        # A falsy keep_prob (None or 0) is replaced by 1.0.
        keep_prob = option["keep_prob"] or 1.0

        scope = option["scope"]
        decoder_scope = "decoder"

        encoder = Encoder(sedim, shdim)
        # Resolve the decoder class from its name suffix.
        # NOTE(review): eval() executes the configured string; a plain name
        # lookup would be safer if option["decoder"] can come from outside.
        decoderType = eval("Decoder{}".format(option["decoder"]))
        decoder = decoderType(tedim,
                              thdim,
                              ahdim,
                              2 * shdim,
                              dnum=dnum,
                              dim_maxout=maxdim,
                              max_part=maxpart,
                              dim_readout=deephid,
                              dim_domain=domaindim,
                              feadim=feadim,
                              n_y_vocab=tvsize)

        # training graph
        with ops.variable_scope(scope,
                                initializer=initializer,
                                regularizer=regularizer,
                                dtype=dtype):
            # Symbolic inputs shared by the training and decoding graphs.
            src_seq = T.imatrix("source_sequence")
            src_mask = T.matrix("source_sequence_mask")
            tgt_seq = T.imatrix("target_sequence")
            tgt_mask = T.matrix("target_sequence_mask")
            tag_seq = T.imatrix("domain_tag")
            # nsrc_mask = T.set_subtensor(src_mask[T.cast(T.sum(src_mask, 0) - 1, 'int32'),
            #                                      T.arange(src_mask.shape[1])], 0.0)

            with ops.variable_scope("source_embedding"):
                source_embedding = ops.get_variable("embedding",
                                                    [svsize, sedim])
                source_bias = ops.get_variable("bias", [sedim])

            with ops.variable_scope("target_embedding") as tgtembscope:
                target_embedding = ops.get_variable("embedding",
                                                    [tvsize, tedim])
                # target_bias = ops.get_variable("bias", [tedim])
                # Hand the target-embedding scope to the decoder (presumably
                # so it can tie its output weights to the embedding — TODO
                # confirm in the decoder class).
                decoder.tiescope = tgtembscope

            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            target_inputs = nn.embedding_lookup(target_embedding, tgt_seq)

            # Only the source side gets an additive bias; the target bias is
            # commented out above.
            source_inputs = source_inputs + source_bias

            if keep_prob < 1.0:
                source_inputs = nn.dropout(source_inputs, keep_prob=keep_prob)
                target_inputs = nn.dropout(target_inputs, keep_prob=keep_prob)

            # Concatenate the two encoder outputs along axis 2.
            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            # Classifier branch built under the "Specific" scope: attention
            # weights -> weighted sum over axis 0 -> feature -> tag scores.
            with ops.variable_scope("Specific"):
                domain_alpha = domain_sensitive_attention(
                    annotation, src_mask, shdim * 2, domaindim)
                # domain_alpha = attention(r_states[0], annotation, nsrc_mask,
                #                          shdim,
                #                          shdim * 2)
                domain_context = T.sum(annotation * domain_alpha[:, :, None],
                                       0)
                dfeature = nn.feedforward(domain_context, [shdim * 2, feadim],
                                          True,
                                          activation=T.tanh,
                                          scope="feature1")

                dscores = nn.feedforward(dfeature, [feadim, dnum],
                                         True,
                                         activation=T.tanh,
                                         scope="score")
                # presumably (batch, dnum) — TODO confirm shape
                dprobs = T.nnet.softmax(dscores)
                dpred_tag = T.argmax(dprobs, 1)
                # Cross-entropy of the tag given in tag_seq, averaged over
                # the flattened tag entries.
                didx = T.arange(tag_seq.flatten().shape[0])
                dce = -T.log(dprobs[didx, tag_seq.flatten()])
                dcost = T.mean(dce)

            # Second attention / feature branch, built directly in the outer
            # scope rather than under "Specific" (presumably so it gets its
            # own parameters — TODO confirm against ops.variable_scope).
            share_alpha = domain_sensitive_attention(annotation, src_mask,
                                                     shdim * 2, domaindim)
            # share_alpha = attention(r_states[0], annotation, nsrc_mask,
            #                         shdim,
            #                         shdim * 2)
            share_context = T.sum(annotation * share_alpha[:, :, None], 0)
            sfeature = nn.feedforward(share_context, [shdim * 2, feadim],
                                      True,
                                      activation=T.tanh,
                                      scope="feature1")

            with ops.variable_scope("Shared"):
                sscores = nn.feedforward(sfeature, [feadim, dnum],
                                         True,
                                         activation=T.tanh,
                                         scope="score")
                # presumably (batch, dnum) — TODO confirm shape
                sprobs = T.nnet.softmax(sscores)
                spred_tag = T.argmax(sprobs, 1)
                sidx = T.arange(tag_seq.flatten().shape[0])
                sce = -T.log(sprobs[sidx, tag_seq.flatten()])
                # Plain cross-entropy; exposed through self.cost_cla and not
                # part of final_cost.
                scost = T.mean(sce)
                # -p * log(p) for the probability p assigned to the given
                # tag; its mean is subtracted (scaled by lambda) in
                # final_cost below.
                adv_sce = -sprobs[sidx, tag_seq.flatten()] * T.log(
                    sprobs[sidx, tag_seq.flatten()])
                adv_scost = T.mean(adv_sce)

            # Gate the annotation twice: once from the "Specific" feature and
            # once from the shared feature. Each gate is multiplied
            # elementwise into the annotation.
            domain_gate = nn.feedforward([dfeature, annotation],
                                         [[feadim, shdim * 2], shdim * 2],
                                         True,
                                         scope="domain_gate")
            # Uses the ungated annotation, which is only overwritten below.
            domain_annotation = annotation * domain_gate
            # Unlike the embedding dropout above, these dropout calls are not
            # guarded by keep_prob < 1.0.
            domain_annotation = nn.dropout(domain_annotation,
                                           keep_prob=keep_prob)
            share_gate = nn.feedforward([sfeature, annotation],
                                        [[feadim, shdim * 2], shdim * 2],
                                        True,
                                        scope="share_gate")
            annotation = annotation * share_gate
            annotation = nn.dropout(annotation, keep_prob=keep_prob)

            # compute initial state for decoder
            # Takes index 0 along the first axis and the second half of the
            # last axis from both gated annotations, then concatenates them
            # (the second half presumably corresponds to r_states, i.e. the
            # first state of the backward encoder — TODO confirm).
            final_state = T.concatenate([
                annotation[0, :, annotation.shape[-1] / 2:],
                domain_annotation[0, :, annotation.shape[-1] / 2:]
            ], -1)
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim * 2, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                # keys for query
                mapped_keys = map_key(annotation, 2 * shdim, ahdim, "semantic")
                mapped_domain_keys = map_key(domain_annotation, 2 * shdim,
                                             ahdim, "domain")

                # Of the six values returned, only the translation cost, the
                # target-side tag cost and the target-side predicted tag are
                # kept.
                _, _, cost, tgtdcost, tpred_tag, _ = decoder.forward(
                    tgt_seq, target_inputs, tgt_mask, mapped_keys, src_mask,
                    annotation, initial_state, mapped_domain_keys,
                    domain_annotation, tag_seq, keep_prob)

        lamb = theano.shared(numpy.asarray(option["lambda"], dtype), "lambda")
        # cwscost *= lamb
        # The adversarial term enters with a negative sign.
        final_cost = cost + dcost + tgtdcost - lamb * adv_scost

        # Compiled function returning the argmax tags of both classifiers.
        tag_inputs = [src_seq, src_mask]
        tag_outputs = [dpred_tag, spred_tag]
        tag_predict = theano.function(tag_inputs, tag_outputs)
        self.tag_predict = tag_predict

        # Compiled function returning the decoder's predicted tag.
        tgt_tag_inputs = [src_seq, src_mask, tgt_seq, tgt_mask]
        tgt_tag_outputs = [tpred_tag]
        tgt_tag_predict = theano.function(tgt_tag_inputs, tgt_tag_outputs)
        self.tgt_tag_predict = tgt_tag_predict

        training_inputs = [src_seq, src_mask, tgt_seq, tgt_mask, tag_seq]
        training_outputs = [cost, dcost, adv_scost, tgtdcost]

        # Symbolic cost and inputs of the "Shared" classifier on its own; no
        # function is compiled for them here.
        self.cost_cla = scost
        self.inputs_cla = [src_seq, src_mask, tag_seq]
        self.outputs_cla = [scost]

        # decoding graph
        # Rebuilds the encoder side with reuse=True and without any dropout
        # call (presumably reusing the variables created above — TODO confirm
        # against ops.variable_scope). The scope names and call order mirror
        # the training graph.
        with ops.variable_scope(scope, reuse=True):
            prev_words = T.ivector("prev_words")

            # disable dropout
            source_inputs = nn.embedding_lookup(source_embedding, src_seq)
            source_inputs = source_inputs + source_bias

            states, r_states = encoder.forward(source_inputs, src_mask)
            annotation = T.concatenate([states, r_states], 2)

            with ops.variable_scope("Specific"):
                domain_alpha = domain_sensitive_attention(
                    annotation, src_mask, shdim * 2, domaindim)
                # domain_alpha = attention(r_states[0], annotation, nsrc_mask,
                #                          shdim,
                #                          shdim * 2)
                domain_context = T.sum(annotation * domain_alpha[:, :, None],
                                       0)
                dfeature = nn.feedforward(domain_context, [shdim * 2, feadim],
                                          True,
                                          activation=T.tanh,
                                          scope="feature1")

            share_alpha = domain_sensitive_attention(annotation, src_mask,
                                                     shdim * 2, domaindim)
            # share_alpha = attention(r_states[0], annotation, nsrc_mask,
            #                         shdim,
            #                         shdim * 2)
            share_context = T.sum(annotation * share_alpha[:, :, None], 0)
            sfeature = nn.feedforward(share_context, [shdim * 2, feadim],
                                      True,
                                      activation=T.tanh,
                                      scope="feature1")

            # Same two gates as in the training graph, without dropout.
            domain_gate = nn.feedforward([dfeature, annotation],
                                         [[feadim, shdim * 2], shdim * 2],
                                         True,
                                         scope="domain_gate")
            domain_annotation = annotation * domain_gate
            share_gate = nn.feedforward([sfeature, annotation],
                                        [[feadim, shdim * 2], shdim * 2],
                                        True,
                                        scope="share_gate")
            annotation = annotation * share_gate

            # decoder
            final_state = T.concatenate([
                annotation[0, :, annotation.shape[-1] / 2:],
                domain_annotation[0, :, annotation.shape[-1] / 2:]
            ], -1)
            with ops.variable_scope(decoder_scope):
                initial_state = nn.feedforward(final_state, [shdim * 2, thdim],
                                               True,
                                               scope="initial",
                                               activation=T.tanh)
                mapped_keys = map_key(annotation, 2 * shdim, ahdim, "semantic")
                mapped_domain_keys = map_key(domain_annotation, 2 * shdim,
                                             ahdim, "domain")

            prev_inputs = nn.embedding_lookup(target_embedding, prev_words)
            # prev_inputs = prev_inputs + target_bias

            cond = T.neq(prev_words, 0)
            # zeros out embedding if y is 0, which indicates <s>
            prev_inputs = prev_inputs * cond[:, None]

            with ops.variable_scope(decoder_scope):
                # A single decoding step with an all-ones mask.
                mask = T.ones_like(prev_words, dtype=dtype)
                next_state, context = decoder.step(prev_inputs, mask,
                                                   initial_state, mapped_keys,
                                                   annotation, src_mask,
                                                   mapped_domain_keys,
                                                   domain_annotation)
                # GruSimple predicts from the incoming state, GruCond from
                # the state produced by this step.
                if option["decoder"] == "GruSimple":
                    probs = decoder.prediction(prev_inputs, initial_state,
                                               context)
                elif option["decoder"] == "GruCond":
                    probs = decoder.prediction(prev_inputs, next_state,
                                               context)

        # encoding
        # One call maps a source batch to every tensor the prediction
        # function takes as input.
        encoding_inputs = [src_seq, src_mask]
        encoding_outputs = [
            annotation, initial_state, mapped_keys, mapped_domain_keys,
            domain_annotation
        ]
        encode = theano.function(encoding_inputs, encoding_outputs)

        # For any decoder name other than the two below, no predict function
        # is compiled or stored.
        if option["decoder"] == "GruSimple":
            # NOTE(review): unlike the GruCond branch, these inputs omit
            # mapped_domain_keys and domain_annotation although decoder.step
            # received them above — confirm this branch still compiles.
            prediction_inputs = [
                prev_words, initial_state, annotation, mapped_keys, src_mask
            ]
            prediction_outputs = [probs, context]
            predict = theano.function(prediction_inputs, prediction_outputs)

            generation_inputs = [prev_words, initial_state, context]
            generation_outputs = next_state
            generate = theano.function(generation_inputs, generation_outputs)

            self.predict = predict
            self.generate = generate
        elif option["decoder"] == "GruCond":
            # Takes the previous words, the state, and the encode() outputs
            # plus the source mask; returns the probabilities and next state.
            prediction_inputs = [
                prev_words, initial_state, annotation, mapped_keys, src_mask,
                mapped_domain_keys, domain_annotation
            ]
            prediction_outputs = [probs, next_state]
            predict = theano.function(prediction_inputs, prediction_outputs)
            self.predict = predict

        # Symbolic training interface; no training function is compiled
        # here.
        self.cost = final_cost
        self.inputs = training_inputs
        self.outputs = training_outputs
        self.updates = []
        # self.align = align
        # self.sample = sample
        self.encode = encode
        # self.get_snt_cost = get_snt_cost
        self.option = option