# embedding, LSTMP_component_with_self_stabilization and linear_layer are
# helper functions defined elsewhere in the surrounding example code.
def LSTM_sequence_classifier_net(feature, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    embedding_function = embedding(feature, embedding_dim)
    LSTM_function = LSTMP_component_with_self_stabilization(
        embedding_function.output, LSTM_dim, cell_dim)[0]
    thought_vector = sequence.last(LSTM_function)

    return linear_layer(thought_vector, num_output_classes)
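
A minimal usage sketch (not part of the original source), assuming the CNTK v2
API; embedding, LSTMP_component_with_self_stabilization and linear_layer are
assumed to be in scope, and the vocabulary size, class count and dimensions
below are illustrative values only:

import cntk as C

vocab_size, num_classes = 2000, 5  # hypothetical sizes
features = C.sequence.input_variable(shape=vocab_size, is_sparse=True)
classifier = LSTM_sequence_classifier_net(
    features, num_classes, embedding_dim=50, LSTM_dim=25, cell_dim=25)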
Example #3
    def __init__(self, config=get_config(), **option):
        scope = config.scope

        sedim, tedim = option["embdim"]
        shdim, thdim, ahdim = option["hidden"]
        maxdim = option["maxhid"]
        deephid = option["deephid"]
        k = option["maxpart"]
        svocab, tvocab = option["vocabulary"]
        sw2id, sid2w = svocab
        tw2id, tid2w = tvocab
        svsize = len(sid2w)
        tvsize = len(tid2w)

        with variable_scope(scope):
            source_embedding = embedding(svsize, sedim,
                                         config.source_embedding)
            target_embedding = embedding(tvsize, tedim,
                                         config.target_embedding)
            rnn_encoder = encoder(sedim, shdim, config.encoder)
            rnn_decoder = decoder(tedim, shdim, thdim, ahdim, maxdim, k,
                                  deephid, tvsize, config.decoder)

        params = []
        params.extend(source_embedding.parameter)
        params.extend(target_embedding.parameter)
        params.extend(rnn_encoder.parameter)
        params.extend(rnn_decoder.parameter)

        def training_graph():
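            # Training objective: per-word negative log-likelihood, masked to
            # ignore padding, summed over time and averaged over the batch.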
            xseq = theano.tensor.imatrix()
            xmask = theano.tensor.matrix()
            yseq = theano.tensor.imatrix()
            ymask = theano.tensor.matrix()

            xemb = source_embedding(xseq)
            yemb = target_embedding(yseq)
            initstate = theano.tensor.zeros((xemb.shape[1], shdim))

            annotation = rnn_encoder(xemb, xmask, initstate)
            probs = rnn_decoder(yemb, xmask, ymask, annotation)

            idx = theano.tensor.arange(yseq.flatten().shape[0])
            cost = -theano.tensor.log(probs[idx, yseq.flatten()])
            cost = cost.reshape(yseq.shape)
            cost = theano.tensor.sum(cost * ymask, 0)
            cost = theano.tensor.mean(cost)

            return [xseq, xmask, yseq, ymask], [cost]

        def attention_graph():
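            # Same forward pass as training, but returns the attention
            # weights (alpha) rather than the cost.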
            xseq = theano.tensor.imatrix()
            xmask = theano.tensor.matrix()
            yseq = theano.tensor.imatrix()
            ymask = theano.tensor.matrix()

            xemb = source_embedding(xseq)
            yemb = target_embedding(yseq)
            initstate = theano.tensor.zeros((xemb.shape[1], shdim))

            annotation = rnn_encoder(xemb, xmask, initstate)
            alpha = rnn_decoder.compute_attention_score(
                yemb, xmask, ymask, annotation)

            return [xseq, xmask, yseq, ymask], alpha

        def sampling_graph():
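            # Ancestral sampling: encode the source once, then draw one
            # target word per scan step, up to maxlen steps.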
            seed = option["seed"]
            seed_rng = numpy.random.RandomState(numpy.random.randint(seed))
            tseed = seed_rng.randint(numpy.iinfo(numpy.int32).max)
            stream = theano.sandbox.rng_mrg.MRG_RandomStreams(tseed)

            xseq = theano.tensor.imatrix()
            xmask = theano.tensor.matrix()
            maxlen = theano.tensor.iscalar()

            batch = xseq.shape[1]
            xemb = source_embedding(xseq)
            initstate = theano.tensor.zeros([batch, shdim])

            annot = rnn_encoder(xemb, xmask, initstate)

            ymask = theano.tensor.ones([batch])
            istate, mannot = rnn_decoder.compute_initstate(annot)

            def sample_step(pemb, state, xmask, ymask, annot, mannot):
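                # One decoding step: attend over the annotation, predict a
                # word distribution, sample, then advance the decoder state.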
                alpha, context = rnn_decoder.compute_context(
                    state, xmask, annot, mannot)
                probs = rnn_decoder.compute_probability(pemb, state, context)
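                # multinomial() yields one-hot rows; argmax recovers indices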
                next_words = stream.multinomial(pvals=probs).argmax(axis=1)
                yemb = target_embedding(next_words)
                next_state = rnn_decoder.compute_state(yemb, ymask, state,
                                                       context)
                return [next_words, yemb, next_state]

            iemb = theano.tensor.zeros([batch, tedim])

            seqs = []
            outputs_info = [None, iemb, istate]
            nonseqs = [xmask, ymask, annot, mannot]

            outputs, u = theano.scan(sample_step,
                                     seqs,
                                     outputs_info,
                                     nonseqs,
                                     n_steps=maxlen)

            return [xseq, xmask, maxlen], outputs[0], u

        # the following graphs are compiled separately for use in beam search
        def encoding_graph():
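            # Encode the source once; the result is reused across beam steps.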
            xseq = theano.tensor.imatrix()
            xmask = theano.tensor.matrix()

            xemb = source_embedding(xseq)
            initstate = theano.tensor.zeros((xseq.shape[1], shdim))
            annotation = rnn_encoder(xemb, xmask, initstate)

            return [xseq, xmask], annotation

        def initial_state_graph():
            annotation = theano.tensor.tensor3()

            # initstate, mapped_annotation
            outputs = rnn_decoder.compute_initstate(annotation)

            return [annotation], outputs

        def context_graph():
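            # Attention context (and weights) for the current decoder state.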
            state = theano.tensor.matrix()
            xmask = theano.tensor.matrix()
            annotation = theano.tensor.tensor3()
            mannotation = theano.tensor.tensor3()

            inputs = [state, xmask, annotation, mannotation]
            alpha, context = rnn_decoder.compute_context(*inputs)

            return inputs, [context, alpha]

        def probability_graph():
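            # Next-word distribution given the previous word, state, context.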
            y = theano.tensor.ivector()
            state = theano.tensor.matrix()
            context = theano.tensor.matrix()

            # 0 for initial index
            cond = theano.tensor.neq(y, 0)
            yemb = target_embedding(y)
            # zeros out embedding if y is 0
            yemb = yemb * cond[:, None]
            probs = rnn_decoder.compute_probability(yemb, state, context)

            return [y, state, context], probs

        def state_graph():
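            # Advance the decoder state once a word has been chosen.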
            y = theano.tensor.ivector()
            ymask = theano.tensor.vector()
            state = theano.tensor.matrix()
            context = theano.tensor.matrix()

            yemb = target_embedding(y)
            inputs = [yemb, ymask, state, context]
            new_state = rnn_decoder.compute_state(*inputs)

            return [y, ymask, state, context], new_state

        def compile_function(graph_fn):
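            # Graph builders return (inputs, outputs) or
            # (inputs, outputs, updates).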
            outputs = graph_fn()

            if len(outputs) == 2:
                inputs, outputs = outputs
                return theano.function(inputs, outputs)
            else:
                inputs, outputs, updates = outputs
                return theano.function(inputs, outputs, updates=updates)

        train_inputs, train_outputs = training_graph()

        search_fn = []
        search_fn.append(compile_function(encoding_graph))
        search_fn.append(compile_function(initial_state_graph))
        search_fn.append(compile_function(context_graph))
        search_fn.append(compile_function(probability_graph))
        search_fn.append(compile_function(state_graph))

        self.name = scope
        self.config = config
        self.parameter = params
        self.option = option
        self.cost = train_outputs[0]
        self.inputs = train_inputs
        self.outputs = train_outputs
        self.updates = []
        self.search = search_fn
        self.sampler = compile_function(sampling_graph)
        self.attention = compile_function(attention_graph)
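
A hedged usage sketch for the constructor above; the class name NMTModel is
hypothetical (the snippet only shows __init__), svocab/tvocab are assumed to
be (word-to-id, id-to-word) pairs built elsewhere, and all sizes are
illustrative:

model = NMTModel(embdim=(620, 620),          # (source, target) embedding dims
                 hidden=(1000, 1000, 1000),  # (encoder, decoder, attention)
                 maxhid=500, deephid=620, maxpart=2,
                 vocabulary=(svocab, tvocab), seed=1234)
train_fn = theano.function(model.inputs, model.outputs)
cost, = train_fn(xseq, xmask, yseq, ymask)   # int32 ids and float32 masks,
                                             # both shaped (time, batch)
samples = model.sampler(xseq, xmask, 50)     # sample at most 50 target words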
Example #4
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    embedded_inputs = embedding(input, embedding_dim)
    lstm_outputs = simple_lstm(embedded_inputs, LSTM_dim, cell_dim)[0]
    thought_vector = sequence.last(lstm_outputs)
    return linear_layer(thought_vector, num_output_classes)