Ejemplo n.º 1
0
    def build_graph(self,
                    input_network_outputs={},
                    reuse=True,
                    debug=False,
                    nornn=False):
        """Assemble the parser graph: inputs, optional RNN stack, classifiers.

        Args:
            input_network_outputs: Iterated as ``(input_network, output)``
                pairs; each pair adds
                ``input_network.get_input_tensor(output, reuse=reuse)`` to the
                concatenated input. The default object is only read, never
                mutated, by this method.
            reuse: Forwarded to every vocab/classifier call. When truthy the
                conv/recurrent keep and include probabilities are fixed at 1.
            debug: Forwarded to the unlabeled classifier on the
                ``self.layer_mask(head_vocab)`` path. When truthy and a
                ``'deptree'`` output exists, the mask tensors are attached
                to it as well.
            nornn: When truthy (or when ``self.nornn`` is truthy) the
                recurrent layers are skipped.

        Returns:
            A ``(outputs, tokens)`` tuple. ``outputs`` holds the classifier
            results keyed by ``'deptree'`` (plus the ufeats vocab's ``field``
            when present) or by the dephead vocab's ``classname``. ``tokens``
            holds ``n_tokens``, ``tokens_per_sequence``, ``token_weights``
            and ``n_sequences``.
        """

        with tf.variable_scope('Embeddings'):
            if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
                pos_vocabs = [
                    input_vocab for input_vocab in self.input_vocabs
                    if 'POS' in input_vocab.classname
                ]
                # POS tensors are requested with embed_keep_prob=1; drop_func
                # is applied once to their sum below instead.
                pos_tensors = [
                    input_vocab.get_input_tensor(embed_keep_prob=1,
                                                 reuse=reuse)
                    for input_vocab in pos_vocabs
                ]
                non_pos_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                    if 'POS' not in input_vocab.classname
                ]
                if pos_tensors:
                    pos_tensors = tf.add_n(pos_tensors)
                    if not reuse:
                        pos_tensors = [
                            pos_vocabs[0].drop_func(
                                pos_tensors, pos_vocabs[0].embed_keep_prob)
                        ]
                    else:
                        pos_tensors = [pos_tensors]
                input_tensors = non_pos_tensors + pos_tensors
            else:
                input_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(input_tensors, 2)

        # Rescale every position by input_size / (count of nonzero features
        # on the last axis); the 1e-12 keeps the division finite when the
        # count is zero.
        n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1,
                                                 keepdims=True))
        _, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        # presumably a numeric 0/1 mask of positions whose id is > 0; depends
        # on the project helper nn.greater -- TODO confirm
        token_weights = nn.greater(self.id_vocab.placeholder, 0)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        # NOTE(review): the +1 presumably accounts for a root position --
        # confirm against the data pipeline.
        seq_lengths = tokens_per_sequence + 1

        # token_weights plus 1 at index 0, the only index where
        # range(bucket_size) > 0 does not hold.
        root_weights = token_weights + (1 -
                                        nn.greater(tf.range(bucket_size), 0))
        # Pairwise masks built as outer products over the last two axes.
        token_weights3D = tf.expand_dims(
            token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
        token_weights2D = tf.expand_dims(
            root_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
        # Original author's note: for a triple (a, b, c) the arc being decided
        # is a->b, so the binary potentials should not contain the root.
        # Strictly, the root belongs in every second-order part except
        # siblings, but the same mask is used for all of them for simplicity.
        token_weights4D = tf.cast(
            tf.expand_dims(token_weights2D, axis=-3) *
            tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1),
            dtype=tf.float32)
        # abc -> ab, cb
        token_weights_cop = tf.cast(
            tf.expand_dims(token_weights2D, axis=-2) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=-1),
            dtype=tf.float32)
        # The slice at index 0 of axis 1 is additionally multiplied by the
        # transposed 3D mask, then spliced back in front of the other slices.
        token_weights_cop_0 = token_weights_cop[:, 0] * tf.cast(
            tf.transpose(token_weights3D, [0, 2, 1]), dtype=tf.float32)
        token_weights_cop = tf.concat(
            [token_weights_cop_0[:, None, :], token_weights_cop[:, 1:]], 1)
        # abc -> ab, bc
        token_weights_gp = tf.cast(
            tf.expand_dims(tf.transpose(token_weights3D, [0, 2, 1]), axis=-1) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=1),
            dtype=tf.float32)
        # abc -> ca, ab
        token_weights_gp2 = tf.cast(
            tf.expand_dims(token_weights3D, axis=2) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=1),
            dtype=tf.float32)
        # The sibling mask reuses the grandparent mask.
        token_weights_sib = token_weights_gp

        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences
        }

        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob
        if not nornn and not self.nornn:
            for i in six.moves.range(self.n_layers):
                # Only the first layer uses first_layer_conv_width.
                conv_width = self.first_layer_conv_width if not i else self.conv_width
                with tf.variable_scope('RNN-{}'.format(i)):
                    layer, _ = recurrent.directed_RNN(
                        layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=self.bidirectional,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_include_prob=recur_include_prob,
                        recur_keep_prob=recur_keep_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func,
                        bilin=self.bilin)
        else:
            print('do not use RNN')
        output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}

        with tf.variable_scope('Classifiers'):
            if 'deprel' in output_fields:
                vocab = output_fields['deprel']
                # Looked up before branching: the unfactored branch below
                # needs head_vocab.placeholder too, and previously hit an
                # unbound local because the lookup lived in the other branch.
                head_vocab = output_fields['dephead']
                if vocab.factorized:
                    # Hand the second-order masks to the head vocab as
                    # attributes.
                    head_vocab.token_weights_sib = token_weights_sib
                    head_vocab.token_weights_cop = token_weights_cop
                    head_vocab.token_weights_gp = token_weights_gp
                    head_vocab.token_weights_gp2 = token_weights_gp2
                    head_vocab.token_weights = token_weights
                    with tf.variable_scope('Unlabeled'):
                        if self.layer_mask(head_vocab):
                            unlabeled_outputs = head_vocab.get_bilinear_classifier(
                                layer,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug,
                                token_weights4D=token_weights4D,
                                sentence_mask=token_weights)
                        else:
                            unlabeled_outputs = head_vocab.get_bilinear_classifier(
                                layer,
                                token_weights=token_weights,
                                reuse=reuse)
                    with tf.variable_scope('Labeled'):
                        labeled_outputs = vocab.get_bilinear_classifier(
                            layer,
                            unlabeled_outputs,
                            token_weights=token_weights,
                            reuse=reuse)
                else:
                    labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                        layer,
                        head_vocab.placeholder,
                        token_weights=token_weights,
                        reuse=reuse)
                outputs['deptree'] = labeled_outputs
                self._evals.add('deptree')
                if 'ufeats' in output_fields:
                    vocab = output_fields['ufeats']
                    outputs[vocab.field] = vocab.get_bilinear_classifier(
                        layer,
                        labeled_outputs,
                        token_weights=token_weights,
                        reuse=reuse)
                    self._evals.add('ufeats')
            elif 'dephead' in output_fields:
                vocab = output_fields['dephead']
                outputs[vocab.classname] = vocab.get_bilinear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('dephead')
        # 'deptree' only exists on the 'deprel' path; skip the debug payload
        # otherwise rather than failing on the missing key.
        if debug and 'deptree' in outputs:
            deptree = outputs['deptree']
            deptree['token_weights'] = token_weights
            deptree['token_weights3D'] = token_weights3D
            deptree['root_weights'] = root_weights
            deptree['token_weights4D'] = token_weights4D
            deptree['token_weights_sib'] = token_weights_sib
            deptree['token_weights_gp'] = token_weights_gp
            deptree['token_weights_gp2'] = token_weights_gp2

        return outputs, tokens
Ejemplo n.º 2
0
    def get_input_tensor(self,
                         embed_keep_prob=None,
                         nonzero_init=False,
                         variable_scope=None,
                         reuse=True):
        """Encode every bucket, squeeze it, and gather the result per token.

        Args:
            embed_keep_prob: Keep probability handed to ``self.drop_func`` on
                the final tensor; a falsy value falls back to
                ``self.embed_keep_prob``. Dropout is skipped unless the
                resulting value is below 1.
            nonzero_init: Accepted but not read here; the embedding lookup is
                always called with ``nonzero_init=True``.
            variable_scope: Scope name to build under; a falsy value falls
                back to ``self.classname``.
            reuse: Forwarded to the embedding lookup. When truthy the conv,
                recurrent and output keep probabilities are fixed at 1.

        Returns:
            The tensor produced by the final ``tf.nn.embedding_lookup`` on
            ``self.placeholder``, after optional dropout.
        """

        if not embed_keep_prob:
            embed_keep_prob = self.embed_keep_prob
        if reuse:
            conv_keep_prob = 1.
            recur_keep_prob = 1.
            output_keep_prob = 1.
        else:
            conv_keep_prob = self.conv_keep_prob
            recur_keep_prob = self.recur_keep_prob
            output_keep_prob = self.output_keep_prob

        squeezed_buckets = []
        with tf.variable_scope(variable_scope or self.classname) as scope:
            bucket_placeholders = self._multibucket.get_placeholders()
            for bucket_idx, placeholder in enumerate(bucket_placeholders):
                # Every bucket after the first shares the variables the first
                # one created.
                if bucket_idx > 0:
                    scope.reuse_variables()
                with tf.variable_scope('Embeddings'):
                    layer = embeddings.token_embedding_lookup(
                        len(self),
                        self.embed_size,
                        placeholder,
                        nonzero_init=True,
                        reuse=reuse)

                # Sequence length = count of nonzero ids along axis 1.
                seq_lengths = tf.count_nonzero(placeholder,
                                               axis=1,
                                               dtype=tf.int32)
                for depth in six.moves.range(self.n_layers):
                    if depth == 0:
                        conv_width = self.first_layer_conv_width
                    else:
                        conv_width = self.conv_width
                    with tf.variable_scope('RNN-{}'.format(depth)):
                        layer, last_states = recurrent.directed_RNN(
                            layer,
                            self.recur_size,
                            seq_lengths,
                            bidirectional=self.bidirectional,
                            recur_cell=self.recur_cell,
                            conv_width=conv_width,
                            recur_func=self.recur_func,
                            conv_keep_prob=conv_keep_prob,
                            recur_keep_prob=recur_keep_prob,
                            cifg=self.cifg,
                            highway=self.highway,
                            highway_func=self.highway_func,
                            bilin=self.bilin)

                if self.squeeze_type.startswith('gated'):
                    # Gated variants: deep attention first, then pool over
                    # axis -2.
                    with tf.variable_scope('Attention'):
                        _, layer = classifiers.deep_linear_attention(
                            layer,
                            self.output_size,
                            hidden_func=nonlin.identity,
                            hidden_keep_prob=output_keep_prob)
                    if self.squeeze_type == 'gated_reduce_max':
                        # relu(max) + 0.1 * sum / (nonzero count + eps)
                        peak = tf.nn.relu(tf.reduce_max(layer, axis=-2))
                        scaled_sum = .1 * tf.reduce_sum(layer, axis=-2)
                        nonzero_count = tf.count_nonzero(
                            layer, axis=-2, dtype=tf.float32) + 1e-12
                        layer = peak + scaled_sum / nonzero_count
                    elif self.squeeze_type == 'gated_reduce_sum':
                        layer = self.output_func(tf.reduce_sum(layer, axis=-2))
                else:
                    # Plain variants: pick a squeeze, then a hidden layer.
                    # An unrecognised squeeze_type leaves `layer` untouched
                    # before the hidden layer.
                    if self.squeeze_type == 'linear_attention':
                        with tf.variable_scope('Attention'):
                            layer = classifiers.linear_attention(
                                layer, hidden_keep_prob=output_keep_prob)[1]
                    elif self.squeeze_type == 'final_hidden':
                        # First half of the final states along the last axis.
                        layer, _ = tf.split(last_states, 2, axis=-1)
                    elif self.squeeze_type == 'final_cell':
                        # Second half of the final states along the last axis.
                        _, layer = tf.split(last_states, 2, axis=-1)
                    elif self.squeeze_type == 'final_state':
                        layer = last_states
                    elif self.squeeze_type == 'reduce_max':
                        layer = tf.reduce_max(layer, axis=-2)
                    with tf.variable_scope('Linear'):
                        layer = classifiers.hidden(
                            layer,
                            self.output_size,
                            hidden_func=self.output_func,
                            hidden_keep_prob=output_keep_prob)
                squeezed_buckets.append(layer)

            # Stack the per-bucket results along axis 0 ...
            stacked = tf.concat(squeezed_buckets, 0)
            # ... reorder them through the multibucket placeholder to form
            # the embedding matrix ...
            embedding_matrix = tf.nn.embedding_lookup(
                stacked, self._multibucket.placeholder)
            # ... and read the per-token rows out of that matrix.
            layer = tf.nn.embedding_lookup(embedding_matrix, self.placeholder)

            if embed_keep_prob < 1:
                layer = self.drop_func(layer, embed_keep_prob)
        return layer
Ejemplo n.º 3
0
    def build_graph(self, input_network_outputs={}, reuse=True):
        """Assemble the parser graph: inputs, RNN stack, then classifiers.

        Args:
            input_network_outputs: Iterated as ``(input_network, output)``
                pairs; each pair adds
                ``input_network.get_input_tensor(output, reuse=reuse)`` to the
                concatenated input. The default object is only read, never
                mutated, by this method.
            reuse: Forwarded to every vocab/classifier call. When truthy the
                conv/recurrent keep and include probabilities are fixed at 1.

        Returns:
            A ``(outputs, tokens)`` tuple. ``outputs`` holds the classifier
            results keyed by ``'deptree'`` (plus the ufeats vocab's ``field``
            when present) or by the dephead vocab's ``classname``. ``tokens``
            holds ``n_tokens``, ``tokens_per_sequence``, ``token_weights``
            and ``n_sequences``.
        """

        with tf.variable_scope('Embeddings'):
            if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
                pos_vocabs = [
                    input_vocab for input_vocab in self.input_vocabs
                    if 'POS' in input_vocab.classname
                ]
                # POS tensors are requested with embed_keep_prob=1; drop_func
                # is applied once to their sum below instead.
                pos_tensors = [
                    input_vocab.get_input_tensor(embed_keep_prob=1,
                                                 reuse=reuse)
                    for input_vocab in pos_vocabs
                ]
                non_pos_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                    if 'POS' not in input_vocab.classname
                ]
                if pos_tensors:
                    pos_tensors = tf.add_n(pos_tensors)
                    if not reuse:
                        pos_tensors = [
                            pos_vocabs[0].drop_func(
                                pos_tensors, pos_vocabs[0].embed_keep_prob)
                        ]
                    else:
                        pos_tensors = [pos_tensors]
                input_tensors = non_pos_tensors + pos_tensors
            else:
                input_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(input_tensors, 2)

        # Rescale every position by input_size / (count of nonzero features
        # on the last axis); the 1e-12 keeps the division finite when the
        # count is zero.
        # NOTE(review): `keep_dims` is the older spelling; other build_graph
        # variants in this file pass `keepdims` -- confirm which one the
        # installed TensorFlow expects.
        n_nonzero = tf.to_float(
            tf.count_nonzero(layer, axis=-1, keep_dims=True))
        _, _, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        # presumably a numeric 0/1 mask of positions whose id is > 0; depends
        # on the project helper nn.greater -- TODO confirm
        token_weights = nn.greater(self.id_vocab.placeholder, 0)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        # NOTE(review): the +1 presumably accounts for a root position --
        # confirm against the data pipeline.
        seq_lengths = tokens_per_sequence + 1
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences
        }

        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob

        for i in six.moves.range(self.n_layers):
            # Only the first layer uses first_layer_conv_width.
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            with tf.variable_scope('RNN-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer,
                    self.recur_size,
                    seq_lengths,
                    bidirectional=self.bidirectional,
                    recur_cell=self.recur_cell,
                    conv_width=conv_width,
                    recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob,
                    recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob,
                    cifg=self.cifg,
                    highway=self.highway,
                    highway_func=self.highway_func,
                    bilin=self.bilin)

        output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        with tf.variable_scope('Classifiers'):
            if 'deprel' in output_fields:
                vocab = output_fields['deprel']
                # Looked up before branching: the unfactored branch below
                # needs head_vocab.placeholder too, and previously hit an
                # unbound local because the lookup lived in the other branch.
                head_vocab = output_fields['dephead']
                if vocab.factorized:
                    with tf.variable_scope('Unlabeled'):
                        unlabeled_outputs = head_vocab.get_bilinear_classifier(
                            layer, token_weights=token_weights, reuse=reuse)
                    with tf.variable_scope('Labeled'):
                        labeled_outputs = vocab.get_bilinear_classifier(
                            layer,
                            unlabeled_outputs,
                            token_weights=token_weights,
                            reuse=reuse)
                else:
                    labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                        layer,
                        head_vocab.placeholder,
                        token_weights=token_weights,
                        reuse=reuse)
                outputs['deptree'] = labeled_outputs
                self._evals.add('deptree')
                if 'ufeats' in output_fields:
                    vocab = output_fields['ufeats']
                    outputs[vocab.field] = vocab.get_bilinear_classifier(
                        layer,
                        labeled_outputs,
                        token_weights=token_weights,
                        reuse=reuse)
                    self._evals.add('ufeats')
            elif 'dephead' in output_fields:
                vocab = output_fields['dephead']
                outputs[vocab.classname] = vocab.get_bilinear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('dephead')

        return outputs, tokens
    def build_graph(self,
                    input_network_outputs={},
                    reuse=True,
                    debug=False,
                    nornn=False):
        """"""
        #pdb.set_trace()
        with tf.variable_scope('Embeddings'):

            if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
                pos_vocabs = list(
                    filter(lambda x: 'POS' in x.classname, self.input_vocabs))
                pos_tensors = [
                    input_vocab.get_input_tensor(embed_keep_prob=1,
                                                 reuse=reuse)
                    for input_vocab in pos_vocabs
                ]
                non_pos_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                    if 'POS' not in input_vocab.classname
                ]
                #pos_tensors = [tf.Print(pos_tensor, [pos_tensor]) for pos_tensor in pos_tensors]
                #non_pos_tensors = [tf.Print(non_pos_tensor, [non_pos_tensor]) for non_pos_tensor in non_pos_tensors]
                if pos_tensors:
                    pos_tensors = tf.add_n(pos_tensors)
                    if not reuse:
                        pos_tensors = [
                            pos_vocabs[0].drop_func(
                                pos_tensors, pos_vocabs[0].embed_keep_prob)
                        ]
                    else:
                        pos_tensors = [pos_tensors]
                input_tensors = non_pos_tensors + pos_tensors
            else:  #run this
                input_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(
                input_tensors,
                2)  #batch*sentence*feature? or batch* sentence^2*feature?
        #pdb.set_trace()
        n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1,
                                                 keepdims=True))
        batch_size, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        token_weights = nn.greater(self.id_vocab.placeholder,
                                   0)  #find sentence length
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        seq_lengths = tokens_per_sequence + 1  #batch size list of sentence length
        if self.use_seq2seq:
            token_weights = nn.greater(self.node_id_vocab.placeholder,
                                       0)  #find sentence length
            bucket_size = tf.shape(self.node_id_vocab.placeholder)[1]
            tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
            node_lengths = tokens_per_sequence + 2  # for rnn decoder

            # here we remove the the <bos> token for simplicity
            token_weights = nn.greater(self.node_id_vocab.placeholder[:, 1:-1],
                                       0)  #find sentence length
            bucket_size = tf.shape(token_weights)[1]
            tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)

        #pdb.set_trace()
        root_weights = token_weights + (1 -
                                        nn.greater(tf.range(bucket_size), 0))
        token_weights3D = tf.expand_dims(
            token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
        token_weights2D = tf.expand_dims(
            root_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
        # as our three dimension a b c, is a->b to deciding, so all binary potential should not contain root(x)
        # in fact root should contained in second order prediction except sibling, but for simpler we set all for same
        token_weights4D = tf.cast(
            tf.expand_dims(token_weights2D, axis=-3) *
            tf.expand_dims(tf.expand_dims(root_weights, axis=-1), axis=-1),
            dtype=tf.float32)
        # abc -> ab,ac
        #token_weights_sib = tf.cast(tf.expand_dims(root_, axis=-3) * tf.expand_dims(tf.expand_dims(root_weights, axis=-1),axis=-1),dtype=tf.float32)
        #abc -> ab,cb
        #pdb.set_trace()
        token_weights_cop = tf.cast(
            tf.expand_dims(token_weights2D, axis=-2) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=-1),
            dtype=tf.float32)
        token_weights_cop_0 = token_weights_cop[:, 0] * tf.cast(
            tf.transpose(token_weights3D, [0, 2, 1]), dtype=tf.float32)
        token_weights_cop = tf.concat(
            [token_weights_cop_0[:, None, :], token_weights_cop[:, 1:]], 1)
        #data=np.stack((devprint['printdata']['layer_cop'][0][0]*devprint['token_weights3D'][0].T)[None,:],devprint['printdata']['layer_cop'][0][1:])
        #abc -> ab, bc
        token_weights_gp = tf.cast(
            tf.expand_dims(tf.transpose(token_weights3D, [0, 2, 1]), axis=-1) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=1), axis=1),
            dtype=tf.float32)
        #abc -> ca, ab
        token_weights_gp2 = tf.cast(
            tf.expand_dims(token_weights3D, axis=2) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=1),
            dtype=tf.float32)
        token_weights_sib = token_weights_gp
        #token_weights4D = tf.expand_dims(token_weights3D, axis=-3) * tf.expand_dims(tf.expand_dims(token_weights, axis=-1),axis=-1)
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'token_weights3D': token_weights,
            'n_sequences': n_sequences
        }

        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob
        #R=BiLSTM(X)
        # pdb.set_trace()
        for i in six.moves.range(self.n_layers):
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            #'''
            if not nornn and not self.nornn:
                with tf.variable_scope('RNN-{}'.format(i)):
                    layer, sentence_feat = recurrent.directed_RNN(
                        layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=self.bidirectional,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_include_prob=recur_include_prob,
                        recur_keep_prob=recur_keep_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func,
                        bilin=self.bilin)
            #'''
        if self.separate_prediction:
            print('separating the whole two pipeline')
            with tf.device('/device:GPU:1'):
                for i in six.moves.range(self.n_layers):
                    conv_width = self.first_layer_conv_width if not i else self.conv_width
                    #'''
                    if not nornn and not self.nornn:
                        with tf.variable_scope('RNN2-{}'.format(i)):
                            layer_rel, sentence_feat = recurrent.directed_RNN(
                                layer,
                                self.recur_size,
                                seq_lengths,
                                bidirectional=self.bidirectional,
                                recur_cell=self.recur_cell,
                                conv_width=conv_width,
                                recur_func=self.recur_func,
                                conv_keep_prob=conv_keep_prob,
                                recur_include_prob=recur_include_prob,
                                recur_keep_prob=recur_keep_prob,
                                cifg=self.cifg,
                                highway=self.highway,
                                highway_func=self.highway_func,
                                bilin=self.bilin)
        else:
            layer_rel = layer
        #pdb.set_trace()
        output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        #parser/structs/vocabs/token_vocabs.py loss is calculated in get_...
        # pdb.set_trace()
        # for seq2seq, create new features
        if 'correspond_word' in output_fields:
            print('use seq2seq model for node prediction')
            with tf.variable_scope('Seq2SeqDecoder'):
                sequence_length = {}
                # here we remove 'root' node in the source sentence.(x)
                sequence_length['source'] = seq_lengths
                sequence_length['target'] = node_lengths
                pos_vocabs = []
                lemma_vocabs = []
                for input_vocab in self.input_vocabs:
                    if 'POS' in input_vocab.classname:
                        pos_vocabs.append(
                            input_vocab.get_input_tensor(reuse=reuse))
                    if 'Lemma' in input_vocab.classname:
                        lemma_vocabs.append(
                            input_vocab.get_input_tensor(reuse=reuse))
                reinput_tensors = pos_vocabs + lemma_vocabs
                reinput_tensors = tf.concat(reinput_tensors, 2)
                seq2seq_input_tensors = []
                if len(self.decoder_vocabs) > 0:
                    # get node label embedding
                    seq2seq_input_tensors = [
                        decoder_vocab.get_input_tensor(reuse=reuse)
                        for decoder_vocab in self.decoder_vocabs
                        if 'Copy' not in decoder_vocab.classname
                    ]
                    # pdb.set_trace()
                    pointer_generator_inputs = {
                        decoder_vocab.classname[7:-5]:
                        decoder_vocab.placeholder
                        for decoder_vocab in self.decoder_vocabs
                        if 'Copy' in decoder_vocab.classname
                    }
                    input_shape = seq2seq_input_tensors[0].shape
                target_placeholder = output_fields[
                    'correspond_word'].placeholder
                #target_tensor_shape=[input_shape[0],input_shape[1],reinput_tensors.shape[-1]]
                mapping = nn.greater(target_placeholder, 0)
                result_tensor = tf.batch_gather(
                    reinput_tensors, mapping) * tf.cast(
                        (mapping > 0), dtype=tf.float32)[:, :, None]
                input_features = seq2seq_input_tensors + [result_tensor]
                input_feature = tf.concat(input_features, 2)

                #input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
                #pdb.set_trace()
                # [batch, num_sequence_tokens+1, hidden], [batch, num_node_tokens+2,hidden] -> [batch, num_node_tokens+2, num_sequence_tokens+1]
                # here remove the "root" node from sentence encoder, so the output layer is one sequence smaller, and the mask "token_weights3D" should be smaller as well(x)
                node_encoding = output_fields['correspond_word'].forward(
                    layer, input_feature[:, :-1], sentence_feat,
                    token_weights3D, sequence_length)

                if 'label' in output_fields:
                    # pdb.set_trace()
                    self._evals.add('label')
                    label_vocab = output_fields['label']
                    label_vocab.predictor = PointerGenerator(
                        label_vocab.hidden_size, label_vocab.hidden_size,
                        len(label_vocab), 0, True, label_vocab.hidden_func,
                        label_vocab.hidden_keep_prob)
                    node_outputs = label_vocab.forward(
                        node_encoding['values'],
                        node_encoding['SrcWeights'],
                        node_encoding['CorefWeights'],
                        pointer_generator_inputs,
                        debug=debug)
                    outputs['label'] = node_outputs
                # pdb.set_trace()
                # remove the start and end token

                layer = node_encoding['values'][:, :-1]
                layer_rel = layer
                # pdb.set_trace()
        #layers
        with tf.variable_scope('Classifiers'):
            if 'semrel' in output_fields:
                vocab = output_fields['semrel']
                head_vocab = output_fields['semhead']
                head_vocab.token_weights_sib = token_weights_sib
                head_vocab.token_weights_cop = token_weights_cop
                head_vocab.token_weights_gp = token_weights_gp
                head_vocab.token_weights_gp2 = token_weights_gp2
                head_vocab.token_weights = token_weights
                if vocab.factorized:
                    with tf.variable_scope('Unlabeled'):
                        #pdb.set_trace()
                        if self.layer_mask(head_vocab):
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug,
                                token_weights4D=token_weights4D)
                        else:
                            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                                layer,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug)

                    if self.two_gpu:
                        with tf.device('/device:GPU:1'):
                            with tf.variable_scope('Labeled'):
                                labeled_outputs = vocab.get_bilinear_classifier(
                                    layer_rel,
                                    unlabeled_outputs,
                                    token_weights=token_weights3D,
                                    reuse=reuse,
                                    debug=debug)
                    else:
                        with tf.variable_scope('Labeled'):
                            labeled_outputs = vocab.get_bilinear_classifier(
                                layer_rel,
                                unlabeled_outputs,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug)
                else:
                    labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                        layer,
                        head_vocab.placeholder,
                        token_weights=token_weights3D,
                        reuse=reuse)
                outputs['semgraph'] = labeled_outputs
                self._evals.add('semgraph')
            elif 'semhead' in output_fields:
                vocab = output_fields['semhead']
                outputs[vocab.classname] = vocab.get_bilinear_classifier(
                    layer, token_weights=token_weights3D, reuse=reuse)
                self._evals.add('semhead')
            if 'attr' in output_fields:
                print('predict attributes')
                attr_vocab = output_fields['attr']
                with tf.variable_scope('Attribute'):
                    attr_outputs = attr_vocab.get_bilinear_classifier(
                        layer_rel,
                        labeled_outputs,
                        token_weights=token_weights[:, :, None],
                        reuse=reuse,
                        debug=debug)
                self._evals.add('attribute')
                outputs['attribute'] = attr_outputs
                # if 'semgraph' in outputs:
                # 	outputs['semgraph']['loss'] = tf.zeros(outputs['attribute']['loss'].shape,dtype=tf.float32)

            # -------------------------------------------------------------------------
            if 'frame' in output_fields:
                print('predict sdp frames')
                frame_vocab = output_fields['frame']
                with tf.variable_scope('Frame'):
                    frame_outputs = frame_vocab.get_linear_classifier(
                        layer_rel, token_weights, reuse=reuse, debug=debug)
                self._evals.add('frame')
                # pdb.set_trace()
                outputs['frame'] = frame_outputs
            # ---------------------------------------------------------------------------

        if debug:
            outputs['semgraph']['token_weights'] = token_weights
            outputs['semgraph']['token_weights3D'] = token_weights3D
            outputs['semgraph']['root_weights'] = root_weights
            outputs['semgraph']['token_weights4D'] = token_weights4D
            outputs['semgraph']['token_weights_sib'] = token_weights_sib
            outputs['semgraph']['token_weights_cop'] = token_weights_cop
            outputs['semgraph']['token_weights_gp'] = token_weights_gp
            outputs['semgraph']['token_weights_gp2'] = token_weights_gp2
            outputs['semgraph']['printdata'][
                'word_postag'] = self.input_vocabs[-1].placeholder
            if 'correspond_word' in output_fields:
                outputs['semgraph']['input_feature'] = input_feature
                if debug:
                    outputs['semgraph']['decoder'] = node_encoding
                    outputs['semgraph']['nodes'] = node_outputs
            if 'ufeats' in output_fields:
                outputs['semgraph']['frame'] = outputs['frame']
        return outputs, tokens
    def build_graph(self,
                    input_network_outputs={},
                    reuse=True,
                    debug=False,
                    nornn=False):
        """Build the graph-parser network: embeddings, encoder, classifiers.

        Args:
            input_network_outputs: iterable of ``(input_network, output)``
                pairs; each network's input tensor is concatenated with the
                vocab embeddings. The default is never mutated here.
            reuse: forwarded to every vocab/classifier call. When true, the
                conv/recurrent keep and include probabilities are set to 1.
            debug: forwarded to the classifiers. When true and a 'semgraph'
                output was built, the weight masks are attached to it.
            nornn: when true, the recurrent layers are skipped and the
                classifiers read the rescaled embeddings directly.

        Returns:
            A pair ``(outputs, tokens)``. ``outputs`` maps an output name
            ('semgraph', or the semhead vocab's classname) to whatever the
            classifier returned; ``tokens`` holds token/sequence counts and
            the token weights.
        """
        with tf.variable_scope('Embeddings'):
            if self.sum_pos:  # TODO this should be done with a `POSMultivocab`
                pos_vocabs = list(
                    filter(lambda x: 'POS' in x.classname, self.input_vocabs))
                # POS embeddings are fetched without dropout; dropout is
                # applied once to their sum below.
                pos_tensors = [
                    input_vocab.get_input_tensor(embed_keep_prob=1,
                                                 reuse=reuse)
                    for input_vocab in pos_vocabs
                ]
                non_pos_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                    if 'POS' not in input_vocab.classname
                ]
                if pos_tensors:
                    pos_tensors = tf.add_n(pos_tensors)
                    if not reuse:
                        pos_tensors = [
                            pos_vocabs[0].drop_func(
                                pos_tensors, pos_vocabs[0].embed_keep_prob)
                        ]
                    else:
                        pos_tensors = [pos_tensors]
                input_tensors = non_pos_tensors + pos_tensors
            else:
                input_tensors = [
                    input_vocab.get_input_tensor(reuse=reuse)
                    for input_vocab in self.input_vocabs
                ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            # Concatenate all inputs along axis 2.
            layer = tf.concat(input_tensors, 2)

        # Rescale every position by input_size / (count of nonzero features);
        # the epsilon keeps the division finite when a row is all zeros.
        n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1,
                                                 keepdims=True))
        batch_size, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        # Weights derived from ids > 0 (presumably 1 for real tokens and 0
        # for padding -- depends on nn.greater, which is not visible here).
        token_weights = nn.greater(self.id_vocab.placeholder, 0)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        seq_lengths = tokens_per_sequence + 1

        # root_weights additionally switches on position 0.
        root_weights = token_weights + (1 -
                                        nn.greater(tf.range(bucket_size), 0))
        token_weights3D = tf.expand_dims(
            token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
        token_weights2D = tf.expand_dims(
            token_weights, axis=-1) * tf.expand_dims(token_weights, axis=-2)
        # The 4D weights are built from token_weights only (not
        # root_weights), so position 0 is excluded from them.
        token_weights4D = tf.cast(
            tf.expand_dims(token_weights2D, axis=-3) *
            tf.expand_dims(tf.expand_dims(token_weights, axis=-1), axis=-1),
            dtype=tf.float32)
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            # NOTE(review): this key stores `token_weights`, not the
            # `token_weights3D` tensor computed above. Kept as-is because
            # consumers are not visible from here -- confirm intent.
            'token_weights3D': token_weights,
            'n_sequences': n_sequences
        }

        # No dropout when reusing.
        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob

        for i in six.moves.range(self.n_layers):
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            if not nornn:
                with tf.variable_scope('RNN-{}'.format(i)):
                    layer, _ = recurrent.directed_RNN(
                        layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=self.bidirectional,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_include_prob=recur_include_prob,
                        recur_keep_prob=recur_keep_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func,
                        bilin=self.bilin)

        output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        with tf.variable_scope('Classifiers'):
            if 'semrel' in output_fields:
                vocab = output_fields['semrel']
                head_vocab = output_fields['semhead']
                if vocab.factorized:
                    label_end2end = self.label_end2end
                    discriminator_kwargs = {
                        'token_weights': token_weights3D,
                        'reuse': reuse,
                        'debug': debug,
                    }
                    if label_end2end:
                        # End-to-end: the labeled classifier runs first on
                        # the head placeholder and feeds the discriminator.
                        with tf.variable_scope('Labeled'):
                            labeled_outputs = vocab.get_bilinear_classifier(
                                layer,
                                head_vocab.placeholder,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug)
                        discriminator_kwargs['prev_output'] = labeled_outputs
                    with tf.variable_scope('Unlabeled'):
                        # The 4D weights are only passed for head vocabs
                        # that self.layer_mask accepts.
                        if self.layer_mask(head_vocab):
                            discriminator_kwargs[
                                'token_weights4D'] = token_weights4D
                        unlabeled_outputs = head_vocab.get_bilinear_discriminator(
                            layer, **discriminator_kwargs)
                    if label_end2end:
                        labeled_outputs = unlabeled_outputs
                    else:
                        with tf.variable_scope('Labeled'):
                            labeled_outputs = vocab.get_bilinear_classifier(
                                layer,
                                unlabeled_outputs,
                                token_weights=token_weights3D,
                                reuse=reuse,
                                debug=debug)
                else:
                    labeled_outputs = vocab.get_unfactored_bilinear_classifier(
                        layer,
                        head_vocab.placeholder,
                        token_weights=token_weights3D,
                        reuse=reuse)
                outputs['semgraph'] = labeled_outputs
                self._evals.add('semgraph')
            elif 'semhead' in output_fields:
                vocab = output_fields['semhead']
                outputs[vocab.classname] = vocab.get_bilinear_classifier(
                    layer, token_weights=token_weights3D, reuse=reuse)
                self._evals.add('semhead')

        # 'semgraph' only exists when the 'semrel' branch ran; without this
        # guard debug=True raised KeyError for semhead-only configurations.
        if debug and 'semgraph' in outputs:
            outputs['semgraph']['token_weights'] = token_weights
            outputs['semgraph']['token_weights3D'] = token_weights3D
            outputs['semgraph']['root_weights'] = root_weights
            outputs['semgraph']['token_weights4D'] = token_weights4D

        return outputs, tokens
Ejemplo n.º 6
0
 def build_graph(self, input_network_outputs={}, reuse=True):
   """Assemble the tagger graph: embeddings, recurrent stack, classifiers.

   `input_network_outputs` is iterated as (input_network, output) pairs whose
   tensors are concatenated with the vocab embeddings. `reuse` is passed to
   every vocab call and, when true, sets all keep/include probabilities to 1.

   Returns `(outputs, tokens)`: the per-field classifier results and a dict
   with token/sequence counts plus the token weights.
   """

   with tf.variable_scope('Embeddings'):
     embedded = []
     for input_vocab in self.input_vocabs:
       embedded.append(input_vocab.get_input_tensor(reuse=reuse))
     for input_network, output in input_network_outputs:
       with tf.variable_scope(input_network.classname):
         network_tensor = input_network.get_input_tensor(output, reuse=reuse)
         embedded.append(network_tensor)
     layer = tf.concat(embedded, 2)
   _batch_size, _bucket_size, feature_size = nn.get_sizes(layer)
   # Scale each position by feature_size / (number of nonzero features).
   nonzero_count = tf.to_float(
     tf.count_nonzero(layer, axis=-1, keep_dims=True))
   rescale = feature_size / (nonzero_count + tf.constant(1e-12))
   layer *= rescale

   token_weights = nn.greater(self.id_vocab.placeholder, 0)
   tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
   n_tokens = tf.reduce_sum(tokens_per_sequence)
   n_sequences = tf.count_nonzero(tokens_per_sequence)
   seq_lengths = tokens_per_sequence + 1
   tokens = {}
   tokens['n_tokens'] = n_tokens
   tokens['tokens_per_sequence'] = tokens_per_sequence
   tokens['token_weights'] = token_weights
   tokens['n_sequences'] = n_sequences

   # Dropout is disabled entirely when reusing.
   if reuse:
     conv_keep_prob = 1.
     recur_keep_prob = 1.
     recur_include_prob = 1.
   else:
     conv_keep_prob = self.conv_keep_prob
     recur_keep_prob = self.recur_keep_prob
     recur_include_prob = self.recur_include_prob

   for depth in six.moves.range(self.n_layers):
     # Only the first layer uses first_layer_conv_width.
     conv_width = self.conv_width if depth else self.first_layer_conv_width
     with tf.variable_scope('RNN-{}'.format(depth)):
       rnn_options = dict(
         bidirectional=self.bidirectional,
         recur_cell=self.recur_cell,
         conv_width=conv_width,
         recur_func=self.recur_func,
         conv_keep_prob=conv_keep_prob,
         recur_keep_prob=recur_keep_prob,
         recur_include_prob=recur_include_prob,
         cifg=self.cifg,
         highway=self.highway,
         highway_func=self.highway_func,
         bilin=self.bilin)
       layer, _ = recurrent.directed_RNN(
         layer, self.recur_size, seq_lengths, **rnn_options)

   vocab_by_field = {vocab.field: vocab for vocab in self.output_vocabs}
   outputs = {}

   def linear_head(field, shared):
     """Add the linear classifier for `field`; return the updated shared output.

     `shared` is handed to the classifier only when self.share_layer is set.
     The first classifier built becomes the shared output from then on.
     """
     head_vocab = vocab_by_field[field]
     result = head_vocab.get_linear_classifier(
       layer, token_weights,
       shared if self.share_layer else None,
       reuse=reuse)
     outputs[head_vocab.field] = result
     self._evals.add(field)
     return result if shared is None else shared

   with tf.variable_scope('Classifiers'):
     last_output = None
     if 'lemma' in vocab_by_field:
       last_output = linear_head('lemma', last_output)
     if 'upos' in vocab_by_field:
       upos_vocab = vocab_by_field['upos']
       last_output = linear_head('upos', last_output)
       upos_output = outputs[upos_vocab.field]
       # Embed predicted tags when reusing, target tags otherwise.
       if reuse:
         upos_idxs = upos_output['predictions']
       else:
         upos_idxs = upos_output['targets']
       upos_embed = upos_vocab.get_input_tensor(
         inputs=upos_idxs, embed_keep_prob=1, reuse=reuse)
       # Without layer sharing, xpos/ufeats are conditioned on the upos
       # embedding through a bilinear classifier.
       for field in ('xpos', 'ufeats'):
         if field in vocab_by_field and not self.share_layer:
           dependent_vocab = vocab_by_field[field]
           outputs[dependent_vocab.field] = (
             dependent_vocab.get_bilinear_classifier_with_embeddings(
               layer, upos_embed, token_weights,
               reuse=reuse))
           self._evals.add(field)
     # Otherwise (no upos, or layers are shared) they get plain linear heads.
     for field in ('xpos', 'ufeats'):
       if field in vocab_by_field and (
           'upos' not in vocab_by_field or self.share_layer):
         last_output = linear_head(field, last_output)
     if 'deprel' in vocab_by_field:
       last_output = linear_head('deprel', last_output)
   return outputs, tokens
Ejemplo n.º 7
0
    def build_graph(self, input_network_outputs={}, reuse=True):
        """Build a network with separate forward/backward RNN stacks.

        Embeds the inputs, runs an 'RNN_FW-i' stack (and, when
        self.bidirectional is set, an 'RNN_BW-i' stack), combines the two
        directions, and attaches the 'form', 'upos' and 'xpos' classifiers
        that are present in self.output_vocabs.

        Args:
            input_network_outputs: iterated as (input_network, output) pairs;
                each network's input tensor is concatenated with the vocab
                embeddings. Never mutated here.
            reuse: forwarded to every vocab call; when true, all keep/include
                probabilities are set to 1.

        Returns:
            (outputs, tokens): per-field classifier results, and a dict with
            token/sequence counts plus the token weights.
        """

        # Re-initialised to {} again just before the classifiers below.
        outputs = {}
        with tf.variable_scope('Embeddings'):
            input_tensors = [
                input_vocab.get_input_tensor(reuse=reuse)
                for input_vocab in self.input_vocabs
            ]
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(input_tensors, 2)
        # Rescale each position by input_size / (number of nonzero features);
        # the epsilon keeps the division finite for all-zero rows.
        n_nonzero = tf.to_float(
            tf.count_nonzero(layer, axis=-1, keep_dims=True))
        batch_size, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        # Weights derived from ids > 0 (presumably 1 for real tokens, 0 for
        # padding -- depends on nn.greater, which is not visible here).
        token_weights = nn.greater(self.id_vocab.placeholder,
                                   0,
                                   dtype=tf.int32)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        seq_lengths = tokens_per_sequence + 1
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences
        }

        # No dropout when reusing.
        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob

        # NOTE(review): seq_axis=2 is used for every reverse_sequence call in
        # this method. If layer is (batch, sequence, features) -- TODO
        # confirm -- the sequence dimension would be axis 1, not 2.
        rev_layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
        for i in six.moves.range(self.n_layers):
            # Only the first layer uses first_layer_conv_width.
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            with tf.variable_scope('RNN_FW-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer,
                    self.recur_size,
                    seq_lengths,
                    bidirectional=False,
                    recur_cell=self.recur_cell,
                    conv_width=conv_width,
                    recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob,
                    recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob,
                    cifg=self.cifg,
                    highway=self.highway,
                    highway_func=self.highway_func)
            if self.bidirectional:
                with tf.variable_scope('RNN_BW-{}'.format(i)):
                    rev_layer, _ = recurrent.directed_RNN(
                        rev_layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=False,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_keep_prob=recur_keep_prob,
                        recur_include_prob=recur_include_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func)
        ones = tf.ones([batch_size, 1, 1])
        # `i` is left over from the loop above, so this re-enters the scope of
        # the last layer; it is undefined if self.n_layers is 0.
        with tf.variable_scope('RNN_FW-{}/RNN/Loop'.format(i), reuse=True):
            fw_initial_state = tf.get_variable('Initial_state')
            # The stored state is split into recur_size-wide pieces along the
            # last axis and only the first piece is kept.
            n_splits = fw_initial_state.get_shape().as_list(
            )[-1] / self.recur_size
            fw_initial_state = tf.split(fw_initial_state, int(n_splits), -1)[0]
            start_token = ones * fw_initial_state
            # As written: reverse, drop index 0 along axis 1, reverse back
            # with lengths reduced by one, then prepend start_token
            # (presumably shifting the forward states by one step).
            layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
            layer = layer[:, 1:]
            layer = tf.reverse_sequence(layer, seq_lengths - 1, seq_axis=2)
            layer = tf.concat([start_token, layer], axis=1)
        if self.bidirectional:
            with tf.variable_scope('RNN_BW-{}/RNN/Loop'.format(i), reuse=True):
                bw_initial_state = tf.get_variable('Initial_state')
                n_splits = bw_initial_state.get_shape().as_list(
                )[-1] / self.recur_size
                bw_initial_state = tf.split(bw_initial_state, int(n_splits),
                                            -1)[0]
                stop_token = ones * bw_initial_state
                # NOTE(review): rev_layer is rebuilt from `layer` (the forward
                # states) here, so the RNN_BW outputs computed in the loop are
                # not used past this point -- confirm this is intended.
                rev_layer = tf.concat([stop_token, layer], axis=1)
                rev_layer = tf.reverse_sequence(rev_layer,
                                                seq_lengths + 1,
                                                seq_axis=2)[:, 1:]
            if self.bilin:
                # Also include the elementwise product of both directions.
                layer = tf.concat([layer * rev_layer, layer, rev_layer],
                                  axis=2)
            else:
                layer = tf.concat([layer, rev_layer], axis=2)

        output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        with tf.variable_scope('Classifiers'):
            if 'form' in output_vocabs:
                vocab = output_vocabs['form']
                outputs[vocab.field] = vocab.get_sampled_linear_classifier(
                    layer,
                    self.n_samples,
                    token_weights=token_weights,
                    reuse=reuse)
                self._evals.add('form')
            if 'upos' in output_vocabs:
                vocab = output_vocabs['upos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('upos')
            if 'xpos' in output_vocabs:
                vocab = output_vocabs['xpos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('xpos')
        return outputs, tokens