Example #1
    def build_computation_graph(self):
        """Builds the computation graph."""
        # initialize the word embeddings using the pre-trained embedding file

        embeddings, emb_dim = load_embeddings_file(self.embeds,
                                                   self.languages,
                                                   lower=self.lower)
        self.w_in_dim = emb_dim

        num_words = len(set(embeddings.keys()).union(set(self.word2id.keys())))

        self.wembeds = self.model.add_lookup_parameters((num_words, emb_dim))

        # start with every word id marked as out-of-vocabulary (OOV);
        # ids that receive a pre-trained embedding below are removed from this set
        self.oov_id = set(range(num_words))

        for i, word in enumerate(embeddings.keys()):
            if word not in self.word2id:
                self.word2id[word] = len(self.word2id.keys())
            self.wembeds.init_row(self.word2id[word], embeddings[word])

            self.oov_id.remove(self.word2id[word])

        layers = []  # inner layers

        output_layers_dict = {}  # from task_name to actual predictor

        # we have a separate layer for each task for cross-stitching;
        # otherwise just 1 layer for all tasks with hard parameter sharing
        num_task_layers = len(self.task_names) if self.cross_stitch else 1
        cross_stitch_layers = []

        for layer_num in range(self.h_layers):
            print(">>> %d layer_num" % layer_num, flush=True)
            input_dim = self.w_in_dim if layer_num == 0 \
                else self.h_dim

            task_layers = []
            # get one layer per task for cross-stitching or just one layer
            for task_id in range(num_task_layers):
                builder = dynet.LSTMBuilder(1, input_dim, self.h_dim,
                                            self.model)
                task_layers.append(BiRNNSequencePredictor(builder))
            layers.append(task_layers)
            if self.cross_stitch:
                print('Using cross-stitch units after layer %d...' % layer_num,
                      flush=True)
                cross_stitch_layers.append(
                    CrossStitchLayer(self.model, len(self.task_names),
                                     self.h_dim, self.num_subspaces,
                                     self.cross_stitch_init_scheme))

        layer_stitch_layers = []

        # build one output layer per task
        for task_name in self.task_names:
            task_num_labels = len(self.task2label2id[task_name])

            print('Using an MLP for task losses.', flush=True)

            input_dim = self.h_dim * 2
            activation = dynet.softmax

            layer_output = None
            if 'sentiment' in task_name:  # multi-label classification:
                # use one binary classification layer per label
                layer_output = []
                for _ in range(task_num_labels):
                    layer_output.append(
                        Layer(self.model, input_dim, 2, activation, mlp=True))

            else:
                layer_output = Layer(self.model,
                                     input_dim,
                                     task_num_labels,
                                     activation,
                                     mlp=True)

            output_layers_dict[task_name] = layer_output

            if self.h_layers > 1:
                # w/o cross-stitching, we only use one LayerStitchLayer
                layer_stitch_layers.append(
                    LayerStitchLayer(self.model, self.h_layers, self.h_dim,
                                     self.layer_stitch_init_scheme))

        print('#\nOutput layers: %d\n' % len(output_layers_dict), flush=True)

        predictors = dict()
        predictors["inner"] = layers
        predictors['cross_stitch'] = cross_stitch_layers
        predictors['layer_stitch'] = layer_stitch_layers
        predictors["output_layers_dict"] = output_layers_dict
        return predictors
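The CrossStitchLayer constructed above is defined elsewhere in the repository. As a rough illustration, the following minimal DyNet sketch shows what a cross-stitch unit (Misra et al., 2016) computes: a learned mixing matrix alpha that recombines the task-specific hidden states after each shared layer. The sizes, the alpha initialisation, and the cross_stitch helper are illustrative assumptions; the num_subspaces and init-scheme options passed in the constructor call are ignored here.

# Minimal cross-stitch sketch (assumption: a plain linear mix of the per-task
# states; the subspace / init-scheme options used above are not modelled).
import numpy as np
import dynet

model = dynet.ParameterCollection()
num_tasks, h_dim = 2, 100
# alpha is a num_tasks x num_tasks mixing matrix, initialised close to the
# identity so each task mostly keeps its own representation early in training
alpha_init = np.full((num_tasks, num_tasks), 0.1)
np.fill_diagonal(alpha_init, 0.9)
alpha = model.parameters_from_numpy(alpha_init)

def cross_stitch(task_states):
    """task_states: list of num_tasks expressions, each of shape (h_dim,)."""
    H = dynet.concatenate_cols(task_states)        # (h_dim, num_tasks)
    A = dynet.parameter(alpha)                     # (num_tasks, num_tasks)
    mixed = H * dynet.transpose(A)                 # column t = sum_s alpha[t, s] * h_s
    return [dynet.reshape(dynet.select_cols(mixed, [t]), (h_dim,))
            for t in range(num_tasks)]

dynet.renew_cg()
states = [dynet.inputTensor(np.random.randn(h_dim)) for _ in range(num_tasks)]
mixed_states = cross_stitch(states)   # one recombined (h_dim,) state per task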
Example #2
    def __init__(self, vocab, pos, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word], ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count))

        self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]
        self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units
        
        self.hidBias = self.model.add_parameters((self.ldims * 8))
        self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.hid2Bias = self.model.add_parameters((self.hidden_units))

        self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

        if self.labelsFlag:
            self.rhidBias = self.model.add_parameters((self.ldims * 8))
            self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
            self.rhid2Bias = self.model.add_parameters((self.hidden_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
            self.routBias = self.model.add_parameters((len(self.irels)))
            self.ffRelPredictor = FFSequencePredictor(
                Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels),
                      softmax))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))
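The constructor above only allocates the forward/backward VanillaLSTMBuilder pairs; the sketch below shows how such a pair is typically run over a sentence to produce per-token POS distributions. The sizes, parameter names, and the pos_distributions helper are illustrative assumptions, not the repository's actual forward pass (which goes through its own Layer / FFSequencePredictor helpers).

# Minimal BiLSTM-tagger sketch, assuming one pre-computed input vector per token.
import dynet

model = dynet.ParameterCollection()
in_dim, ldims, n_pos = 100, 128, 17                    # illustrative sizes
f_lstm = dynet.VanillaLSTMBuilder(1, in_dim, ldims, model)
b_lstm = dynet.VanillaLSTMBuilder(1, in_dim, ldims, model)
W_pos = model.add_parameters((n_pos, 2 * ldims))
b_pos = model.add_parameters((n_pos,))

def pos_distributions(word_vectors):
    """word_vectors: list of (in_dim,) expressions, one per token."""
    f_states = f_lstm.initial_state().transduce(word_vectors)
    b_states = list(reversed(
        b_lstm.initial_state().transduce(list(reversed(word_vectors)))))
    W, b = dynet.parameter(W_pos), dynet.parameter(b_pos)
    # concatenate the forward and backward state at each token and apply softmax
    return [dynet.softmax(W * dynet.concatenate([f, bw]) + b)
            for f, bw in zip(f_states, b_states)]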
Example #3
    def __init__(self, vocab, pos, rels, w2i, c2i, m2i, t2i, morph_dict, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        #if options.learning_rate is not None:
        #    self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        #    print("Adam initial learning rate:", options.learning_rate)
        self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag
        self.morphFlag = options.morphFlag
        self.goldMorphFlag = options.goldMorphFlag
        self.morphTagFlag = options.morphTagFlag
        self.goldMorphTagFlag = options.goldMorphTagFlag
        self.lowerCase = options.lowerCase
        self.mtag_encoding_composition_type = options.mtag_encoding_composition_type
        self.mtag_encoding_composition_alpha = options.mtag_encoding_composition_alpha

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.mdims = options.membedding_dims
        self.tdims = options.tembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in iter(w2i.items())}
        self.pos = {word: ind for ind, word in enumerate(pos)}
        self.id2pos = {ind: word for ind, word in enumerate(pos)}
        self.c2i = c2i
        self.m2i = m2i
        self.t2i = t2i
        self.i2t = {ind: tok for tok, ind in t2i.items()}
        self.morph_dict = morph_dict
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels
        self.pdims = options.pembedding_dims
        self.tagging_attention_size = options.tagging_att_size

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))
        self.ext_embeddings = None

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=self.lowerCase, type=options.external_embedding_type)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                if word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word], ext_embeddings[word])
            self.ext_embeddings = ext_embeddings
            print("Vocab size: %d; #words having pretrained vectors: %d" % (len(self.vocab), count))

        self.morph_dims = 2*2*self.mdims if self.morphFlag else 0
        self.mtag_dims = 2*self.tdims if self.morphTagFlag else 0
        self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)]
        self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

        if self.bibiFlag:
            self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)]
            self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                              VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
        elif self.layers > 0:
            self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model),
                             VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims, self.ldims, self.model)]
        else:
            self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model),
                             SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims, self.ldims, self.model)]

        self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

        self.hidden_units = options.hidden_units

        self.hidBias = self.model.add_parameters((self.ldims * 8))
        self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.hid2Bias = self.model.add_parameters((self.hidden_units))

        self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

        if self.labelsFlag:
            self.rhidBias = self.model.add_parameters((self.ldims * 8))
            self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
            self.rhid2Bias = self.model.add_parameters((self.hidden_units))
            self.routLayer = self.model.add_parameters(
                (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
            self.routBias = self.model.add_parameters((len(self.irels)))
            self.ffRelPredictor = FFSequencePredictor(
                Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8, len(self.irels),
                      softmax))

        self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

        if self.morphFlag:
            self.seg_lstm = [VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model),
                                    VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model)]
            self.seg_hidLayer = self.model.add_parameters((1, self.cdims*2))
            self.slookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

            self.char_lstm = [VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model),
                                    VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model)]
            self.char_hidLayer = self.model.add_parameters((self.mdims, self.mdims*2))
            self.mclookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

            self.morph_lstm = [VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model),
                                VanillaLSTMBuilder(1, self.mdims*2, self.wdims, self.model)]
            self.morph_hidLayer = self.model.add_parameters((self.wdims, self.wdims*2))
            self.mlookup = self.model.add_lookup_parameters((len(m2i), self.mdims))

            self.morph_rnn = RNNSequencePredictor(LSTMBuilder(1, self.mdims*2, self.mdims*2, self.model))

        if self.morphTagFlag:
            # All weights for morpheme tagging live here.

            # Decoder
            self.dec_lstm = VanillaLSTMBuilder(1, 2 * self.cdims + self.tdims + self.cdims * 2, self.cdims, self.model)

            # Attention
            self.attention_w1 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_w2 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_v = self.model.add_parameters((1, self.tagging_attention_size))

            # Attention Context
            self.attention_w1_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_w2_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
            self.attention_v_context = self.model.add_parameters((1, self.tagging_attention_size))

            # MLP - Softmax
            self.decoder_w = self.model.add_parameters((len(t2i), self.cdims))
            self.decoder_b = self.model.add_parameters((len(t2i)))

            self.mtag_rnn = RNNSequencePredictor(VanillaLSTMBuilder(1, self.tdims, self.tdims, self.model))
            self.tlookup = self.model.add_lookup_parameters((len(t2i), self.tdims))
            if self.mtag_encoding_composition_type != "None":
                self.mtag_encoding_f_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
                self.mtag_encoding_f_b = self.model.add_parameters((2 * self.tdims))
                self.mtag_encoding_b_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
                self.mtag_encoding_b_b = self.model.add_parameters((2 * self.tdims))
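The attention_w1 / attention_w2 / attention_v parameters above have the shapes of an MLP (Bahdanau-style) attention over the character-encoder states. The sketch below shows how such parameters are commonly combined; this is an assumption based on the parameter shapes, not the repository's actual decoding code.

# MLP attention sketch: encoded is a (2*cdims, seq_len) matrix with one encoder
# state per column; query is a (2*cdims,) decoder-side vector; w1, w2, v are the
# parameter expressions for attention_w1, attention_w2, attention_v.
import dynet

def attend(encoded, query, w1, w2, v):
    # unnormalised score per encoder column: v * tanh(w1 * enc_i + w2 * query)
    scores = dynet.transpose(
        v * dynet.tanh(dynet.colwise_add(w1 * encoded, w2 * query)))
    weights = dynet.softmax(scores)      # attention distribution over positions
    return encoded * weights             # context vector, shape (2*cdims, 1)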
Example #4
    def build_computation_graph(self, num_words, num_chars):
        """Builds the computation graph."""
        # initialize the word embeddings
        if self.embeds_file:
            print('Loading embeddings', flush=True)
            embeddings, emb_dim = load_embeddings_file(self.embeds_file,
                                                       lower=self.lower)
            assert (emb_dim == self.in_dim)
            # initialize all words with embeddings; for very large vocabularies,
            # we don't want to do this
            num_words = len(
                set(embeddings.keys()).union(set(self.word2id.keys())))
            # init model parameters and initialize them
            wembeds = self.model.add_lookup_parameters(
                (num_words, self.in_dim))
            cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim))

            for i, word in enumerate(embeddings.keys()):
                if word not in self.word2id:
                    self.word2id[word] = len(self.word2id.keys())
                wembeds.init_row(self.word2id[word], embeddings[word])
            print('Initialized %d word embeddings...' % (i + 1), flush=True)
        else:
            wembeds = self.model.add_lookup_parameters(
                (num_words, self.in_dim))
            cembeds = self.model.add_lookup_parameters(
                (num_chars, self.c_in_dim))

        layers = []  # inner layers
        output_layers_dict = {}  # from task_name to actual softmax predictor
        task_expected_at = {}  # maps task_name => output_layer id

        # connect output layers to tasks
        for output_layer_id, task_name in zip(self.pred_layer,
                                              self.task_names):
            assert output_layer_id <= self.h_layers,\
                ('Error: Task cannot be predicted at layer beyond model. '
                 'Increase h_layers.')
            task_expected_at[task_name] = output_layer_id

        print('Task expected at', task_expected_at, flush=True)
        print('h_layers:', self.h_layers, flush=True)

        # we have a separate layer for each task for cross-stitching;
        # otherwise just 1 layer for all tasks with hard parameter sharing
        num_task_layers = len(self.task_names) if self.cross_stitch else 1
        cross_stitch_layers = []
        for layer_num in range(self.h_layers):
            print(">>> %d layer_num" % layer_num, flush=True)
            input_dim = self.in_dim + self.c_in_dim * 2 if layer_num == 0 \
                else self.h_dim
            task_layers = []
            # get one layer per task for cross-stitching or just one layer
            for task_id in range(num_task_layers):
                builder = dynet.LSTMBuilder(1, input_dim, self.h_dim,
                                            self.model)
                task_layers.append(BiRNNSequencePredictor(builder))
            layers.append(task_layers)
            if self.cross_stitch:
                print('Using cross-stitch units after layer %d...' % layer_num,
                      flush=True)
                cross_stitch_layers.append(
                    CrossStitchLayer(self.model, len(self.task_names),
                                     self.h_dim, self.num_subspaces,
                                     self.cross_stitch_init_scheme))

        layer_stitch_layers = []

        # store at which layer to predict task
        for task_name in self.task_names:
            task_num_labels = len(self.task2tag2idx[task_name])

            # use a small MLP for the task losses
            print('Using an MLP for task losses.', flush=True)
            # if we concatenate, the FC layer has to have a larger input_dim
            input_dim = self.h_dim * 2 * self.h_layers\
                if self.layer_connect == CONCAT else self.h_dim * 2
            layer_output = Layer(self.model,
                                 input_dim,
                                 task_num_labels,
                                 dynet.softmax,
                                 mlp=True)
            sequence_predictor = SequencePredictor(layer_output)
            output_layers_dict[task_name] = sequence_predictor

            if self.layer_connect == STITCH:
                print('Using layer-stitch units for task %s...' % task_name,
                      flush=True)
                # w/o cross-stitching, we only use one LayerStitchLayer
                layer_stitch_layers.append(
                    LayerStitchLayer(self.model, self.h_layers, self.h_dim,
                                     self.layer_stitch_init_scheme))

        print('#\nOutput layers: %d\n' % len(output_layers_dict), flush=True)

        # initialize the char RNN
        char_rnn = RNNSequencePredictor(
            dynet.LSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))

        predictors = dict()
        predictors["inner"] = layers
        predictors['cross_stitch'] = cross_stitch_layers
        predictors['layer_stitch'] = layer_stitch_layers
        predictors["output_layers_dict"] = output_layers_dict
        predictors["task_expected_at"] = task_expected_at
        return predictors, char_rnn, wembeds, cembeds
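When self.layer_connect == STITCH, one LayerStitchLayer per task is allocated above. A common reading of this construct is a learned scalar mixture over the per-layer BiLSTM outputs; the sketch below illustrates that idea. The softmax-normalised stitch_weights are an illustrative assumption, not necessarily what LayerStitchLayer does internally.

# Layer-stitch sketch: mix the outputs of all hidden layers with learned scalars.
import dynet

model = dynet.ParameterCollection()
h_layers, h_dim = 3, 100                               # illustrative sizes
stitch_weights = model.add_parameters((h_layers,))     # one scalar per layer

def stitch(layer_outputs):
    """layer_outputs: list of h_layers expressions, each of shape (2*h_dim,)."""
    w = dynet.softmax(dynet.parameter(stitch_weights))  # normalised mixture weights
    return dynet.concatenate_cols(layer_outputs) * w    # weighted sum of layer outputs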
Example #5
    def __init__(self, vocab, ner, rels, w2i, c2i, options):
        self.model = ParameterCollection()
        random.seed(1)
        self.trainer = AdamTrainer(self.model)
        if options.learning_rate is not None:
            self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
            print("Adam initial learning rate:", options.learning_rate)
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]

        self.blstmFlag = options.blstmFlag
        self.labelsFlag = options.labelsFlag
        self.costaugFlag = options.costaugFlag
        self.bibiFlag = options.bibiFlag

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.cdims = options.cembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.ner = {word: ind for ind, word in enumerate(ner)}
        self.id2ner = {ind: word for ind, word in enumerate(ner)}
        self.c2i = c2i
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.id2rels = rels
        # print self.rels
        # print self.id2rels
        self.nerdims = options.nembedding_dims
        self.mixture_weight = options.mixture_weight
        #self.posCount = postagCount

        #self.pos2id = {word: ind + 1 for ind, word in enumerate(postagCount.keys())}
        #self.pdims = options.pembedding_dims

        self.vocab['*PAD*'] = 1
        self.vocab['*INITIAL*'] = 2
        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
        self.nerlookup = self.model.add_lookup_parameters(
            (len(ner), self.nerdims))
        #self.plookup = self.model.add_lookup_parameters((len(postagCount.keys()) + 1, self.pdims))

        if options.external_embedding is not None:
            ext_embeddings, ext_emb_dim = load_embeddings_file(
                options.external_embedding, lower=True)
            assert (ext_emb_dim == self.wdims)
            print("Initializing word embeddings by pre-trained vectors")
            count = 0
            for word in self.vocab:
                _word = unicode(word, "utf-8")
                if _word in ext_embeddings:
                    count += 1
                    self.wlookup.init_row(self.vocab[word],
                                          ext_embeddings[_word])
            print("Vocab size: %d; #words having pretrained vectors: %d" %
                  (len(self.vocab), count))

        self.ner_builders = [
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model),
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims,
                               self.model)
        ]
        self.ner_bbuilders = [
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
        ]

        if self.bibiFlag:
            self.builders = [
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(1,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
            self.bbuilders = [
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
            ]
        elif self.layers > 0:
            self.builders = [
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model),
                VanillaLSTMBuilder(self.layers,
                                   self.wdims + self.cdims * 2 + self.nerdims,
                                   self.ldims, self.model)
            ]
        else:
            self.builders = [
                SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model),
                SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                                 self.ldims, self.model)
            ]

        # self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.ner), softmax))

        self.hidden_units = options.hidden_units

        self.char_rnn = RNNSequencePredictor(
            LSTMBuilder(1, self.cdims, self.cdims, self.model))

        self.crf_module = CRF(self.model, self.id2ner)

        self.tanh_layer_W = self.model.add_parameters(
            (self.hidden_units, 2 * self.ldims))
        self.tanh_layer_b = self.model.add_parameters((self.hidden_units))

        self.last_layer_W = self.model.add_parameters(
            (len(self.ner), self.hidden_units))
        self.last_layer_b = self.model.add_parameters((len(self.ner)))

        W = orthonormal_initializer(self.hidden_units, 2 * self.ldims)

        self.head_layer_W = self.model.parameters_from_numpy(W)
        self.head_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))

        self.dep_layer_W = self.model.parameters_from_numpy(W)
        self.dep_layer_b = self.model.add_parameters(
            (self.hidden_units, ), init=dynet.ConstInitializer(0.))

        self.rel_U = self.model.add_parameters(
            (len(self.rels) * self.hidden_units, self.hidden_units),
            init=dynet.ConstInitializer(0.))

        self.rel_W = self.model.parameters_from_numpy(
            orthonormal_initializer(len(self.rels), 2 * self.hidden_units))
        #self.rel_W = self.model.add_parameters((len(self.rels), self.hidden_units * 2))
        self.rel_b = self.model.add_parameters((len(self.rels), ),
                                               init=dynet.ConstInitializer(0.))
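The shapes of rel_U (len(rels) * hidden_units x hidden_units), rel_W (len(rels) x 2 * hidden_units) and rel_b suggest a bilinear-plus-linear relation scorer in the style of a biaffine parser. The sketch below shows one plausible way to combine them for a single head/dependent pair; it is an inference from the parameter shapes, not the repository's actual scoring code.

# Relation-scorer sketch for one (head, dependent) pair of hidden vectors.
import dynet

def relation_scores(head_vec, dep_vec, rel_U, rel_W, rel_b, n_rels, hidden_units):
    """head_vec, dep_vec: (hidden_units,) expressions for one candidate arc."""
    U = dynet.parameter(rel_U)            # (n_rels * hidden_units, hidden_units)
    W = dynet.parameter(rel_W)            # (n_rels, 2 * hidden_units)
    b = dynet.parameter(rel_b)            # (n_rels,)
    # bilinear term: one hidden_units x hidden_units block per relation label
    bilinear = dynet.reshape(U * dep_vec, (n_rels, hidden_units)) * head_vec
    # linear term over the concatenated pair, plus bias
    linear = W * dynet.concatenate([head_vec, dep_vec]) + b
    return dynet.softmax(bilinear + linear)   # distribution over relation labels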