Beispiel #1
0
    def __init__(self,
                 character_embeddings_size,
                 encodings,
                 rnn_size=100,
                 rnn_layers=1,
                 embeddings_size=100,
                 model=None,
                 runtime=False):
        """Character-level BiLSTM encoder with attention over its states.

        Args:
            character_embeddings_size: width of each character embedding.
            encodings: project encodings object; only char2int is read here.
            rnn_size: hidden size of each LSTM direction.
            rnn_layers: number of stacked bidirectional layers.
            embeddings_size: size of the final projected output embedding.
            model: optional existing dy.Model to register parameters into;
                a fresh one is created when None.
            runtime: True for inference (plain LSTMs); False for training
                (orthonormally initialized LSTMs).
        """
        # Reuse the caller's parameter collection when one is supplied.
        self.model = dy.Model() if model is None else model

        self.encodings = encodings
        self.character_embeddings_size = character_embeddings_size
        self.embeddings_size = embeddings_size
        self.num_characters = len(encodings.char2int)
        self.character_lookup = self.model.add_lookup_parameters(
            (self.num_characters, character_embeddings_size))

        self.rnn_fw = []
        self.rnn_bw = []
        self.rnn_layers = rnn_layers
        self.rnn_size = rnn_size

        # Choose the builder once; training uses orthonormal initialization.
        if runtime:
            make_lstm = dy.VanillaLSTMBuilder
        else:
            from utils import orthonormal_VanillaLSTMBuilder
            make_lstm = orthonormal_VanillaLSTMBuilder

        # Each character embedding is concatenated with 3 extra features.
        # NOTE: forward builder is created before backward on every layer so
        # the parameter-collection registration order stays stable.
        layer_input_size = character_embeddings_size + 3
        for _layer in range(rnn_layers):
            self.rnn_fw.append(
                make_lstm(1, layer_input_size, rnn_size, self.model))
            self.rnn_bw.append(
                make_lstm(1, layer_input_size, rnn_size, self.model))
            layer_input_size = rnn_size * 2

        # Output projection over [last state ; attention over other states].
        self.linearW = self.model.add_parameters(
            (embeddings_size, rnn_size * 4))
        self.linearB = self.model.add_parameters((embeddings_size))

        # Additive attention parameters.
        self.att_w1 = self.model.add_parameters((rnn_size, rnn_size * 2))
        self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
        self.att_v = self.model.add_parameters((1, rnn_size))
Beispiel #2
0
    def __init__(self, config, encodings, embeddings, runtime=False):
        """Build a character-level lemmatizer: tag/char embedding tables, a
        character encoder network and a decoder RNN with a softmax over
        characters plus three special labels.

        Args:
            config: project configuration (embedding/RNN sizes).
            encodings: vocabulary maps (char2int, upos2int, xpos2int,
                attrs2int).
            embeddings: external word-embeddings wrapper (stored for later
                use).
            runtime: True for inference (plain LSTM); False for training
                (orthonormally initialized LSTM).
        """
        self.config = config
        self.encodings = encodings
        # Bug in encodings - will be removed after UD
        # Flags encoding tables where ' ' is not mapped to index 1 so other
        # code can compensate for the inconsistent character table.
        self.has_bug=False
        if self.encodings.char2int[' ']!=1:
            self.has_bug=True
            import sys
            sys.stdout.write("Detected encodings BUG!")
        self.embeddings = embeddings
        self.losses = []
        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
        # Character-level encoder; registers its own parameters in self.model.
        self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                                  rnn_size=self.config.char_rnn_size,
                                                  rnn_layers=self.config.char_rnn_layers,
                                                  embeddings_size=self.config.char_embeddings,
                                                  model=self.model, runtime=runtime)
        # Presumably a word -> lemma cache populated elsewhere — verify.
        self.word2lemma={}

        # Embedding tables for the three morphological tag vocabularies.
        self.upos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.upos2int), self.config.tag_embeddings_size))
        self.xpos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
        self.attrs_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
        self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))
        # Decoder RNN; input size = char_rnn_size * 2 + char_embeddings +
        # tag_embeddings_size.
        if runtime:
            self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                      self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size,
                                      self.config.rnn_size,
                                      self.model)
        else:
            from utils import orthonormal_VanillaLSTMBuilder
            self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                      self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size,
                                                      self.config.rnn_size,
                                                      self.model)

        # self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
        # self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
        # self.att_v = self.model.add_parameters((1, 200))

        # Learned initial decoder input (start symbol).
        self.start_lookup = self.model.add_lookup_parameters(
            (1, self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size))

        # Output softmax over all characters plus 3 special labels (below).
        self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 3, self.config.rnn_size))
        self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 3))

        # Special output labels occupy the slots after the character ids.
        ofs = len(self.encodings.char2int)
        self.label2int = {}
        self.label2int['<EOS>'] = ofs
        self.label2int['<COPY>'] = ofs + 1
        self.label2int['<INC>'] = ofs + 2
Beispiel #3
0
    def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False):
        """Build an attention-based character-level lemmatizer: tag/char
        embedding tables, a character encoder network, a decoder RNN, and
        softmax heads for characters and for casing.

        Args:
            lemmatizer_config: project configuration (embedding/RNN sizes).
            encodings: vocabulary maps (char2int, upos2int, xpos2int,
                attrs2int).
            embeddings: external word-embeddings wrapper (stored for later
                use).
            runtime: True for inference (plain LSTM); False for training
                (orthonormally initialized LSTM).
        """
        self.config = lemmatizer_config
        self.encodings = encodings
        # Bug in encodings - this will be removed after UD Shared Task
        # Flags encoding tables where ' ' is not mapped to index 1 so other
        # code can compensate.
        self.has_bug = False
        if self.encodings.char2int[' '] != 1:
            self.has_bug = True
        self.embeddings = embeddings
        self.losses = []

        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)

        # Character-level encoder; registers its own parameters in self.model.
        self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                                  rnn_size=self.config.char_rnn_size,
                                                  rnn_layers=self.config.char_rnn_layers,
                                                  embeddings_size=self.config.char_embeddings,
                                                  model=self.model, runtime=runtime)

        # Embedding tables for the three morphological tag vocabularies.
        self.upos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.upos2int), self.config.tag_embeddings_size))
        self.xpos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
        self.attrs_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
        self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))

        # Decoder RNN; input size = char_rnn_size * 2 + char_embeddings.
        if runtime:
            self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                      self.config.char_rnn_size * 2 + self.config.char_embeddings, self.config.rnn_size,
                                      self.model)
        else:
            from utils import orthonormal_VanillaLSTMBuilder
            self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                      self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                                      self.config.rnn_size,
                                                      self.model)

        # Additive attention: keys from the char encoder states, query from
        # the decoder state concatenated with tag embeddings.
        self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
        self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
        self.att_v = self.model.add_parameters((1, 200))

        # Learned initial decoder input (start symbol).
        self.start_lookup = self.model.add_lookup_parameters(
            (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))

        # Character softmax (+1 extra class) and a separate binary casing head.
        self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 1, self.config.rnn_size))
        self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 1))
        self.softmax_casing_w = self.model.add_parameters((2, self.config.rnn_size))
        self.softmax_casing_b = self.model.add_parameters((2))
Beispiel #4
0
    def __init__(self, src_we, dst_we, input_encodings, output_encodings,
                 config):
        """Build an attentional encoder-decoder over words with an auxiliary
        word-embedding prediction head.

        Args:
            src_we: source-side word-embeddings object (word_embeddings_size
                is read here).
            dst_we: target-side word-embeddings object.
            input_encodings: source vocabulary (word2int).
            output_encodings: target vocabulary (word2int).
            config: sizes for encoder layers, decoder, input and aux layers.
        """
        self.config = config
        self.losses = []
        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model)
        self.src_we = src_we
        self.dst_we = dst_we
        self.input_encodings = input_encodings
        self.output_encodings = output_encodings
        # encoder
        # Stacked BiLSTM; orthonormal_VanillaLSTMBuilder is presumably
        # imported at module level — verify.
        self.encoder_fw = []
        self.encoder_bw = []
        input_size = config.input_size
        for layer_size in self.config.encoder_layers:
            self.encoder_fw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, layer_size,
                                               self.model))
            self.encoder_bw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, layer_size,
                                               self.model))
            # Next layer consumes the concatenated fw/bw outputs.
            input_size = layer_size * 2

        # decoder
        #self.decoder = []
        #for layer_size in self.config.decoder_layers:
        # Decoder input = top encoder output + an input_size-wide vector.
        self.decoder = orthonormal_VanillaLSTMBuilder(
            config.decoder_layers, input_size + self.config.input_size,
            config.decoder_size, self.model)
        input_size = config.decoder_size

        # output softmax
        # One extra class reserved for <EOS> (index self.EOS below).
        self.output_softmax_w = self.model.add_parameters(
            (len(self.output_encodings.word2int) + 1, input_size))
        self.output_softmax_b = self.model.add_parameters(
            (len(self.output_encodings.word2int) + 1))
        self.EOS = len(self.output_encodings.word2int)
        # aux WE layer
        self.aux_layer_w = self.model.add_parameters(
            (self.config.aux_we_layer_size, self.config.decoder_size))
        self.aux_layer_b = self.model.add_parameters(
            (self.config.aux_we_layer_size))
        # aux WE projection
        # Projects the aux layer into the target word-embedding space.
        self.aux_layer_proj_w = self.model.add_parameters(
            (self.dst_we.word_embeddings_size, self.config.aux_we_layer_size))
        self.aux_layer_proj_b = self.model.add_parameters(
            (self.dst_we.word_embeddings_size))

        # input projection
        # Maps pretrained source embeddings into the model's input space.
        self.word_proj_w = self.model.add_parameters(
            (self.config.input_size, self.src_we.word_embeddings_size))
        self.word_proj_b = self.model.add_parameters((self.config.input_size))
        # Holistic (trained) embeddings for both vocabularies plus two
        # special rows.
        self.hol_we_src = self.model.add_lookup_parameters(
            (len(self.input_encodings.word2int), self.config.input_size))
        self.hol_we_dst = self.model.add_lookup_parameters(
            (len(self.output_encodings.word2int), self.config.input_size))
        self.special_we = self.model.add_lookup_parameters(
            (2, self.config.input_size))

        # attention
        # Additive attention over the top encoder layer's bi-directional
        # states, queried by the decoder state.
        self.att_w1 = self.model.add_parameters(
            (self.config.encoder_layers[-1] * 2,
             self.config.encoder_layers[-1] * 2))
        self.att_w2 = self.model.add_parameters(
            (self.config.encoder_layers[-1] * 2, self.config.decoder_size))
        self.att_v = self.model.add_parameters(
            (1, self.config.encoder_layers[-1] * 2))
Beispiel #5
0
    def __init__(self,
                 tagger_config,
                 encodings,
                 embeddings,
                 aux_softmax_weight=0.2,
                 runtime=False):
        """Build a BiLSTM tagger with three softmax heads (UPOS/XPOS/attrs)
        plus auxiliary heads attached to an intermediate layer.

        Args:
            tagger_config: layer sizes, input size, MLP sizes and the index
                of the aux-softmax layer.
            encodings: vocabulary maps (word2int, upos2int, xpos2int,
                attrs2int).
            embeddings: external word-embeddings wrapper.
            aux_softmax_weight: weight of the auxiliary losses (stored only).
            runtime: True for inference (plain LSTMs); False for training
                (orthonormally initialized LSTMs).
        """
        self.config = tagger_config
        self.encodings = encodings
        self.embeddings = embeddings

        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(
            self.model, alpha=2e-3, beta_1=0.9,
            beta_2=0.9)  # dy.MomentumSGDTrainer(self.model)
        # Dense updates: with several lookup tables per step, sparse updates
        # would not help here.
        self.trainer.set_sparse_updates(False)
        # Character-level encoder producing word-embedding-sized vectors.
        self.character_network = CharacterNetwork(
            100,
            encodings,
            rnn_size=200,
            rnn_layers=1,
            embeddings_size=self.embeddings.word_embeddings_size,
            model=self.model,
            runtime=runtime)

        # Single learned vector for unknown words.
        self.unknown_word_embedding = self.model.add_lookup_parameters(
            (1, self.embeddings.word_embeddings_size))
        # Trained ("holistic") per-word embeddings.
        self.holistic_word_embedding = self.model.add_lookup_parameters(
            (len(encodings.word2int), self.embeddings.word_embeddings_size))

        # Projections of the three word representations (char-based,
        # pretrained, holistic) into the common RNN input space.
        self.char_proj_w = self.model.add_parameters(
            (self.config.input_size, self.embeddings.word_embeddings_size))
        self.emb_proj_w = self.model.add_parameters(
            (self.config.input_size, self.embeddings.word_embeddings_size))
        self.hol_proj_w = self.model.add_parameters(
            (self.config.input_size, self.embeddings.word_embeddings_size))

        self.bdrnn_fw = []
        self.bdrnn_bw = []
        rnn_input_size = self.config.input_size  # self.embeddings.word_embeddings_size

        aux_softmax_input_size = 0
        index = 0
        for layer_size in self.config.layers:
            if runtime:
                self.bdrnn_fw.append(
                    dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                          self.model))
                self.bdrnn_bw.append(
                    dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                          self.model))
            else:
                self.bdrnn_fw.append(
                    orthonormal_VanillaLSTMBuilder(1, rnn_input_size,
                                                   layer_size, self.model))
                self.bdrnn_bw.append(
                    orthonormal_VanillaLSTMBuilder(1, rnn_input_size,
                                                   layer_size, self.model))
            # Next layer consumes concatenated fw/bw outputs.
            rnn_input_size = layer_size * 2
            index += 1
            # Remember the width of the layer the auxiliary softmaxes tap.
            if index == self.config.aux_softmax_layer:
                aux_softmax_input_size = rnn_input_size

        # One pre-softmax MLP per task.
        self.mlps = []
        for _ in xrange(3):  # upos, xpos and attrs
            mlp_w = []
            mlp_b = []
            input_sz = self.config.layers[-1] * 2
            for l_size in self.config.presoftmax_mlp_layers:
                mlp_w.append(self.model.add_parameters((l_size, input_sz)))
                mlp_b.append(self.model.add_parameters((l_size)))
                input_sz = l_size
            self.mlps.append([mlp_w, mlp_b])

        # Main softmax heads fed from the last MLP layer.
        softmax_input_size = self.config.presoftmax_mlp_layers[-1]
        self.softmax_upos_w = self.model.add_parameters(
            (len(self.encodings.upos2int), softmax_input_size))
        self.softmax_upos_b = self.model.add_parameters(
            (len(self.encodings.upos2int)))
        self.softmax_xpos_w = self.model.add_parameters(
            (len(self.encodings.xpos2int), softmax_input_size))
        self.softmax_xpos_b = self.model.add_parameters(
            (len(self.encodings.xpos2int)))
        self.softmax_attrs_w = self.model.add_parameters(
            (len(self.encodings.attrs2int), softmax_input_size))
        self.softmax_attrs_b = self.model.add_parameters(
            (len(self.encodings.attrs2int)))

        # Auxiliary softmax heads fed directly from the intermediate
        # BiLSTM layer selected by config.aux_softmax_layer.
        self.aux_softmax_upos_w = self.model.add_parameters(
            (len(self.encodings.upos2int), aux_softmax_input_size))
        self.aux_softmax_upos_b = self.model.add_parameters(
            (len(self.encodings.upos2int)))
        self.aux_softmax_xpos_w = self.model.add_parameters(
            (len(self.encodings.xpos2int), aux_softmax_input_size))
        self.aux_softmax_xpos_b = self.model.add_parameters(
            (len(self.encodings.xpos2int)))
        self.aux_softmax_attrs_w = self.model.add_parameters(
            (len(self.encodings.attrs2int), aux_softmax_input_size))
        self.aux_softmax_attrs_b = self.model.add_parameters(
            (len(self.encodings.attrs2int)))

        self.aux_softmax_weight = aux_softmax_weight
        self.losses = []
Beispiel #6
0
    def __init__(self, params, model=None):
        """Build a sample-level vocoder with separate coarse/fine 8-bit
        (256-way) output heads, conditioned on upsampled MGC frames.

        Args:
            params: project parameters; target_sample_rate and mgc_order are
                read here.
            model: optional existing dy.Model to register parameters into;
                a fresh one is created when None.
        """
        self.UPSAMPLE_PROJ = 200
        self.RNN_SIZE = 448
        self.RNN_LAYERS = 1
        self.OUTPUT_EMB_SIZE = 1
        self.params = params
        if model is None:
            self.model = dy.Model()
        else:
            self.model = model

        self.trainer = dy.AdamTrainer(self.model, alpha=1e-4)
        self.trainer.set_sparse_updates(True)
        self.trainer.set_clip_threshold(5.0)
        # self.trainer = dy.AdamTrainer(self.model)
        # MGCs are extracted at 12.5 ms

        # Number of audio samples per MGC frame (12.5 ms frame shift).
        upsample_count = int(12.5 * self.params.target_sample_rate / 1000)
        # self.upsample_w_s = []
        self.upsample_w_t = []
        # self.upsample_b_s = []
        self.upsample_b_t = []
        # One projection per sample position within a frame; input is two
        # concatenated MGC frames (mgc_order * 2).
        for _ in xrange(upsample_count):
            # self.upsample_w_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ, self.params.mgc_order)))
            self.upsample_w_t.append(
                self.model.add_parameters(
                    (self.UPSAMPLE_PROJ, self.params.mgc_order * 2)))
            # self.upsample_b_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ)))
            self.upsample_b_t.append(
                self.model.add_parameters((self.UPSAMPLE_PROJ)))

        # Embeddings for the previous coarse/fine byte values (scalar-sized).
        self.output_coarse_lookup = self.model.add_lookup_parameters(
            (256, self.OUTPUT_EMB_SIZE))
        self.output_fine_lookup = self.model.add_lookup_parameters(
            (256, self.OUTPUT_EMB_SIZE))
        from utils import orthonormal_VanillaLSTMBuilder
        # self.rnn = orthonormal_VanillaLSTMBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model)
        # Coarse RNN sees 2 previous embeddings; fine RNN additionally sees
        # the current coarse prediction (3 embeddings).
        self.rnnCoarse = orthonormal_VanillaLSTMBuilder(
            self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
            self.RNN_SIZE, self.model)
        self.rnnFine = orthonormal_VanillaLSTMBuilder(
            self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
            self.RNN_SIZE, self.model)
        # self.rnnCoarse = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
        #                                self.RNN_SIZE, self.model)
        # self.rnnFine = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
        #                              self.RNN_SIZE, self.model)

        # Single-hidden-layer MLP between each RNN and its softmax.
        self.mlp_coarse_w = []
        self.mlp_coarse_b = []
        self.mlp_coarse_w.append(
            self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
        self.mlp_coarse_b.append(self.model.add_parameters((self.RNN_SIZE)))

        self.mlp_fine_w = []
        self.mlp_fine_b = []
        self.mlp_fine_w.append(
            self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
        self.mlp_fine_b.append(self.model.add_parameters((self.RNN_SIZE)))

        # 256-way softmax heads for the coarse and fine bytes.
        self.softmax_coarse_w = self.model.add_parameters((256, self.RNN_SIZE))
        self.softmax_coarse_b = self.model.add_parameters((256))
        self.softmax_fine_w = self.model.add_parameters((256, self.RNN_SIZE))
        self.softmax_fine_b = self.model.add_parameters((256))
Beispiel #7
0
    def __init__(self, params, num_phones, phone2int, model=None):
        """Attentional phone-encoder / acoustic-frame decoder.

        A stacked BiLSTM encodes phone embeddings; an LSTM decoder with
        additive attention predicts MGC frames (three projections per step),
        a stop flag, and a highway contribution from the encoder states.

        Args:
            params: project parameters; mgc_order is read here.
            num_phones: number of phone symbols (two extra rows reserved).
            phone2int: phone -> index mapping (stored).
            model: optional existing dy.Model; when None a fresh model with
                an Adam trainer is created.
        """
        self.model = model
        self.params = params
        self.PHONE_EMBEDDINGS_SIZE = 100
        self.ENCODER_SIZE = 200
        self.ENCODER_LAYERS = 2
        self.DECODER_SIZE = 200
        self.DECODER_LAYERS = 2
        self.phone2int = phone2int

        # NOTE(review): when an external model is supplied, self.trainer is
        # never created — presumably the caller drives training. Verify.
        if self.model is None:
            self.model = dy.Model()
            self.trainer = dy.AdamTrainer(self.model)
            self.trainer.set_sparse_updates(True)
            self.trainer.set_clip_threshold(5.0)

        self.phone_lookup = self.model.add_lookup_parameters(
            (num_phones + 2, self.PHONE_EMBEDDINGS_SIZE))
        from utils import orthonormal_VanillaLSTMBuilder

        def _new_encoder(in_size):
            # One orthonormally-initialized single-layer LSTM direction.
            return orthonormal_VanillaLSTMBuilder(1, in_size,
                                                  self.ENCODER_SIZE,
                                                  self.model)

        # First layer reads phone embeddings; deeper layers read the
        # concatenated fw/bw outputs. Forward is always created before
        # backward so parameter registration order stays stable.
        self.encoder_fw = [_new_encoder(self.PHONE_EMBEDDINGS_SIZE)]
        self.encoder_bw = [_new_encoder(self.PHONE_EMBEDDINGS_SIZE)]
        for _ in range(1, self.ENCODER_LAYERS):
            self.encoder_fw.append(_new_encoder(self.ENCODER_SIZE * 2))
            self.encoder_bw.append(_new_encoder(self.ENCODER_SIZE * 2))

        # Decoder input: attention context (ENCODER_SIZE * 2) + a
        # 100-wide vector (see last_mgc_proj below).
        self.decoder = dy.VanillaLSTMBuilder(self.DECODER_LAYERS,
                                             self.ENCODER_SIZE * 2 + 100,
                                             self.DECODER_SIZE, self.model)

        # Shared hidden layer on top of the decoder state.
        self.hid_w = self.model.add_parameters((500, self.DECODER_SIZE))
        self.hid_b = self.model.add_parameters((500))

        # Three MGC-frame projections per decoder step.
        self.proj_w_1 = self.model.add_parameters((params.mgc_order, 500))
        self.proj_b_1 = self.model.add_parameters((params.mgc_order))
        self.proj_w_2 = self.model.add_parameters((params.mgc_order, 500))
        self.proj_b_2 = self.model.add_parameters((params.mgc_order))
        self.proj_w_3 = self.model.add_parameters((params.mgc_order, 500))
        self.proj_b_3 = self.model.add_parameters((params.mgc_order))

        # Direct (highway) path from encoder states to the output frame.
        self.highway_w = self.model.add_parameters(
            (params.mgc_order, self.ENCODER_SIZE * 2))

        # Projects the previous MGC frame into the decoder input space.
        self.last_mgc_proj_w = self.model.add_parameters(
            (100, self.params.mgc_order))
        self.last_mgc_proj_b = self.model.add_parameters((100))

        # Stop-token predictor.
        self.stop_w = self.model.add_parameters((1, self.DECODER_SIZE))
        self.stop_b = self.model.add_parameters((1))

        # Additive attention parameters.
        self.att_w1 = self.model.add_parameters((100, self.ENCODER_SIZE * 2))
        self.att_w2 = self.model.add_parameters((100, self.DECODER_SIZE))
        self.att_v = self.model.add_parameters((1, 100))

        # Learned start vectors for the first frame and first decoder input.
        self.start_lookup = self.model.add_lookup_parameters(
            (1, params.mgc_order))
        self.decoder_start_lookup = self.model.add_lookup_parameters(
            (1, self.ENCODER_SIZE * 2 + 100))
Beispiel #8
0
    def __init__(self,
                 parser_config,
                 encodings,
                 embeddings,
                 aux_softmax_weight=0.2,
                 runtime=False):
        """Build a graph-based dependency parser: BiLSTM encoder over word
        representations, arc/label projections with a biaffine-style scorer,
        and either auxiliary arc heads or morphology heads depending on
        config.predict_morphology.

        Args:
            parser_config: layer sizes, projection sizes, feature switches
                (use_lexical, use_morphology, predict_morphology) and the
                aux-softmax layer index.
            encodings: vocabulary maps (word2int, upos2int, xpos2int,
                attrs2int, label2int, char2int).
            embeddings: external word-embeddings wrapper.
            aux_softmax_weight: weight of the auxiliary losses (stored only).
            runtime: True for inference (plain LSTMs); False for training
                (orthonormally initialized LSTMs).
        """
        self.config = parser_config
        self.encodings = encodings
        self.embeddings = embeddings
        self.decoder = GreedyDecoder()

        self.model = dy.Model()

        # self.trainer = dy.SimpleSGDTrainer(self.model)
        self.trainer = dy.AdamTrainer(self.model,
                                      alpha=2e-3,
                                      beta_1=0.9,
                                      beta_2=0.9)

        self.trainer.set_sparse_updates(False)
        # Character-level encoder producing input-embedding-sized vectors.
        self.character_network = CharacterNetwork(
            100,
            encodings,
            rnn_size=200,
            rnn_layers=1,
            embeddings_size=self.config.input_embeddings_size,
            model=self.model,
            runtime=runtime)

        # Trained ("holistic") per-word embeddings.
        self.holistic_embeddings = self.model.add_lookup_parameters(
            (len(self.encodings.word2int), self.config.input_embeddings_size))

        # Projects pretrained word embeddings into the input space.
        self.input_proj_w_word = self.model.add_parameters(
            (self.config.input_embeddings_size,
             self.embeddings.word_embeddings_size))
        self.input_proj_b_word = self.model.add_parameters(
            (self.config.input_embeddings_size))

        self.unknown_word_embedding = self.model.add_lookup_parameters(
            (3, self.config.input_embeddings_size))  # for padding lexical
        self.pad_tag_embedding = self.model.add_lookup_parameters(
            (3, self.config.input_embeddings_size))  # for padding morphology

        self.bdrnn_fw = []
        self.bdrnn_bw = []

        # RNN input width depends on which feature groups are enabled.
        rnn_input_size = 0
        if self.config.use_lexical:
            rnn_input_size += self.config.input_embeddings_size

        if self.config.use_morphology:
            rnn_input_size += self.config.input_embeddings_size
            self.upos_lookup = self.model.add_lookup_parameters(
                (len(self.encodings.upos2int),
                 self.config.input_embeddings_size))
            self.xpos_lookup = self.model.add_lookup_parameters(
                (len(self.encodings.xpos2int),
                 self.config.input_embeddings_size))
            self.attrs_lookup = self.model.add_lookup_parameters(
                (len(self.encodings.attrs2int),
                 self.config.input_embeddings_size))

        index = 0
        aux_proj_input_size = 0
        for layer_size in self.config.layers:
            if runtime:
                self.bdrnn_fw.append(
                    dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                          self.model))
                self.bdrnn_bw.append(
                    dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                          self.model))
            else:
                self.bdrnn_fw.append(
                    orthonormal_VanillaLSTMBuilder(1, rnn_input_size,
                                                   layer_size, self.model))
                self.bdrnn_bw.append(
                    orthonormal_VanillaLSTMBuilder(1, rnn_input_size,
                                                   layer_size, self.model))
            # Next layer consumes concatenated fw/bw outputs.
            rnn_input_size = layer_size * 2
            index += 1
            # Remember the width of the layer the auxiliary heads tap.
            if index == self.config.aux_softmax_layer:
                aux_proj_input_size = rnn_input_size

        proj_input_size = self.config.layers[-1] * 2

        # Head/dependent projections for arc scoring and for labeling.
        self.proj_arc_w_head = self.model.add_parameters(
            (self.config.arc_proj_size, proj_input_size))
        self.proj_arc_b_head = self.model.add_parameters(
            (self.config.arc_proj_size))
        self.proj_arc_w_dep = self.model.add_parameters(
            (self.config.arc_proj_size, proj_input_size))
        self.proj_arc_b_dep = self.model.add_parameters(
            (self.config.arc_proj_size))
        self.proj_label_w_head = self.model.add_parameters(
            (self.config.label_proj_size, proj_input_size))
        self.proj_label_b_head = self.model.add_parameters(
            (self.config.label_proj_size))
        self.proj_label_w_dep = self.model.add_parameters(
            (self.config.label_proj_size, proj_input_size))
        self.proj_label_b_dep = self.model.add_parameters(
            (self.config.label_proj_size))
        # The intermediate layer feeds either auxiliary arc projections or
        # morphology projections, never both.
        if not self.config.predict_morphology:
            self.aux_proj_arc_w_head = self.model.add_parameters(
                (self.config.arc_proj_size, aux_proj_input_size))
            self.aux_proj_arc_b_head = self.model.add_parameters(
                (self.config.arc_proj_size))
            self.aux_proj_arc_w_dep = self.model.add_parameters(
                (self.config.arc_proj_size, aux_proj_input_size))
            self.aux_proj_arc_b_dep = self.model.add_parameters(
                (self.config.arc_proj_size))
        else:
            self.upos_proj_w = self.model.add_parameters(
                (self.config.label_proj_size, aux_proj_input_size))
            self.xpos_proj_w = self.model.add_parameters(
                (self.config.label_proj_size, aux_proj_input_size))
            self.attrs_proj_w = self.model.add_parameters(
                (self.config.label_proj_size, aux_proj_input_size))
            self.upos_proj_b = self.model.add_parameters(
                (self.config.label_proj_size))
            self.xpos_proj_b = self.model.add_parameters(
                (self.config.label_proj_size))
            self.attrs_proj_b = self.model.add_parameters(
                (self.config.label_proj_size))

        # Arc scorer over head/dependent arc projections.
        self.link_b = self.model.add_parameters((1, self.config.arc_proj_size))
        self.link_w = self.model.add_parameters(
            (self.config.arc_proj_size, self.config.arc_proj_size))

        # Label scorer over concatenated head/dependent label projections.
        self.label_ww = self.model.add_parameters(
            (1, len(self.encodings.label2int)))
        self.label_w = self.model.add_parameters(
            (len(self.encodings.label2int), self.config.label_proj_size * 2))
        self.label_bb = self.model.add_parameters(
            (len(self.encodings.label2int)))

        if not self.config.predict_morphology:
            # Auxiliary arc scorer on the intermediate layer.
            self.aux_link_w = self.model.add_parameters(
                (self.config.arc_proj_size, self.config.arc_proj_size))
            self.aux_link_b = self.model.add_parameters(
                (1, self.config.arc_proj_size))
        else:
            # Morphology softmax heads on the intermediate layer.
            self.upos_softmax_w = self.model.add_parameters(
                (len(self.encodings.upos2int), self.config.label_proj_size))
            self.xpos_softmax_w = self.model.add_parameters(
                (len(self.encodings.xpos2int), self.config.label_proj_size))
            self.attrs_softmax_w = self.model.add_parameters(
                (len(self.encodings.attrs2int), self.config.label_proj_size))

            self.upos_softmax_b = self.model.add_parameters(
                (len(self.encodings.upos2int)))
            self.xpos_softmax_b = self.model.add_parameters(
                (len(self.encodings.xpos2int)))
            self.attrs_softmax_b = self.model.add_parameters(
                (len(self.encodings.attrs2int)))
            # NOTE(review): lemma biases with no matching weight matrices —
            # presumably kept for checkpoint compatibility. Verify.
            self.lemma_softmax_b = self.model.add_parameters(
                (len(self.encodings.char2int) + 1))
            self.lemma_softmax_casing_b = self.model.add_parameters((2))

        self.aux_softmax_weight = aux_softmax_weight
        self.batch_loss = []
Beispiel #9
0
    def __init__(self,
                 word_size,
                 tag_size,
                 rel_size,
                 input_dim,
                 hidden_dim,
                 pdrop_embs,
                 pdrop_lstm,
                 pdrop_mlp,
                 layers,
                 mlp_dim,
                 arc_dim,
                 biaffine_bias_x_arc,
                 biaffine_bias_y_arc,
                 biaffine_bias_x_rel,
                 biaffine_bias_y_rel,
                 embs_word=None):
        """Build a (bi)affine graph-based dependency parser network.

        :param word_size: word vocabulary size
        :param tag_size: POS-tag vocabulary size
        :param rel_size: number of dependency relation labels
        :param input_dim: dimension of the word/tag/root embeddings
        :param hidden_dim: per-direction BiLSTM hidden size
        :param pdrop_embs: dropout rate for embeddings
        :param pdrop_lstm: dropout rate for the LSTM layers
        :param pdrop_mlp: dropout rate for the MLP layers
        :param layers: number of stacked BiLSTM layers
        :param mlp_dim: total MLP output dimension (arc part + rel part)
        :param arc_dim: slice of mlp_dim used for arc scoring; the
            remaining ``mlp_dim - arc_dim`` is used for relation scoring
        :param biaffine_bias_x_arc, biaffine_bias_y_arc,
            biaffine_bias_x_rel, biaffine_bias_y_rel: flags controlling
            bias terms of the biaffine scorers (consumed elsewhere)
        :param embs_word: optional pre-trained word embeddings (numpy
            array); when None, word embeddings are zero-initialised

        NOTE(review): this method reads a module-level ``config`` object
        (adam, learning_rate, beta_1, beta_2, epsilon, clip_threshold,
        biaffine) and the project ``utils`` module — both defined outside
        this block.
        """
        # Training bookkeeping.
        self._global_step = 0
        self._early_stop_count = 0
        self._update = False
        self._best_score = 0.
        self._best_score_las = 0.

        self._punct_id = 0

        self._masks_w = []
        self._masks_t = []

        self._vocab_size_w = word_size
        self._vocab_size_t = tag_size
        self._vocab_size_r = rel_size

        self._mlp_dim = mlp_dim
        self._arc_dim = arc_dim
        self._rel_dim = mlp_dim - arc_dim
        self.biaffine_bias_x_arc = biaffine_bias_x_arc
        self.biaffine_bias_y_arc = biaffine_bias_y_arc
        self.biaffine_bias_x_rel = biaffine_bias_x_rel
        self.biaffine_bias_y_rel = biaffine_bias_y_rel

        self._pc = dy.ParameterCollection()

        if config.adam:
            self._trainer = dy.AdamTrainer(self._pc, config.learning_rate,
                                           config.beta_1, config.beta_2,
                                           config.epsilon)
        else:
            # BUG FIX: the SGD trainer used to be bound to a local variable
            # ("trainer = ..."), leaving self._trainer undefined on this
            # path; bind it to the instance so training actually uses it.
            self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
            self._trainer.set_clip_threshold(config.clip_threshold)

        self.params = dict()
        # Word lookup: pre-trained embeddings when provided, otherwise
        # zero-initialised.
        if embs_word is None:
            self.lp_w = self._pc.add_lookup_parameters(
                (word_size, input_dim), init=dy.ConstInitializer(0.))
        else:
            self.lp_w = self._pc.lookup_parameters_from_numpy(embs_word)
        self.lp_t = self._pc.add_lookup_parameters(
            (tag_size, input_dim), init=dy.ConstInitializer(0.))
        self.emb_root = self._pc.add_lookup_parameters(
            (2, input_dim), init=dy.ConstInitializer(0.))

        self._pdrop_embs = pdrop_embs
        self._pdrop_lstm = pdrop_lstm
        self._pdrop_mlp = pdrop_mlp

        # Stacked BiLSTM: the first layer consumes the concatenated
        # word+tag input (input_dim * 2); deeper layers consume the
        # concatenated forward/backward states (2 * hidden_dim).
        self.LSTM_builders = []
        f = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim,
                                                 self._pc)
        b = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim,
                                                 self._pc)
        self.LSTM_builders.append((f, b))
        for _ in range(layers - 1):
            f = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim,
                                                     hidden_dim, self._pc)
            b = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim,
                                                     hidden_dim, self._pc)
            self.LSTM_builders.append((f, b))

        # MLP that maps BiLSTM states to arc/rel representations; with
        # biaffine scoring, head and dependent get separate projections
        # (both seeded from the same orthonormal matrix W).
        if config.biaffine:
            W = utils.orthonormal_initializer(mlp_dim, 2 * hidden_dim)
            self.mlp_dep = self._pc.parameters_from_numpy(W)
            self.mlp_head = self._pc.parameters_from_numpy(W)
            self.mlp_dep_bias = self._pc.add_parameters(
                (mlp_dim, ), init=dy.ConstInitializer(0.))
            self.mlp_head_bias = self._pc.add_parameters(
                (mlp_dim, ), init=dy.ConstInitializer(0.))
        else:
            W = utils.orthonormal_initializer(mlp_dim * 2, 2 * hidden_dim)
            self.mlp = self._pc.parameters_from_numpy(W)
            self.mlp_bias = self._pc.add_parameters(
                (mlp_dim * 2, ), init=dy.ConstInitializer(0.))

        # Scoring parameters: biaffine weight matrices, or the separate
        # vectors/biases of the non-biaffine scorer.
        if config.biaffine:
            self.W_arc = self._pc.add_parameters(
                (self._arc_dim, self._arc_dim + 1),
                init=dy.ConstInitializer(0.))
            self.W_rel = self._pc.add_parameters(
                (self._vocab_size_r * (self._rel_dim + 1), self._rel_dim + 1),
                init=dy.ConstInitializer(0.))
        else:
            self.V_r_arc = self._pc.add_parameters((self._arc_dim))
            self.V_i_arc = self._pc.add_parameters((self._arc_dim))
            self.bias_arc = self._pc.add_parameters((self._arc_dim * 2))
            self.V_r_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r))
            self.V_i_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r))
            self.bias_rel = self._pc.add_parameters(
                (self._rel_dim * self._vocab_size_r * 2))
    def __init__(self, config, encodings, embeddings, runtime=False):
        """Build the tokenizer network: character/word encoders plus the
        three-class decoder and its auxiliary softmax heads.

        :param config: configuration object providing embedding/LSTM sizes
        :param encodings: vocabulary encodings (characters, word2int, ...)
        :param embeddings: external word-embedding provider
        :param runtime: when True, use plain DyNet LSTM builders; when
            False (training), use orthonormally-initialised builders
        """
        # INTERNAL PARAMS ###################################################
        self.config = config
        self.encodings = encodings
        self.word_embeddings = embeddings
        self.config.char_vocabulary_size = len(encodings.characters)
        self.decoder_output_class_count = 3  # O S SX
        self.decoder_output_i2c = {0: "O", 1: "S", 2: "SX"}
        self.decoder_output_c2i = {"O": 0, "S": 1, "SX": 2}

        # NETWORK ###########################################################
        self.model = dy.Model()
        self.trainer = dy.AdamTrainer(self.model)
        self.trainer.set_sparse_updates(False)

        word_emb_size = self.word_embeddings.word_embeddings_size

        # EMBEDDING SPECIAL TOKENS: [0] = UNK, [1] = SENTENCE START
        self.word_embeddings_special = self.model.add_lookup_parameters(
            (2, word_emb_size))

        # ENCODER-CHAR
        self.char_embeddings = self.model.add_lookup_parameters(
            (self.config.char_vocabulary_size,
             self.config.char_embedding_size))

        # One generic-feature lookup each for punctuation, whitespace and
        # uppercase indicators — all share the same shape.
        feature_shape = (self.config.char_generic_feature_vocabulary_size,
                         self.config.char_generic_feature_embedding_size)
        self.char_embeddings_punctuation = self.model.add_lookup_parameters(
            feature_shape)
        self.char_embeddings_whitespace = self.model.add_lookup_parameters(
            feature_shape)
        self.char_embeddings_uppercase = self.model.add_lookup_parameters(
            feature_shape)

        self.encoder_char_input_size = (
            self.config.char_embedding_size +
            3 * self.config.char_generic_feature_embedding_size)

        # Pick the builder once: plain builders at runtime, orthonormally
        # initialised ones for training.
        if runtime:
            make_lstm = dy.VanillaLSTMBuilder
        else:
            from utils import orthonormal_VanillaLSTMBuilder as make_lstm

        self.encoder_char_lstm1_fw_builder = make_lstm(
            1, self.encoder_char_input_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_char_lstm2_bw_builder = make_lstm(
            1, self.config.next_chars_embedding_size +
            3 * self.config.char_generic_feature_embedding_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_word_lstm_builder = make_lstm(
            1, word_emb_size,
            self.config.encoder_word_lstm_size, self.model)

        # DECODER
        self.holisticWE = self.model.add_lookup_parameters(
            (len(encodings.word2int), word_emb_size))

        self.decoder_input_size = (
            2 * self.config.encoder_char_lstm_size +
            self.config.encoder_word_lstm_size + word_emb_size)

        self.decoder_hiddenW = self.model.add_parameters(
            (self.config.decoder_hidden_size, self.decoder_input_size))
        self.decoder_hiddenB = self.model.add_parameters(
            (self.config.decoder_hidden_size))
        self.decoder_outputW = self.model.add_parameters(
            (self.decoder_output_class_count, self.config.decoder_hidden_size))
        self.decoder_outputB = self.model.add_parameters(
            (self.decoder_output_class_count))

        # Auxiliary softmax heads over the character LSTM states.
        self.aux_softmax_char_peek_w = self.model.add_parameters(
            (self.decoder_output_class_count,
             self.config.encoder_char_lstm_size))
        self.aux_softmax_char_peek_b = self.model.add_parameters(
            (self.decoder_output_class_count))

        self.aux_softmax_char_hist_w = self.model.add_parameters(
            (self.decoder_output_class_count,
             self.config.encoder_char_lstm_size))
        self.aux_softmax_char_hist_b = self.model.add_parameters(
            (self.decoder_output_class_count))
        print("done")
    def __init__(self, config, encodings, embeddings, runtime=False):
        """Build the sentence-splitting (SS) and tokenization (TOK)
        sub-networks, each backed by its own DyNet model and trainer.

        :param config: configuration object providing all layer sizes
        :param encodings: vocabulary encodings (char2int, word2int)
        :param embeddings: external word-embedding provider
        :param runtime: when True, use plain DyNet LSTM builders; when
            False (training), use orthonormally-initialised builders
        """
        self.config = config
        self.word_embeddings = embeddings
        self.encodings = encodings

        # One independent parameter collection and Adam trainer per task.
        self.modelSS = dy.Model()
        self.modelTok = dy.Model()
        self.trainerSS = dy.AdamTrainer(self.modelSS,
                                        alpha=2e-3,
                                        beta_1=0.9,
                                        beta_2=0.9)
        self.trainerTok = dy.AdamTrainer(self.modelTok,
                                         alpha=2e-3,
                                         beta_1=0.9,
                                         beta_2=0.9)

        # sentence split model
        # NOTE(review): the CNN imports appear unused in this method; kept
        # for any import-time side effects of the wrappers module.
        from wrappers import CNN, CNNConvLayer, CNNPoolingLayer
        from utils import orthonormal_VanillaLSTMBuilder

        # Select the LSTM builder once for every network below.
        lstm_factory = (dy.VanillaLSTMBuilder
                        if runtime else orthonormal_VanillaLSTMBuilder)

        # character-level-embeddings
        ss_char_dim = self.config.ss_char_embeddings_size
        self.SS_char_lookup = self.modelSS.add_lookup_parameters(
            (len(self.encodings.char2int), ss_char_dim))
        self.SS_char_lookup_casing = self.modelSS.add_lookup_parameters(
            (3, 5))  # lower, upper N/A
        self.SS_char_lookup_special = self.modelSS.add_lookup_parameters(
            (2, ss_char_dim + 5))

        # lstm-peek network
        self.SS_peek_lstm = lstm_factory(
            self.config.ss_peek_lstm_layers, ss_char_dim + 5,
            self.config.ss_peek_lstm_size, self.modelSS)
        self.SS_aux_softmax_peek_w = self.modelSS.add_parameters(
            (2, self.config.ss_peek_lstm_size))
        self.SS_aux_softmax_peek_b = self.modelSS.add_parameters((2))

        self.SS_lstm = lstm_factory(
            self.config.ss_lstm_layers, ss_char_dim + 5,
            self.config.ss_lstm_size, self.modelSS)
        self.SS_aux_softmax_prev_w = self.modelSS.add_parameters(
            (2, self.config.ss_lstm_size))
        self.SS_aux_softmax_prev_b = self.modelSS.add_parameters((2))

        # post MLP and softmax
        self.SS_mlp_w = []
        self.SS_mlp_b = []
        fan_in = self.config.ss_lstm_size + self.config.ss_peek_lstm_size
        for fan_out in self.config.ss_mlp_layers:
            self.SS_mlp_w.append(
                self.modelSS.add_parameters((fan_out, fan_in)))
            self.SS_mlp_b.append(self.modelSS.add_parameters((fan_out)))
            fan_in = fan_out

        self.SS_mlp_softmax_w = self.modelSS.add_parameters((2, fan_in))
        self.SS_mlp_softmax_b = self.modelSS.add_parameters((2))

        # tokenization model
        tok_char_dim = self.config.tok_char_embeddings_size
        self.TOK_char_lookup = self.modelTok.add_lookup_parameters(
            (len(self.encodings.char2int), tok_char_dim))
        self.TOK_char_lookup_casing = self.modelTok.add_lookup_parameters(
            (3, 5))  # lower, upper N/A
        self.TOK_char_lookup_special = self.modelTok.add_lookup_parameters(
            (2, tok_char_dim + 5))
        self.TOK_word_lookup = self.modelTok.add_lookup_parameters(
            (len(self.encodings.word2int),
             self.config.tok_word_embeddings_size))

        self.TOK_word_embeddings_special = self.modelTok.add_lookup_parameters(
            (2, self.word_embeddings.word_embeddings_size))

        # Shape suggests a linear map from the external embedding size down
        # to the tokenizer embedding size — confirm against the caller.
        self.TOK_word_proj_w = self.modelTok.add_parameters(
            (self.config.tok_word_embeddings_size,
             self.word_embeddings.word_embeddings_size))

        # lstm networks
        self.TOK_backward_lstm = lstm_factory(
            self.config.tok_char_peek_lstm_layers, tok_char_dim + 5,
            self.config.tok_char_peek_lstm_size, self.modelTok)
        self.TOK_forward_lstm = lstm_factory(
            self.config.tok_char_lstm_layers, tok_char_dim + 5,
            self.config.tok_char_lstm_size, self.modelTok)
        self.TOK_word_lstm = lstm_factory(
            self.config.tok_word_lstm_layers,
            self.config.tok_word_embeddings_size,
            self.config.tok_word_lstm_size, self.modelTok)

        # Tokenizer MLP stack over the concatenated LSTM/embedding features.
        self.TOK_mlp_w = []
        self.TOK_mlp_b = []
        fan_in = (self.config.tok_word_lstm_size +
                  self.config.tok_char_lstm_size +
                  self.config.tok_char_peek_lstm_size + 2 +
                  self.config.tok_word_embeddings_size)
        for fan_out in self.config.tok_mlp_layers:
            self.TOK_mlp_w.append(
                self.modelTok.add_parameters((fan_out, fan_in)))
            self.TOK_mlp_b.append(self.modelTok.add_parameters((fan_out)))
            fan_in = fan_out

        self.TOK_softmax_w = self.modelTok.add_parameters((2, fan_in))
        self.TOK_softmax_b = self.modelTok.add_parameters((2))
        self.TOK_softmax_peek_w = self.modelTok.add_parameters(
            (2, self.config.tok_char_peek_lstm_size))
        self.TOK_softmax_peek_b = self.modelTok.add_parameters((2))
        self.TOK_softmax_prev_w = self.modelTok.add_parameters(
            (2, self.config.tok_char_lstm_size))
        self.TOK_softmax_prev_b = self.modelTok.add_parameters((2))

        self.losses = []
        self.losses_tok = []