import dynet as dy  # shared dependency for all of the constructors below


def __init__(self, character_embeddings_size, encodings, rnn_size=100,
             rnn_layers=1, embeddings_size=100, model=None, runtime=False):
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.encodings = encodings
    self.character_embeddings_size = character_embeddings_size
    self.embeddings_size = embeddings_size
    self.num_characters = len(encodings.char2int)
    self.character_lookup = self.model.add_lookup_parameters(
        (self.num_characters, character_embeddings_size))
    self.rnn_fw = []
    self.rnn_bw = []
    self.rnn_layers = rnn_layers
    self.rnn_size = rnn_size
    # each character embedding is extended with 3 extra feature values
    input_size = character_embeddings_size + 3
    for _ in range(rnn_layers):
        if runtime:
            self.rnn_fw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
            self.rnn_bw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
        else:
            # orthonormal initialization is only needed at training time
            from generic_networks.utils import orthonormal_VanillaLSTMBuilder
            self.rnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
            self.rnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size,
                                               self.model))
        input_size = rnn_size * 2
    # last state and attention over the other states
    self.linearW = self.model.add_parameters((embeddings_size, rnn_size * 4))
    self.linearB = self.model.add_parameters((embeddings_size,))
    # additive attention parameters
    self.att_w1 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_v = self.model.add_parameters((1, rnn_size))
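# A minimal sketch (not part of the original class) of the additive,
# Bahdanau-style attention implied by the shapes above: att_w1 projects each
# bi-LSTM state (2*rnn_size), att_w2 projects the query (here, presumably the
# final 2*rnn_size state), and att_v scores the combination. `states` and
# `query` are hypothetical inputs.
def _attend_sketch(att_w1, att_w2, att_v, states, query):
    w1 = dy.parameter(att_w1)
    w2 = dy.parameter(att_w2)
    v = dy.parameter(att_v)
    w2q = w2 * query  # the query projection is the same for every state
    scores = dy.concatenate([v * dy.tanh(w1 * s + w2q) for s in states])
    alphas = dy.softmax(scores)
    # context vector: attention-weighted sum of the states
    return dy.concatenate_cols(states) * alphas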
def __init__(self, src_we, dst_we, input_encodings, output_encodings, config):
    from generic_networks.utils import orthonormal_VanillaLSTMBuilder
    self.config = config
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.src_we = src_we
    self.dst_we = dst_we
    self.input_encodings = input_encodings
    self.output_encodings = output_encodings
    # encoder: stacked bi-directional LSTM layers
    self.encoder_fw = []
    self.encoder_bw = []
    input_size = config.input_size
    for layer_size in self.config.encoder_layers:
        self.encoder_fw.append(
            orthonormal_VanillaLSTMBuilder(1, input_size, layer_size,
                                           self.model))
        self.encoder_bw.append(
            orthonormal_VanillaLSTMBuilder(1, input_size, layer_size,
                                           self.model))
        input_size = layer_size * 2
    # decoder LSTM; its input concatenates the bi-encoder output with an
    # input-embedding-sized vector
    self.decoder = orthonormal_VanillaLSTMBuilder(
        config.decoder_layers, input_size + self.config.input_size,
        config.decoder_size, self.model)
    input_size = config.decoder_size
    # output softmax; the extra row is the synthetic EOS symbol
    self.output_softmax_w = self.model.add_parameters(
        (len(self.output_encodings.word2int) + 1, input_size))
    self.output_softmax_b = self.model.add_parameters(
        (len(self.output_encodings.word2int) + 1,))
    self.EOS = len(self.output_encodings.word2int)
    # aux WE layer
    self.aux_layer_w = self.model.add_parameters(
        (self.config.aux_we_layer_size, self.config.decoder_size))
    self.aux_layer_b = self.model.add_parameters(
        (self.config.aux_we_layer_size,))
    # aux WE projection
    self.aux_layer_proj_w = self.model.add_parameters(
        (self.dst_we.word_embeddings_size, self.config.aux_we_layer_size))
    self.aux_layer_proj_b = self.model.add_parameters(
        (self.dst_we.word_embeddings_size,))
    # input projection
    self.word_proj_w = self.model.add_parameters(
        (self.config.input_size, self.src_we.word_embeddings_size))
    self.word_proj_b = self.model.add_parameters((self.config.input_size,))
    self.hol_we_src = self.model.add_lookup_parameters(
        (len(self.input_encodings.word2int), self.config.input_size))
    self.hol_we_dst = self.model.add_lookup_parameters(
        (len(self.output_encodings.word2int), self.config.input_size))
    self.special_we = self.model.add_lookup_parameters(
        (2, self.config.input_size))
    # additive attention over the encoder states
    self.att_w1 = self.model.add_parameters(
        (self.config.encoder_layers[-1] * 2,
         self.config.encoder_layers[-1] * 2))
    self.att_w2 = self.model.add_parameters(
        (self.config.encoder_layers[-1] * 2, self.config.decoder_size))
    self.att_v = self.model.add_parameters(
        (1, self.config.encoder_layers[-1] * 2))
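# A hedged decoding-step sketch (written as if it were a method of the class
# above, not part of the original code): the output softmax has one extra
# row, indexed by self.EOS, so greedy generation can stop when that row wins
# the argmax. `dec_output` is a hypothetical decoder-state expression of size
# config.decoder_size.
import numpy as np

def _decode_step_sketch(self, dec_output):
    probs = dy.softmax(dy.parameter(self.output_softmax_w) * dec_output
                       + dy.parameter(self.output_softmax_b))
    best = int(np.argmax(probs.npvalue()))
    return best, best == self.EOS  # stop once the EOS row is selected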
def __init__(self, parser_config, encodings, embeddings,
             aux_softmax_weight=0.2, runtime=False):
    self.config = parser_config
    self.encodings = encodings
    self.embeddings = embeddings
    self.decoder = GreedyDecoder()
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9,
                                  beta_2=0.9)
    self.trainer.set_sparse_updates(False)
    self.character_network = CharacterNetwork(
        100, encodings, rnn_size=200, rnn_layers=1,
        embeddings_size=self.config.input_embeddings_size,
        model=self.model, runtime=runtime)
    self.holistic_embeddings = self.model.add_lookup_parameters(
        (len(self.encodings.word2int), self.config.input_embeddings_size))
    self.input_proj_w_word = self.model.add_parameters(
        (self.config.input_embeddings_size,
         self.embeddings.word_embeddings_size))
    self.input_proj_b_word = self.model.add_parameters(
        (self.config.input_embeddings_size,))
    self.unknown_word_embedding = self.model.add_lookup_parameters(
        (3, self.config.input_embeddings_size))  # for padding lexical
    self.pad_tag_embedding = self.model.add_lookup_parameters(
        (3, self.config.input_embeddings_size))  # for padding morphology
    self.bdrnn_fw = []
    self.bdrnn_bw = []
    rnn_input_size = 0
    if self.config.use_lexical:
        rnn_input_size += self.config.input_embeddings_size
    if self.config.use_morphology:
        rnn_input_size += self.config.input_embeddings_size
        self.upos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.upos2int),
             self.config.input_embeddings_size))
        self.xpos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.xpos2int),
             self.config.input_embeddings_size))
        self.attrs_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.attrs2int),
             self.config.input_embeddings_size))
    index = 0
    aux_proj_input_size = 0
    for layer_size in self.config.layers:
        if runtime:
            self.bdrnn_fw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                      self.model))
            self.bdrnn_bw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                      self.model))
        else:
            from generic_networks.utils import orthonormal_VanillaLSTMBuilder
            self.bdrnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                               self.model))
            self.bdrnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                               self.model))
        rnn_input_size = layer_size * 2
        index += 1
        if index == self.config.aux_softmax_layer:
            aux_proj_input_size = rnn_input_size
    # head/dependent projections for arc and label scoring
    proj_input_size = self.config.layers[-1] * 2
    self.proj_arc_w_head = self.model.add_parameters(
        (self.config.arc_proj_size, proj_input_size))
    self.proj_arc_b_head = self.model.add_parameters(
        (self.config.arc_proj_size,))
    self.proj_arc_w_dep = self.model.add_parameters(
        (self.config.arc_proj_size, proj_input_size))
    self.proj_arc_b_dep = self.model.add_parameters(
        (self.config.arc_proj_size,))
    self.proj_label_w_head = self.model.add_parameters(
        (self.config.label_proj_size, proj_input_size))
    self.proj_label_b_head = self.model.add_parameters(
        (self.config.label_proj_size,))
    self.proj_label_w_dep = self.model.add_parameters(
        (self.config.label_proj_size, proj_input_size))
    self.proj_label_b_dep = self.model.add_parameters(
        (self.config.label_proj_size,))
    if not self.config.predict_morphology:
        self.aux_proj_arc_w_head = self.model.add_parameters(
            (self.config.arc_proj_size, aux_proj_input_size))
        self.aux_proj_arc_b_head = self.model.add_parameters(
            (self.config.arc_proj_size,))
        self.aux_proj_arc_w_dep = self.model.add_parameters(
            (self.config.arc_proj_size, aux_proj_input_size))
        self.aux_proj_arc_b_dep = self.model.add_parameters(
            (self.config.arc_proj_size,))
    else:
        self.upos_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.xpos_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.attrs_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.upos_proj_b = self.model.add_parameters(
            (self.config.label_proj_size,))
        self.xpos_proj_b = self.model.add_parameters(
            (self.config.label_proj_size,))
        self.attrs_proj_b = self.model.add_parameters(
            (self.config.label_proj_size,))
    # bilinear arc scorer and label classifier
    self.link_b = self.model.add_parameters((1, self.config.arc_proj_size))
    self.link_w = self.model.add_parameters(
        (self.config.arc_proj_size, self.config.arc_proj_size))
    self.label_ww = self.model.add_parameters(
        (1, len(self.encodings.label2int)))
    self.label_w = self.model.add_parameters(
        (len(self.encodings.label2int), self.config.label_proj_size * 2))
    self.label_bb = self.model.add_parameters(
        (len(self.encodings.label2int),))
    if not self.config.predict_morphology:
        self.aux_link_w = self.model.add_parameters(
            (self.config.arc_proj_size, self.config.arc_proj_size))
        self.aux_link_b = self.model.add_parameters(
            (1, self.config.arc_proj_size))
    else:
        self.upos_softmax_w = self.model.add_parameters(
            (len(self.encodings.upos2int), self.config.label_proj_size))
        self.xpos_softmax_w = self.model.add_parameters(
            (len(self.encodings.xpos2int), self.config.label_proj_size))
        self.attrs_softmax_w = self.model.add_parameters(
            (len(self.encodings.attrs2int), self.config.label_proj_size))
        self.upos_softmax_b = self.model.add_parameters(
            (len(self.encodings.upos2int),))
        self.xpos_softmax_b = self.model.add_parameters(
            (len(self.encodings.xpos2int),))
        self.attrs_softmax_b = self.model.add_parameters(
            (len(self.encodings.attrs2int),))
    self.lemma_softmax_b = self.model.add_parameters(
        (len(self.encodings.char2int) + 1,))
    self.lemma_softmax_casing_b = self.model.add_parameters((2,))
    self.aux_softmax_weight = aux_softmax_weight
    self.batch_loss = []
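# A hedged sketch (written as if it were a method of the parser above) of the
# arc scorer that the link_w/link_b shapes suggest: a bilinear term between
# the head and dependent arc projections plus a linear bias term, one common
# arrangement for biaffine-style parsers. `head_proj` and `dep_proj` are
# hypothetical arc_proj_size vectors produced by the proj_arc_* layers.
def _arc_score_sketch(self, head_proj, dep_proj):
    w = dy.parameter(self.link_w)
    b = dy.parameter(self.link_b)
    # score(h, d) = h^T * W * d + b * d
    return dy.transpose(head_proj) * w * dep_proj + b * dep_proj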
def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False):
    self.config = lemmatizer_config
    self.encodings = encodings
    # Bug in encodings - this workaround will be removed after the UD Shared Task
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9,
                                  beta_2=0.9)
    self.character_network = CharacterNetwork(
        self.config.tag_embeddings_size, encodings,
        rnn_size=self.config.char_rnn_size,
        rnn_layers=self.config.char_rnn_layers,
        embeddings_size=self.config.char_embeddings,
        model=self.model, runtime=runtime)
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.char2int), self.config.char_embeddings))
    if runtime:
        self.rnn = dy.LSTMBuilder(
            self.config.rnn_layers,
            self.config.char_rnn_size * 2 + self.config.char_embeddings,
            self.config.rnn_size, self.model)
    else:
        from generic_networks.utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(
            self.config.rnn_layers,
            self.config.char_rnn_size * 2 + self.config.char_embeddings,
            self.config.rnn_size, self.model)
    # additive attention over the character encoder states
    self.att_w1 = self.model.add_parameters(
        (200, self.config.char_rnn_size * 2))
    self.att_w2 = self.model.add_parameters(
        (200, self.config.rnn_size + self.config.tag_embeddings_size))
    self.att_v = self.model.add_parameters((1, 200))
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))
    # character softmax; the extra row is the EOS symbol
    self.softmax_w = self.model.add_parameters(
        (len(self.encodings.char2int) + 1, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters(
        (len(self.encodings.char2int) + 1,))
    self.softmax_casing_w = self.model.add_parameters(
        (2, self.config.rnn_size))
    self.softmax_casing_b = self.model.add_parameters((2,))
def __init__(self, config, encodings, embeddings, runtime=False):
    self.config = config
    self.encodings = encodings
    # Bug in encodings - this workaround will be removed after the UD Shared Task
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
        import sys
        sys.stdout.write("Detected encodings BUG!\n")
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9,
                                  beta_2=0.9)
    self.character_network = CharacterNetwork(
        self.config.tag_embeddings_size, encodings,
        rnn_size=self.config.char_rnn_size,
        rnn_layers=self.config.char_rnn_layers,
        embeddings_size=self.config.char_embeddings,
        model=self.model, runtime=runtime)
    self.word2lemma = {}
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.char2int), self.config.char_embeddings))
    if runtime:
        self.rnn = dy.LSTMBuilder(
            self.config.rnn_layers,
            self.config.char_rnn_size * 2 + self.config.char_embeddings +
            self.config.tag_embeddings_size,
            self.config.rnn_size, self.model)
    else:
        from generic_networks.utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(
            self.config.rnn_layers,
            self.config.char_rnn_size * 2 + self.config.char_embeddings +
            self.config.tag_embeddings_size,
            self.config.rnn_size, self.model)
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings +
         self.config.tag_embeddings_size))
    # character softmax; the 3 extra rows are the special edit labels below
    self.softmax_w = self.model.add_parameters(
        (len(self.encodings.char2int) + 3, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters(
        (len(self.encodings.char2int) + 3,))
    ofs = len(self.encodings.char2int)
    self.label2int = {'<EOS>': ofs, '<COPY>': ofs + 1, '<INC>': ofs + 2}
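# A hedged sketch (an assumption for illustration, not the original decoder)
# of how the <COPY>/<INC>/<EOS> labels plus literal characters could be read
# as an edit script that transduces a word into its lemma: a cursor walks the
# input word; <COPY> emits the character under the cursor, <INC> advances the
# cursor, any other label emits itself, and <EOS> stops decoding.
def _apply_edit_script_sketch(word, labels):
    lemma = []
    cursor = 0
    for label in labels:
        if label == '<EOS>':
            break
        if label == '<COPY>':
            if cursor < len(word):
                lemma.append(word[cursor])
        elif label == '<INC>':
            cursor += 1
        else:
            lemma.append(label)  # a literal output character
    return ''.join(lemma)

# e.g. _apply_edit_script_sketch("running", ['<COPY>', '<INC>'] * 3 + ['<EOS>'])
# yields "run" under this interpretation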
def __init__(self, config, encodings, embeddings, runtime=False):
    # INTERNAL PARAMS ###################################################
    self.config = config
    self.encodings = encodings
    self.word_embeddings = embeddings
    self.config.char_vocabulary_size = len(encodings.characters)
    self.decoder_output_class_count = 3  # O S SX
    self.decoder_output_i2c = {0: "O", 1: "S", 2: "SX"}
    self.decoder_output_c2i = {"O": 0, "S": 1, "SX": 2}
    # NETWORK ###########################################################
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.trainer.set_sparse_updates(False)
    # EMBEDDING SPECIAL TOKENS
    self.word_embeddings_special = self.model.add_lookup_parameters(
        (2, self.word_embeddings.word_embeddings_size))  # [0] = UNK, [1] = SENTENCE START
    # ENCODER-CHAR
    self.char_embeddings = self.model.add_lookup_parameters(
        (self.config.char_vocabulary_size, self.config.char_embedding_size))
    self.char_embeddings_punctuation = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    self.char_embeddings_whitespace = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    self.char_embeddings_uppercase = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    self.encoder_char_input_size = (
        self.config.char_embedding_size +
        3 * self.config.char_generic_feature_embedding_size)
    if runtime:
        self.encoder_char_lstm1_fw_builder = dy.VanillaLSTMBuilder(
            1, self.encoder_char_input_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_char_lstm2_bw_builder = dy.VanillaLSTMBuilder(
            1, self.config.next_chars_embedding_size +
            3 * self.config.char_generic_feature_embedding_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_word_lstm_builder = dy.VanillaLSTMBuilder(
            1, self.word_embeddings.word_embeddings_size,
            self.config.encoder_word_lstm_size, self.model)
    else:
        from generic_networks.utils import orthonormal_VanillaLSTMBuilder
        self.encoder_char_lstm1_fw_builder = orthonormal_VanillaLSTMBuilder(
            1, self.encoder_char_input_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_char_lstm2_bw_builder = orthonormal_VanillaLSTMBuilder(
            1, self.config.next_chars_embedding_size +
            3 * self.config.char_generic_feature_embedding_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_word_lstm_builder = orthonormal_VanillaLSTMBuilder(
            1, self.word_embeddings.word_embeddings_size,
            self.config.encoder_word_lstm_size, self.model)
    # DECODER
    self.holisticWE = self.model.add_lookup_parameters(
        (len(encodings.word2int), self.word_embeddings.word_embeddings_size))
    self.decoder_input_size = (
        2 * self.config.encoder_char_lstm_size +
        self.config.encoder_word_lstm_size +
        self.word_embeddings.word_embeddings_size)
    self.decoder_hiddenW = self.model.add_parameters(
        (self.config.decoder_hidden_size, self.decoder_input_size))
    self.decoder_hiddenB = self.model.add_parameters(
        (self.config.decoder_hidden_size,))
    self.decoder_outputW = self.model.add_parameters(
        (self.decoder_output_class_count, self.config.decoder_hidden_size))
    self.decoder_outputB = self.model.add_parameters(
        (self.decoder_output_class_count,))
    # auxiliary softmax heads over the individual character LSTMs
    self.aux_softmax_char_peek_w = self.model.add_parameters(
        (self.decoder_output_class_count,
         self.config.encoder_char_lstm_size))
    self.aux_softmax_char_peek_b = self.model.add_parameters(
        (self.decoder_output_class_count,))
    self.aux_softmax_char_hist_w = self.model.add_parameters(
        (self.decoder_output_class_count,
         self.config.encoder_char_lstm_size))
    self.aux_softmax_char_hist_b = self.model.add_parameters(
        (self.decoder_output_class_count,))
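# A hedged sketch of how the O/S/SX labels above are plausibly interpreted
# per character ("O" = inside a token, "S" = a token ends after this
# character, "SX" = a token and the sentence both end here); this helper is
# an assumption for illustration, not the original runtime code.
def _labels_to_sentences_sketch(text, labels):
    sentences = []
    tokens = []
    current = ""
    for ch, label in zip(text, labels):
        current += ch
        if label in ("S", "SX"):
            if current.strip():
                tokens.append(current.strip())
            current = ""
        if label == "SX":
            sentences.append(tokens)
            tokens = []
    # flush any trailing partial token/sentence
    if current.strip():
        tokens.append(current.strip())
    if tokens:
        sentences.append(tokens)
    return sentences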
def __init__(self, config, encodings, embeddings, runtime=False):
    self.config = config
    self.word_embeddings = embeddings
    self.encodings = encodings
    # separate models (and trainers) for sentence splitting and tokenization
    self.modelSS = dy.Model()
    self.modelTok = dy.Model()
    self.trainerSS = dy.AdamTrainer(self.modelSS, alpha=2e-3, beta_1=0.9,
                                    beta_2=0.9)
    self.trainerTok = dy.AdamTrainer(self.modelTok, alpha=2e-3, beta_1=0.9,
                                     beta_2=0.9)
    # sentence split model
    from generic_networks.utils import orthonormal_VanillaLSTMBuilder
    # character-level embeddings
    self.SS_char_lookup = self.modelSS.add_lookup_parameters(
        (len(self.encodings.char2int), self.config.ss_char_embeddings_size))
    self.SS_char_lookup_casing = self.modelSS.add_lookup_parameters(
        (3, 5))  # lower, upper, N/A
    self.SS_char_lookup_special = self.modelSS.add_lookup_parameters(
        (2, self.config.ss_char_embeddings_size + 5))
    # lstm-peek network
    if runtime:
        self.SS_peek_lstm = dy.VanillaLSTMBuilder(
            self.config.ss_peek_lstm_layers,
            self.config.ss_char_embeddings_size + 5,
            self.config.ss_peek_lstm_size, self.modelSS)
    else:
        self.SS_peek_lstm = orthonormal_VanillaLSTMBuilder(
            self.config.ss_peek_lstm_layers,
            self.config.ss_char_embeddings_size + 5,
            self.config.ss_peek_lstm_size, self.modelSS)
    layer_is = self.config.ss_peek_lstm_size
    self.SS_aux_softmax_peek_w = self.modelSS.add_parameters((2, layer_is))
    self.SS_aux_softmax_peek_b = self.modelSS.add_parameters((2,))
    if runtime:
        self.SS_lstm = dy.VanillaLSTMBuilder(
            self.config.ss_lstm_layers,
            self.config.ss_char_embeddings_size + 5,
            self.config.ss_lstm_size, self.modelSS)
    else:
        self.SS_lstm = orthonormal_VanillaLSTMBuilder(
            self.config.ss_lstm_layers,
            self.config.ss_char_embeddings_size + 5,
            self.config.ss_lstm_size, self.modelSS)
    self.SS_aux_softmax_prev_w = self.modelSS.add_parameters(
        (2, self.config.ss_lstm_size))
    self.SS_aux_softmax_prev_b = self.modelSS.add_parameters((2,))
    # post MLP and softmax
    self.SS_mlp_w = []
    self.SS_mlp_b = []
    layer_is = self.config.ss_lstm_size + self.config.ss_peek_lstm_size
    for layer in self.config.ss_mlp_layers:
        self.SS_mlp_w.append(self.modelSS.add_parameters((layer, layer_is)))
        self.SS_mlp_b.append(self.modelSS.add_parameters((layer,)))
        layer_is = layer
    self.SS_mlp_softmax_w = self.modelSS.add_parameters((2, layer_is))
    self.SS_mlp_softmax_b = self.modelSS.add_parameters((2,))
    # tokenization model
    self.TOK_char_lookup = self.modelTok.add_lookup_parameters(
        (len(self.encodings.char2int),
         self.config.tok_char_embeddings_size))
    self.TOK_char_lookup_casing = self.modelTok.add_lookup_parameters(
        (3, 5))  # lower, upper, N/A
    self.TOK_char_lookup_special = self.modelTok.add_lookup_parameters(
        (2, self.config.tok_char_embeddings_size + 5))
    self.TOK_word_lookup = self.modelTok.add_lookup_parameters(
        (len(self.encodings.word2int),
         self.config.tok_word_embeddings_size))
    self.TOK_word_embeddings_special = self.modelTok.add_lookup_parameters(
        (2, self.word_embeddings.word_embeddings_size))
    self.TOK_word_proj_w = self.modelTok.add_parameters(
        (self.config.tok_word_embeddings_size,
         self.word_embeddings.word_embeddings_size))
    self.TOK_word_peek_proj_w = self.modelTok.add_parameters(
        (self.config.tok_word_embeddings_size,
         self.word_embeddings.word_embeddings_size))
    # lstm networks
    if runtime:
        self.TOK_backward_lstm = dy.VanillaLSTMBuilder(
            self.config.tok_char_peek_lstm_layers,
            self.config.tok_char_embeddings_size + 5,
            self.config.tok_char_peek_lstm_size, self.modelTok)
        self.TOK_forward_lstm = dy.VanillaLSTMBuilder(
            self.config.tok_char_lstm_layers,
            self.config.tok_char_embeddings_size + 5,
            self.config.tok_char_lstm_size, self.modelTok)
        self.TOK_word_lstm = dy.VanillaLSTMBuilder(
            self.config.tok_word_lstm_layers,
            self.config.tok_word_embeddings_size,
            self.config.tok_word_lstm_size, self.modelTok)
    else:
        self.TOK_backward_lstm = orthonormal_VanillaLSTMBuilder(
            self.config.tok_char_peek_lstm_layers,
            self.config.tok_char_embeddings_size + 5,
            self.config.tok_char_peek_lstm_size, self.modelTok)
        self.TOK_forward_lstm = orthonormal_VanillaLSTMBuilder(
            self.config.tok_char_lstm_layers,
            self.config.tok_char_embeddings_size + 5,
            self.config.tok_char_lstm_size, self.modelTok)
        self.TOK_word_lstm = orthonormal_VanillaLSTMBuilder(
            self.config.tok_word_lstm_layers,
            self.config.tok_word_embeddings_size,
            self.config.tok_word_lstm_size, self.modelTok)
    self.TOK_mlp_w = []
    self.TOK_mlp_b = []
    layer_input = (self.config.tok_word_lstm_size +
                   self.config.tok_char_lstm_size +
                   self.config.tok_char_peek_lstm_size + 2 +
                   self.config.tok_word_embeddings_size)
    for layer_size in self.config.tok_mlp_layers:
        self.TOK_mlp_w.append(
            self.modelTok.add_parameters((layer_size, layer_input)))
        self.TOK_mlp_b.append(self.modelTok.add_parameters((layer_size,)))
        layer_input = layer_size
    self.TOK_softmax_w = self.modelTok.add_parameters((2, layer_input))
    self.TOK_softmax_b = self.modelTok.add_parameters((2,))
    self.TOK_softmax_peek_w = self.modelTok.add_parameters(
        (2, self.config.tok_char_peek_lstm_size))
    self.TOK_softmax_peek_b = self.modelTok.add_parameters((2,))
    self.TOK_softmax_prev_w = self.modelTok.add_parameters(
        (2, self.config.tok_char_lstm_size))
    self.TOK_softmax_prev_b = self.modelTok.add_parameters((2,))
    self.losses = []
    self.losses_tok = []
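# A hedged sketch (written as if it were a method of the class above) of the
# per-character sentence-split decision the SS_* shapes suggest: the history
# LSTM state and the peek (lookahead) LSTM state are concatenated, passed
# through the MLP, and scored with the final 2-way softmax. `prev_state` and
# `peek_state` are hypothetical expressions; the tanh activation is an
# assumption, since the constructor does not show it.
def _ss_decision_sketch(self, prev_state, peek_state):
    hidden = dy.concatenate([prev_state, peek_state])
    for w, b in zip(self.SS_mlp_w, self.SS_mlp_b):
        hidden = dy.tanh(dy.parameter(w) * hidden + dy.parameter(b))
    return dy.softmax(dy.parameter(self.SS_mlp_softmax_w) * hidden
                      + dy.parameter(self.SS_mlp_softmax_b))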
def __init__(self, tagger_config, encodings, embeddings,
             aux_softmax_weight=0.2, runtime=False):
    self.config = tagger_config
    self.encodings = encodings
    self.embeddings = embeddings
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9,
                                  beta_2=0.9)
    self.trainer.set_sparse_updates(False)
    self.character_network = CharacterNetwork(
        100, encodings, rnn_size=200, rnn_layers=1,
        embeddings_size=self.embeddings.word_embeddings_size,
        model=self.model, runtime=runtime)
    self.unknown_word_embedding = self.model.add_lookup_parameters(
        (1, self.embeddings.word_embeddings_size))
    self.holistic_word_embedding = self.model.add_lookup_parameters(
        (len(encodings.word2int), self.embeddings.word_embeddings_size))
    # project the character, pre-trained and holistic embeddings into a
    # common input space
    self.char_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.emb_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.hol_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.bdrnn_fw = []
    self.bdrnn_bw = []
    rnn_input_size = self.config.input_size
    aux_softmax_input_size = 0
    index = 0
    for layer_size in self.config.layers:
        if runtime:
            self.bdrnn_fw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                      self.model))
            self.bdrnn_bw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                      self.model))
        else:
            from generic_networks.utils import orthonormal_VanillaLSTMBuilder
            self.bdrnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                               self.model))
            self.bdrnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size,
                                               self.model))
        rnn_input_size = layer_size * 2
        index += 1
        if index == self.config.aux_softmax_layer:
            aux_softmax_input_size = rnn_input_size
    self.mlps = []
    for _ in range(3):  # upos, xpos and attrs
        mlp_w = []
        mlp_b = []
        input_sz = self.config.layers[-1] * 2
        for l_size in self.config.presoftmax_mlp_layers:
            mlp_w.append(self.model.add_parameters((l_size, input_sz)))
            mlp_b.append(self.model.add_parameters((l_size,)))
            input_sz = l_size
        self.mlps.append([mlp_w, mlp_b])
    softmax_input_size = self.config.presoftmax_mlp_layers[-1]
    self.softmax_upos_w = self.model.add_parameters(
        (len(self.encodings.upos2int), softmax_input_size))
    self.softmax_upos_b = self.model.add_parameters(
        (len(self.encodings.upos2int),))
    self.softmax_xpos_w = self.model.add_parameters(
        (len(self.encodings.xpos2int), softmax_input_size))
    self.softmax_xpos_b = self.model.add_parameters(
        (len(self.encodings.xpos2int),))
    self.softmax_attrs_w = self.model.add_parameters(
        (len(self.encodings.attrs2int), softmax_input_size))
    self.softmax_attrs_b = self.model.add_parameters(
        (len(self.encodings.attrs2int),))
    # auxiliary softmax heads attached at config.aux_softmax_layer
    self.aux_softmax_upos_w = self.model.add_parameters(
        (len(self.encodings.upos2int), aux_softmax_input_size))
    self.aux_softmax_upos_b = self.model.add_parameters(
        (len(self.encodings.upos2int),))
    self.aux_softmax_xpos_w = self.model.add_parameters(
        (len(self.encodings.xpos2int), aux_softmax_input_size))
    self.aux_softmax_xpos_b = self.model.add_parameters(
        (len(self.encodings.xpos2int),))
    self.aux_softmax_attrs_w = self.model.add_parameters(
        (len(self.encodings.attrs2int), aux_softmax_input_size))
    self.aux_softmax_attrs_b = self.model.add_parameters(
        (len(self.encodings.attrs2int),))
    self.aux_softmax_weight = aux_softmax_weight
    self.losses = []
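# A hedged sketch (written as if it were a method of the tagger above) of the
# per-task forward pass implied by self.mlps and the three softmax heads:
# each task-specific MLP stacks layers over the top bi-LSTM state, then its
# head produces a distribution. `bilstm_state` is a hypothetical expression
# of size layers[-1] * 2; the tanh activation is an assumption.
def _tag_probs_sketch(self, bilstm_state):
    outputs = []
    heads = [(self.softmax_upos_w, self.softmax_upos_b),
             (self.softmax_xpos_w, self.softmax_xpos_b),
             (self.softmax_attrs_w, self.softmax_attrs_b)]
    for (mlp_w, mlp_b), (sw, sb) in zip(self.mlps, heads):
        hidden = bilstm_state
        for w, b in zip(mlp_w, mlp_b):
            hidden = dy.tanh(dy.parameter(w) * hidden + dy.parameter(b))
        outputs.append(dy.softmax(dy.parameter(sw) * hidden
                                  + dy.parameter(sb)))
    return outputs  # [upos, xpos, attrs] distributions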