def __init__(self, params):
    super(LSTM, self).__init__()
    self.params = params
    self.hidden_size = params.hidden_size
    # Pretrained word and label embeddings
    self.embedding_word = LoadEmbedding(params.word_num, params.embed_dim)
    self.embedding_word.load_pretrained_embedding(params.embedding_path, params.words_dict,
                                                  params.save_words_embedding, binary=False)
    self.embedding_label = LoadEmbedding(params.label_num, params.embed_dim)
    self.embedding_label.load_pretrained_embedding(params.embedding_path, params.labels_dict,
                                                   params.save_labels_embedding, binary=False)
    # Bidirectional LSTM encoder over the token embeddings
    self.bilstm = nn.LSTM(params.embed_dim, params.hidden_size,
                          dropout=params.dropout, num_layers=params.num_layers,
                          batch_first=True, bidirectional=True)
    # Project the biLSTM output (2 * hidden_size) down to per-label scores
    self.linear1 = nn.Linear(params.hidden_size * 2, params.hidden_size // 2)
    self.linear2 = nn.Linear(params.hidden_size // 2, params.label_num)
    self.crf = crf.CRF(params)
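# A minimal sketch of a matching forward pass for the biLSTM-CRF defined above.
# The neg_log_likelihood(emissions, tags, mask) call and the ReLU between the two
# projections are assumptions for illustration; the actual crf.CRF interface and
# the way linear1/linear2 are chained may differ in the original code.
def forward(self, word_ids, tags, mask=None):
    embeds = self.embedding_word(word_ids)                         # (batch, seq_len, embed_dim), assuming LoadEmbedding acts like nn.Embedding
    lstm_out, _ = self.bilstm(embeds)                              # (batch, seq_len, 2 * hidden_size)
    emissions = self.linear2(torch.relu(self.linear1(lstm_out)))   # (batch, seq_len, label_num)
    return self.crf.neg_log_likelihood(emissions, tags, mask)      # hypothetical CRF loss call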
def setUp(self):
    # Random strictly positive potentials for the matrix/vector/tensor tests
    # (np.float is deprecated; plain float is used instead)
    self.matrix = 0.001 + np.random.poisson(lam=1.5, size=(3, 3)).astype(float)
    self.vector = 0.001 + np.random.poisson(lam=1.5, size=(3,)).astype(float)
    self.M = 0.001 + np.random.poisson(lam=1.5, size=(10, 3, 3)).astype(float)
    labels = ['A', 'B', 'C']
    obsrvs = ['a', 'b', 'c', 'd', 'e', 'f']
    lbls = [crf.START] + labels + [crf.END]
    # One indicator feature per (previous label, current label) transition
    transition_functions = [
        lambda yp, y, x_v, i, _yp=_yp, _y=_y: 1 if yp == _yp and y == _y else 0
        for _yp in lbls[:-1] for _y in lbls[1:]
    ]
    # One indicator feature per (label, observation) pair
    observation_functions = [
        lambda yp, y, x_v, i, _y=_y, _x=_x: 1 if i < len(x_v) and y == _y and x_v[i] == _x else 0
        for _y in labels for _x in obsrvs
    ]
    self.crf = crf.CRF(labels=labels,
                       feature_functions=transition_functions + observation_functions)
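# Note on the lambdas above: the _yp=_yp / _y=_y default arguments bind the loop
# variables at definition time. Without them, every closure would see only the
# final value of the loop variable. A minimal standalone illustration:
fns_late = [lambda: i for i in range(3)]
fns_bound = [lambda i=i: i for i in range(3)]
print([f() for f in fns_late])    # [2, 2, 2] -- all closures share the final i
print([f() for f in fns_bound])   # [0, 1, 2] -- defaults capture i per iteration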
def build(self):
    """Builds the neural architecture (based on the parameters in self.params)"""
    inputs = []
    embeddings = []
    word_ids = keras.layers.Input(shape=(None,), dtype='int32', name='word_input')
    inputs.append(word_ids)

    # Initialise the embedding layer with the pretrained embeddings from spaCy
    initial_embeddings = np.zeros((self.params["vocab_size"], self.nlp.vocab.vectors_length))
    for x in self.nlp.vocab:
        if x.norm_ in self.indices:
            initial_embeddings[self.indices[x.norm_], :] = x.vector

    # Construct the token-level embeddings
    embedding_layer = keras.layers.Embedding(
        input_dim=initial_embeddings.shape[0],
        output_dim=initial_embeddings.shape[1],
        trainable=self.params["trainable_word_embeddings"],
        embeddings_initializer=keras.initializers.Constant(initial_embeddings),
        mask_zero=False, name='word_embedding')
    word_embeddings = embedding_layer(word_ids)
    if self.params["word_emb_transform_dim"]:
        transform = keras.layers.Dense(self.params["word_emb_transform_dim"],
                                       activation="relu", name="word_embeddings_transform")
        word_embeddings = transform(word_embeddings)
    embeddings.append(word_embeddings)

    # Build character-based embeddings
    if self.params["char_embedding_dim"]:
        char_ids = keras.layers.Input(shape=(None, self.params["max_token_length"]),
                                      dtype='int16', name='char_input')
        inputs.append(char_ids)
        char_embedding_layer = keras.layers.Embedding(input_dim=len(CHARACTERS) + 1,
                                                      output_dim=self.params["char_embedding_dim"],
                                                      mask_zero=False, name='char_embedding')
        char_embeddings = char_embedding_layer(char_ids)

        # Build a biLSTM at the character level
        if self.params["char_lstm_dim"]:
            char_lstm_layer = keras.layers.LSTM(self.params["char_lstm_dim"])
            char_bilstm_layer = keras.layers.Bidirectional(char_lstm_layer)
            distributed_layer = keras.layers.TimeDistributed(char_bilstm_layer, name="char_lstm")
            char_lstm_embeddings = distributed_layer(char_embeddings)
            embeddings.append(char_lstm_embeddings)
        # Otherwise, do a max-pooling on the character embeddings
        else:
            # NOTE: we should perform masking to avoid taking 0 values into account
            char_pooling_layer = keras.layers.GlobalMaxPooling1D()
            distributed_layer = keras.layers.TimeDistributed(char_pooling_layer, name="char_pooling")
            char_pooled_embeddings = distributed_layer(char_embeddings)
            embeddings.append(char_pooled_embeddings)

    # Use context-sensitive word embeddings from RoBERTa
    if self.params["use_roberta_embeddings"]:
        roberta_embeddings = keras.layers.Input(shape=(None, 768), dtype='float32',
                                                name="roberta_embeddings")
        inputs.append(roberta_embeddings)
        embeddings.append(roberta_embeddings)

    # Concatenate all the embeddings (word + character-level + RoBERTa) into one large vector
    if len(embeddings) > 1:
        token_embeddings = keras.layers.Concatenate(axis=-1)(embeddings)
    else:
        token_embeddings = embeddings[0]

    # Perform dropout
    dropout = keras.layers.Dropout(self.params["dropout"], name="token_dropout")
    token_embeddings = dropout(token_embeddings)

    # Add convolutional layers
    for i in range(self.params["nb_convo_layers"]):
        convo = keras.layers.Conv1D(self.params["token_filter_dim"],
                                    kernel_size=self.params["token_kernel_size"],
                                    padding='same', activation="relu",
                                    name="token_convolution_%i" % (i + 1))
        token_embeddings = convo(token_embeddings)

    # Add a biLSTM layer
    if self.params["token_lstm_dim"]:
        token_lstm_layer = keras.layers.LSTM(self.params["token_lstm_dim"], return_sequences=True)
        token_bilstm_layer = keras.layers.Bidirectional(token_lstm_layer, name="token_biLSTM")
        token_embeddings = token_bilstm_layer(token_embeddings)
    # Add a dense layer after convolutions + biLSTM
    if self.params["dense_dim"]:
        dense_layer = keras.layers.Dense(self.params["dense_dim"], activation="relu",
                                         name="token_dense")
        final_token_embeddings = dense_layer(token_embeddings)
    else:
        final_token_embeddings = token_embeddings

    # Create final layer (CRF or softmax layer)
    if self.params["use_crf"]:
        output_layer = crf.CRF(len(self.label_indices), learn_mode="marginal",
                               test_mode="marginal", name="crf_output")
        loss = output_layer.loss_function
    else:
        output_layer = keras.layers.Dense(len(self.label_indices),
                                          name="softmax_output", activation="softmax")
        loss = "categorical_crossentropy"

    # Create final model
    output = output_layer(final_token_embeddings)
    self.model = keras.models.Model(inputs=inputs, outputs=output)
    optimiser = getattr(keras.optimizers, self.params["optimiser"])(lr=self.params["lr"])
    self.model.compile(loss=loss, optimizer=optimiser,
                       weighted_metrics=["categorical_accuracy"],
                       sample_weight_mode="temporal")
    return self.model
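# A hedged usage sketch for the compiled model above, assuming a constructed `tagger`
# instance and pre-built input arrays (the array names and shapes are illustrative
# assumptions; the preprocessing code is not shown here). Because the model is compiled
# with sample_weight_mode="temporal", fit() expects one weight per token:
model = tagger.build()
model.fit([word_id_array, char_id_array],   # inputs matching the Input layers declared above
          label_array,                      # one-hot labels, shape (batch, seq_len, nb_labels)
          sample_weight=token_weights,      # per-token weights, shape (batch, seq_len)
          batch_size=32, epochs=5)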
def __init__(self, config, freeze_bert=False, tagset=None, tagset_flat=None,
             hidden_dim=100, flat_hidden_dim=100, device=None):
    super(Tagger, self).__init__(config)
    self.tagset = tagset
    self.tagset_flat = tagset_flat
    self.device = device
    self.crf = crf.CRF(len(self.tagset), device)
    # Reverse mapping from label index to label string; the two extra indices map to "O"
    self.rev_tagset = {tagset[v]: v for v in tagset}
    self.rev_tagset[len(tagset)] = "O"
    self.rev_tagset[len(tagset) + 1] = "O"
    self.num_labels = len(tagset) + 2
    self.num_labels_flat = len(tagset_flat)
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased',
                                                   do_lower_case=False,
                                                   do_basic_tokenize=False)
    self.bert = BertModel.from_pretrained("bert-base-cased")
    self.bert.eval()
    if freeze_bert:
        for param in self.bert.parameters():
            param.requires_grad = False
    self.hidden_dim = hidden_dim
    self.layered_dropout = nn.Dropout(0.20)
    # Three stacked biLSTM layers, each with its own projection to label scores
    # (modelSize is presumably the BERT hidden size, defined elsewhere in the module)
    self.lstm1 = nn.LSTM(modelSize, hidden_dim, bidirectional=True, batch_first=True)
    self.hidden2tag1 = nn.Linear(hidden_dim * 2, self.num_labels)
    self.lstm2 = nn.LSTM(2 * hidden_dim, hidden_dim, bidirectional=True, batch_first=True)
    self.hidden2tag2 = nn.Linear(hidden_dim * 2, self.num_labels)
    self.lstm3 = nn.LSTM(2 * hidden_dim, hidden_dim, bidirectional=True, batch_first=True)
    self.hidden2tag3 = nn.Linear(hidden_dim * 2, self.num_labels)
    # Separate "flat" biLSTM and classifier over the flat tagset
    self.flat_dropout = nn.Dropout(0.5)
    self.flat_hidden_dim = flat_hidden_dim
    self.flat_lstm = nn.LSTM(modelSize, self.flat_hidden_dim, bidirectional=True,
                             batch_first=True, num_layers=1)
    self.flat_classifier = nn.Linear(2 * self.flat_hidden_dim, self.num_labels_flat)
    param_group = []
    self.bert_params = {}
    self.everything_else_params = {}
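# A hypothetical sketch of how the layers above could be wired together, assuming
# self.bert returns (sequence_output, pooled_output) as in the pre-4.x transformers /
# pytorch_pretrained_bert API (an assumption; the original forward method is not shown):
def forward_sketch(self, input_ids, attention_mask):
    sequence_output, _ = self.bert(input_ids, attention_mask=attention_mask)
    out1, _ = self.lstm1(self.layered_dropout(sequence_output))
    logits1 = self.hidden2tag1(out1)    # emission scores for the first layer of tags
    out2, _ = self.lstm2(out1)          # each deeper biLSTM consumes the previous biLSTM output
    logits2 = self.hidden2tag2(out2)
    out3, _ = self.lstm3(out2)
    logits3 = self.hidden2tag3(out3)
    return logits1, logits2, logits3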
    try:
        cursor.execute(sql)
    except Exception:
        pass
    db.commit()


if __name__ == '__main__':
    rules = rule.Rule()
    rules.add_dict("dict/dict.txt")
    rules.add_dict("dict/ws_dict.txt")

    # print("begin:train")
    # Dictionary-based output, evaluated against the gold standard
    rules.gen_dict("data/mix_dp.txt", "newdata/dict.txt")
    print(rules.judge("newdata/mix_standard.txt", "newdata/dict.txt"))

    # Rule-based output
    # rules.load_model("rule_model.txt")
    rules.gen_rule("data/mix_dp.txt", "newdata/rule.txt", 0.5, 10)
    print(rules.judge("newdata/mix_standard.txt", "newdata/rule.txt"))

    # CRF model: train, generate output, evaluate
    # (bound to crf_model so the crf module itself is not shadowed)
    crf_model = crf.CRF()
    crf_model.read_rule_model("rule_model.txt")
    crf_model.train("data/yuliao.txt_0.ban.ws.pos.dp", "newdata/standard.txt", "model.txt")
    # crf_model.train("data/mix_dp.txt", "newdata/mix_standard.txt", "model.txt")
    crf_model.gen_rule("model.txt", "data/mix_dp.txt", "newdata/crf.txt")
    print(crf_model.judge("newdata/mix_standard.txt", "newdata/crf.txt"))