def train(train_file):
    """Train a CRF tagger on *train_file* and print a weighted F1 on a 1% dev split.

    NOTE(review): `load_dataset`, `CustomTransformer`, `template`, `CRF`,
    `metrics`, `train_test_split`, `join` and `dirname` are assumed to be
    imported at module level — confirm against the full file.
    """
    print(train_file)

    # Load the tagged corpus.
    sentences = []
    sentences += load_dataset(train_file)
    print("Load corpus from file", train_file)

    # Turn tagged sentences into feature/label sequences for the CRF.
    feature_transformer = CustomTransformer(template)
    features, labels = feature_transformer.transform(sentences)

    # CRF trainer hyper-parameters.
    hyperparams = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }
    output_path = join(dirname(__file__), "model", "model.bin")

    # Hold out 1% of the data as a quick dev-set sanity check.
    features_train, features_dev, labels_train, labels_dev = train_test_split(
        features, labels, test_size=0.01)

    crf_model = CRF(params=hyperparams, filename=output_path)
    crf_model.fit(features_train, labels_train)

    # Score the held-out split.
    predictions = crf_model.predict(features_dev)
    dev_f1 = metrics.flat_f1_score(labels_dev, predictions, average='weighted')
    print("Dev score: ", dev_f1)
def train(train_X, train_Y):
    """Fit a CRF on the given feature/label sequences and return the fitted model.

    NOTE(review): `model_path`, `logging`, `time` and `to_time_message` are
    not defined in this function — they are assumed to come from the enclosing
    module scope; confirm against the full file.
    """
    from languageflow.model.crf import CRF

    logging.info("Step 3: Training CRF model")
    started_at = time.time()

    hyperparams = {
        "c1": 0.1,  # coefficient for L1 penalty
        "c2": 1e-3,  # coefficient for L2 penalty
        "max_iterations": 5000,
        "feature.possible_transitions": True,
    }
    model = CRF(params=hyperparams, filename=model_path)
    model.fit(train_X, train_Y)

    finished_at = time.time()
    logging.info(to_time_message(started_at, finished_at))
    return model
def train(train_path, model_path):
    """Train a CRF sequence model on the corpus at *train_path* and save it.

    Args:
        train_path: path to the tagged training corpus (read via load_dataset).
        model_path: file path the fitted CRF is persisted to.

    NOTE(review): `load_dataset`, `CustomTransformer`, `template`, `CRF`,
    `makedirs` and `dirname` are assumed to be imported at module level.
    """
    train_set = []
    train_set += load_dataset(train_path)
    print("Load data from file", train_path)
    transformer = CustomTransformer(template)
    X, y = transformer.transform(train_set)

    # train
    params = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }

    # Ensure the output directory exists. The original wrapped makedirs in a
    # bare `except: pass`, which also hid unrelated failures (e.g. permission
    # errors); exist_ok=True only tolerates an already-existing directory.
    folder = dirname(model_path)
    if folder:  # dirname("") would make makedirs raise
        makedirs(folder, exist_ok=True)

    estimator = CRF(params=params, filename=model_path)
    estimator.fit(X, y)
"T[-3][3]", "T[-2][3]", "T[-1][3]", ] transformer = TaggedTransformer(template) # flow.transform(transformer) X_train, y_train = transformer.transform(train_sentences) X_test, y_test = transformer.transform(test_sentences) # =========================================================================# # Models # =========================================================================# parameters = { 'c1': 1.0, # coefficient for L1 penalty 'c2': 1e-3, # coefficient for L2 penalty 'max_iterations': 1000, # # include transitions that are possible, but not observed 'feature.possible_transitions': True } filename = join(dirname(__file__), 'model', 'crf.model') estimator = CRF(params=parameters, filename=filename) estimator.fit(X_train, y_train) # =========================================================================# # Evaluation # =========================================================================# y_pred = estimator.predict(X_test) f1_score = iob_score(y_test, y_pred) print(f1_score)
if __name__ == '__main__':
    # Final-model training: concatenate train/dev/test VLSP2016 splits so the
    # shipped model sees all available data.
    # NOTE(review): load_dataset, CustomTransformer, template, CRF, metrics,
    # train_test_split, joblib, join and dirname are assumed to be imported
    # at module level.
    train_set = []
    for f in ["train.txt", "dev.txt", "test.txt"]:
        file = join(dirname(dirname(dirname(__file__))), "data", "vlsp2016",
                    "corpus", f)
        train_set += load_dataset(file)

    # transformer
    transformer = CustomTransformer(template)
    X, y = transformer.transform(train_set)

    # train
    crf_params = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }
    model_path = join(dirname(__file__), "final_model", "model.bin")

    # Keep 1% aside as a dev-set sanity check.
    X_train, X_dev, y_train, y_dev = train_test_split(X, y, test_size=0.01)
    estimator = CRF(params=crf_params, filename=model_path)
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_dev)
    f1_score = metrics.flat_f1_score(y_dev, y_pred, average='weighted')
    print("Dev score: ", f1_score)

    # Fix: anchor the transformer path next to the model. The original dumped
    # to a CWD-relative "final_model/transformer.bin", so running the script
    # from another directory split the two artifacts (or crashed).
    joblib.dump(transformer,
                join(dirname(__file__), "final_model", "transformer.bin"))
"T[-3][3]", "T[-2][3]", "T[-1][3]", ] transformer = TaggedTransformer(template) flow.transform(transformer) # =========================================================================# # Models # =========================================================================# crf_params = { 'c1': 1.0, # coefficient for L1 penalty 'c2': 1e-3, # coefficient for L2 penalty 'max_iterations': 1000, # # include transitions that are possible, but not observed 'feature.possible_transitions': True } flow.add_model(Model(CRF(params=crf_params), "CRF")) # =========================================================================# # Evaluation # =========================================================================# flow.add_score('f1_chunk') flow.add_score('accuracy_chunk') flow.set_validation(TrainTestSplitValidation(test_size=0.1)) # flow.set_validation(TrainTestSplitValidation(test_size=0.3)) flow.train() # flow.save_model("CRF", filename="ner_crf_20171006_template_2.model")
from load_data import load_data
from models.custom_transformer import CustomTransformer
from models.features import template

if __name__ == '__main__':
    # Train a CRF tagger on the local corpus and persist both the model and
    # the fitted transformer.
    # NOTE(review): join, dirname, makedirs, CRF and joblib are assumed to be
    # imported elsewhere in this file — confirm.
    train_path = join(dirname(__file__), "data", "corpus", "train.txt")
    train_set = []
    train_set += load_data(train_path)
    print("Load data from file", train_path)
    transformer = CustomTransformer(template)
    X_train, y_train = transformer.transform(train_set)

    # train
    params = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }
    model_path = join(dirname(__file__), "models", "model_crf.bin")

    # Ensure the output directory exists. The original used a bare
    # `except: pass`, which also hid real failures such as permission errors;
    # exist_ok=True only tolerates an already-existing directory.
    folder = dirname(model_path)
    makedirs(folder, exist_ok=True)

    estimator = CRF(params=params, filename=model_path)
    estimator.fit(X_train, y_train)

    transformer_path = join(dirname(__file__), "models", "transformer.bin")
    joblib.dump(transformer, transformer_path)
def test_crf(self):
    """Smoke test: a default CRF fits a minimal two-sample dataset without raising."""
    model = CRF()
    inputs = ['x', 'y']
    outputs = ['a', 'b']
    model.fit(inputs, outputs)