from os.path import dirname, join

from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation

from load_data import load_dataset
from transformer import TfidfVectorizer

if __name__ == '__main__':
    data_file = join(dirname(dirname(dirname(__file__))), "data", "fb_bank_category", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)

    # binarize the multi-label targets, then extract 1-3 gram tf-idf features
    transformer = TfidfVectorizer(ngram_range=(1, 3))
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    # one-vs-rest SGD classifier over the tf-idf features
    flow.add_model(Model(OneVsRestClassifier(SGDClassifier()), "SGD"))

    # flow.set_learning_curve(0.7, 1, 0.3)
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))
    flow.train()
    flow.export_folder = "model"
from os.path import dirname, join

from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.model.crf import CRF
from underthesea_flow.transformer.tagged import TaggedTransformer
from underthesea_flow.validation.validation import TrainTestSplitValidation

from preprocess import vlsp2016

if __name__ == '__main__':
    # =========================================================================#
    # Start an experiment with flow
    # =========================================================================#
    flow = Flow()
    flow.log_folder = join(dirname(__file__), "logs")

    # =========================================================================#
    # Data
    # =========================================================================#
    # for quick experiment
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt")
    # sentences = vlsp2016.load_data(file)

    # for evaluation
    # file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt")
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt")
    # sentences = vlsp2016.load_data(file)

    # for saving model
    sentences = []
from os.path import dirname, join

from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation

from load_data import load_dataset
from model.model_fasttext import FastTextClassifier

if __name__ == '__main__':
    data_file = join(dirname(dirname(dirname(dirname(__file__)))), "data", "fb_bank_act_2", "corpus", "data.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)

    # single FastText classifier, evaluated with a 90/10 train/test split
    flow.add_model(Model(FastTextClassifier(), "FastText"))
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))
    # flow.validation()

    # train and persist the model to model/fasttext.model
    model_name = "FastText"
    model_filename = join("model", "fasttext.model")
    flow.train()
    flow.save_model(model_name=model_name, model_filename=model_filename)