# Experiment script: creates a languageflow ``Flow`` and loads the VLSP 2016
# corpus splits from <project root>/data/vlsp2016/corpus.
from os.path import dirname, join  # required by the path handling below

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.model.crf import CRF
from languageflow.transformer.tagged import TaggedTransformer
from languageflow.validation.validation import TrainTestSplitValidation

from load_data import load_data
from utils.scorer import iob_score

if __name__ == '__main__':
    # =========================================================================#
    # Start an experiment with flow
    # =========================================================================#
    flow = Flow()
    # Logs are written next to this script, independent of the working dir.
    flow.log_folder = join(dirname(__file__), "logs")

    # =========================================================================#
    # Data
    # =========================================================================#
    # The corpus folder sits three directory levels above this file; compute
    # it once instead of on every loop iteration.
    corpus_folder = join(
        dirname(dirname(dirname(__file__))), "data", "vlsp2016", "corpus"
    )

    # One entry per split, in the fixed order: train, dev, test.
    sentences = [
        load_data(join(corpus_folder, split_name))
        for split_name in ("train.txt", "dev.txt", "test.txt")
    ]

    # Train on train + dev; keep the test split apart.
    # NOTE(review): assumes load_data returns a sequence supporting `+`
    # (e.g. a list of sentences) -- confirm against load_data.
    train_sentences = sentences[0] + sentences[1]
    test_sentences = sentences[2]

    # Feeding the data into the flow is currently disabled:
    # flow.data(sentences=sentences)
# Training script: fits a one-vs-rest SGD classifier through a languageflow
# ``Flow`` on the fb_bank_sentiment corpus and exports it to the "model"
# folder.
from os.path import dirname, join

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.tfidf import TfidfVectorizer
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from load_data import load_dataset

if __name__ == '__main__':
    # The corpus lives under <project root>/data, three levels above this file.
    project_root = dirname(dirname(dirname(__file__)))
    corpus_path = join(
        project_root, "data", "fb_bank_sentiment", "corpus", "train.xlsx"
    )
    features, targets = load_dataset(corpus_path)

    # Experiment setup.
    experiment = Flow()
    experiment.log_folder = "log"
    experiment.data(features, targets)

    # Transformers are registered in this order: label binarizer first,
    # then the 1- to 3-gram TF-IDF vectorizer.
    experiment.transform(MultiLabelBinarizer())
    experiment.transform(TfidfVectorizer(ngram_range=(1, 3)))

    # One-vs-rest wrapper around a default SGD classifier, registered as "SGD".
    sgd_estimator = OneVsRestClassifier(SGDClassifier())
    experiment.add_model(Model(sgd_estimator, "SGD"))

    # Learning-curve evaluation is currently disabled:
    # experiment.set_learning_curve(0.7, 1, 0.3)

    # test_size=0.1 is passed to the train/test split validation.
    experiment.set_validation(TrainTestSplitValidation(test_size=0.1))

    experiment.train()
    experiment.export(model_name="SGD", export_folder="model")