def test_export(self):
    """Smoke-test ``Flow.export`` with a tiny two-class dataset.

    Registers an ``SGDClassifier`` model on a four-sample dataset, exports
    it to a scratch ``temp`` folder and removes that folder afterwards.
    The test makes no explicit assertion: it passes when the export
    completes without raising.
    """
    flow = Flow()
    X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    y = np.array([1, 1, 2, 2])
    flow.data(X=X, y=y)

    # NOTE: the misspelt name is kept as-is; it only has to match between
    # the registration here and the export call below.
    model = Model(SGDClassifier(), "SGDClassfier")
    flow.add_model(model)

    try:
        mkdir("temp")
    except OSError:
        # Best effort: the folder may be left over from an earlier run.
        # Only filesystem errors are ignored, nothing else.
        pass

    try:
        flow.export("SGDClassfier", export_folder="temp")
    finally:
        # Always clean up, even when the export fails, so that a failing
        # run does not leave artefacts behind for the next one.
        shutil.rmtree("temp")
# Script: fit a one-vs-rest linear SVC on the fb_bank corpus and export it.
from os.path import dirname, join
import sys
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.svm import SVC
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset

if __name__ == '__main__':
    # The corpus lives four directory levels above this script.
    base_folder = dirname(dirname(dirname(dirname(__file__))))
    corpus_path = join(base_folder, "data", "fb_bank", "corpus", "train.xlsx")
    features, labels = load_dataset(corpus_path)

    experiment = Flow()
    experiment.data(features, labels)

    # Transformers are registered in this order: label binarizer first,
    # then the n-gram count vectorizer.
    ngram_counter = CountVectorizer(ngram_range=(1, 2), max_df=0.5, min_df=8)
    experiment.transform(MultiLabelBinarizer())
    experiment.transform(ngram_counter)

    classifier = OneVsRestClassifier(SVC(kernel='linear'))
    experiment.add_model(Model(classifier, "SVC"))

    holdout = TrainTestSplitValidation(test_size=0.1)
    experiment.set_validation(holdout)

    # flow.train()
    experiment.export(model_name="SVC", export_folder="model")
# Script preamble: start a Flow experiment and load the vlsp2016 corpus splits.
# `dirname` and `join` are used below, so they are imported explicitly here.
from os.path import dirname, join
import os
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.model.crf import CRF
from languageflow.transformer.tagged import TaggedTransformer
from languageflow.validation.validation import TrainTestSplitValidation
from load_data import load_data
from utils.scorer import iob_score

if __name__ == '__main__':
    # =========================================================================#
    # Start an experiment with flow
    # =========================================================================#
    flow = Flow()
    flow.log_folder = join(dirname(__file__), "logs")

    # =========================================================================#
    # Data
    # =========================================================================#

    # for saving model
    # The corpus folder sits three directory levels above this script.
    corpus_folder = join(dirname(dirname(dirname(__file__))),
                         "data", "vlsp2016", "corpus")
    # One entry per split, in the order train, dev, test.
    sentences = [
        load_data(join(corpus_folder, split_name))
        for split_name in ["train.txt", "dev.txt", "test.txt"]
    ]
    # Train on train + dev; keep the test split separate.
    # (assumes load_data returns a sequence supporting `+` -- TODO confirm)
    train_sentences = sentences[0] + sentences[1]
    test_sentences = sentences[2]
# Script preamble: start a Flow experiment whose logs go next to this file.
from os.path import dirname, join
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.model.crf import CRF
from languageflow.transformer.tagged import TaggedTransformer
from languageflow.validation.validation import TrainTestSplitValidation
from preprocess import vlsp2016

if __name__ == '__main__':
    # =========================================================================#
    # Start an experiment with flow
    # =========================================================================#
    flow = Flow()
    script_folder = dirname(__file__)
    flow.log_folder = join(script_folder, "logs")

    # =========================================================================#
    # Data
    # =========================================================================#

    # for quick experiment
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt")
    # sentences = vlsp2016.load_data(file)

    # for evaluation
    # file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt")
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt")
    # sentences = vlsp2016.load_data(file)

    # for saving model
# Script: validate and export a one-vs-rest logistic regression on the
# fb_bank corpus.
# `dirname` and `join` are used below, so they are imported explicitly here.
from os.path import dirname, join
import sys
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.linear_model import LogisticRegression
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset

if __name__ == '__main__':
    # The corpus lives four directory levels above this script.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))),
                     "data", "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.data(X, y)

    # Transformers are registered in this order: label binarizer first,
    # then the n-gram count vectorizer.
    transformer = CountVectorizer(ngram_range=(1, 3), max_df=0.8, min_df=8,
                                  max_features=5000)
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    flow.add_model(
        Model(OneVsRestClassifier(LogisticRegression()), "LogisticRegression"))
    # Hold out 10% of the data for validation.
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    flow.train()
    flow.export(model_name="LogisticRegression", export_folder="model")