def test_export(self):
    """Train a tiny SGD classifier through a Flow and export it to disk.

    Builds a 4-sample, 2-class toy dataset, registers the model under the
    name "SGDClassfier", exports it into a scratch folder, and removes the
    folder afterwards.
    """
    flow = Flow()
    X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    y = np.array([1, 1, 2, 2])
    flow.data(X=X, y=y)
    # NOTE: "SGDClassfier" (sic) is the registered model name; export below
    # must use the exact same string, so the typo is kept intentionally.
    model = Model(SGDClassifier(), "SGDClassfier")
    flow.add_model(model)
    try:
        mkdir("temp")
    except FileExistsError:
        # Only ignore "already exists"; other OSErrors (e.g. permission
        # denied) should surface instead of being silently swallowed.
        pass
    try:
        flow.export("SGDClassfier", export_folder="temp")
    finally:
        # Always remove the scratch folder, even if export() raises,
        # so a failing test does not leave artifacts behind.
        shutil.rmtree("temp")
from os.path import dirname, join
import sys
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.svm import SVC
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset


if __name__ == '__main__':
    # Locate .../data/fb_bank/corpus/train.xlsx four directories above this file.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))),
                     "data", "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    # Feed the raw dataset into a fresh flow.
    flow = Flow()
    flow.data(X, y)

    # Binarize the multi-label targets, then vectorize the text with
    # unigram/bigram counts (drop very frequent and very rare terms).
    count_vectorizer = CountVectorizer(ngram_range=(1, 2), max_df=0.5, min_df=8)
    flow.transform(MultiLabelBinarizer())
    flow.transform(count_vectorizer)

    # One-vs-rest linear SVM, validated on a 10% hold-out split.
    flow.add_model(Model(OneVsRestClassifier(SVC(kernel='linear')), "SVC"))
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))
    # flow.train()
    flow.export(model_name="SVC", export_folder="model")
# for quick experiment # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt") # sentences = vlsp2016.load_data(file) # for evaluation # file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt") # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt") # sentences = vlsp2016.load_data(file) # for saving model sentences = [] for f in ["train.txt", "dev.txt", "test.txt"]: file = join(dirname(__file__), "corpus", "vlsp2016", f) sentences += vlsp2016.load_data(file) flow.data(sentences=sentences) # =========================================================================# # Transformer # =========================================================================# template = [ "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower", "T[2].lower", "T[0].istitle", "T[-1].istitle", "T[1].istitle", "T[-2].istitle", "T[2].istitle", # word unigram and bigram "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]", "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]", # pos unigram and bigram "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]", "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]", # ner "T[-3][3]", "T[-2][3]", "T[-1][3]",