def test_export(self):
    """Check that a registered model can be exported into a scratch folder.

    Builds a ``Flow`` over four 2-D points with two labels, registers an
    ``SGDClassifier`` under the name ``"SGDClassfier"`` and calls
    ``flow.export`` into the ``temp`` folder. The folder is removed
    afterwards, including when the export raises.
    """
    flow = Flow()
    X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    y = np.array([1, 1, 2, 2])
    flow.data(X=X, y=y)

    # The name given here (spelling included) is the key passed to export().
    model = Model(SGDClassifier(), "SGDClassfier")
    flow.add_model(model)

    try:
        mkdir("temp")
    except OSError:
        # Best effort: presumably the folder is left over from an earlier
        # run. Only OS-level failures are ignored, not every exception.
        pass

    try:
        flow.export("SGDClassfier", export_folder="temp")
    finally:
        # Always clean up so a failed export does not leave "temp" behind.
        shutil.rmtree("temp")
# Script: register a one-vs-rest linear SVC over 1-2 gram counts for the
# fb_bank corpus on a Flow and export it under the name "SVC".
from os.path import dirname, join
import sys

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.svm import SVC
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from load_data import load_dataset

if __name__ == '__main__':
    # Apply dirname() four times to this file's path to reach the directory
    # that contains "data/".
    base_dir = __file__
    for _ in range(4):
        base_dir = dirname(base_dir)
    data_file = join(base_dir, "data", "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.data(X, y)

    # Transformers are registered in this order: label binarizer, then counts.
    transformer = CountVectorizer(ngram_range=(1, 2), max_df=0.5, min_df=8)
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    svc = SVC(kernel='linear')
    flow.add_model(Model(OneVsRestClassifier(svc), "SVC"))

    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    # flow.train()
    flow.export(model_name="SVC", export_folder="model")
"T[-3][3]", "T[-2][3]", "T[-1][3]", ] transformer = TaggedTransformer(template) flow.transform(transformer) # =========================================================================# # Models # =========================================================================# crf_params = { 'c1': 1.0, # coefficient for L1 penalty 'c2': 1e-3, # coefficient for L2 penalty 'max_iterations': 1000, # # include transitions that are possible, but not observed 'feature.possible_transitions': True } flow.add_model(Model(CRF(params=crf_params), "CRF")) # =========================================================================# # Evaluation # =========================================================================# flow.add_score('f1_chunk') flow.add_score('accuracy_chunk') flow.set_validation(TrainTestSplitValidation(test_size=0.1)) # flow.set_validation(TrainTestSplitValidation(test_size=0.3)) flow.train() # flow.save_model("CRF", filename="ner_crf_20171006_template_2.model")
# Script: register a one-vs-rest logistic regression over 1-3 gram counts for
# the fb_bank corpus on a Flow, run flow.train(), then export it under the
# name "LogisticRegression".
#
# `dirname` and `join` are used below to build the corpus path, so they are
# imported here explicitly.
from os.path import dirname, join
import sys

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.linear_model import LogisticRegression
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from load_data import load_dataset

if __name__ == '__main__':
    # The corpus path is resolved relative to this file: four dirname() calls
    # up, then data/fb_bank/corpus/train.xlsx.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))),
                     "data", "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.data(X, y)

    # Transformers are registered in this order: label binarizer, then counts.
    transformer = CountVectorizer(ngram_range=(1, 3), max_df=0.8, min_df=8,
                                  max_features=5000)
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    flow.add_model(
        Model(OneVsRestClassifier(LogisticRegression()), "LogisticRegression"))

    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    flow.train()
    flow.export(model_name="LogisticRegression", export_folder="model")
# Script: register a one-vs-rest SGD classifier over 1-3 gram tf-idf features
# for the fb_bank_sentiment corpus on a Flow, run flow.train(), then export
# it under the name "SGD".
from os.path import dirname, join

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.tfidf import TfidfVectorizer
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer

from load_data import load_dataset
from sklearn.linear_model import SGDClassifier

if __name__ == '__main__':
    # Three dirname() calls up from this file is the directory holding "data/".
    script_dir = dirname(__file__)
    root_dir = dirname(dirname(script_dir))
    data_file = join(root_dir, "data", "fb_bank_sentiment", "corpus",
                     "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)

    # Transformers are registered in this order: label binarizer, then tf-idf.
    transformer = TfidfVectorizer(ngram_range=(1, 3))
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    estimator = OneVsRestClassifier(SGDClassifier())
    flow.add_model(Model(estimator, "SGD"))

    # flow.set_learning_curve(0.7, 1, 0.3)
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    flow.train()
    flow.export(model_name="SGD", export_folder="model")