Code example #1
import shutil
from os import mkdir
from unittest import TestCase

import numpy as np
from sklearn.linear_model import SGDClassifier
from languageflow.flow import Flow
from languageflow.model import Model


class TestFlow(TestCase):
    def test_export(self):
        # toy dataset: two points per class
        flow = Flow()
        X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
        y = np.array([1, 1, 2, 2])
        flow.data(X=X, y=y)
        model = Model(SGDClassifier(), "SGDClassifier")
        flow.add_model(model)
        # make sure the export folder exists before exporting
        try:
            mkdir("temp")
        except FileExistsError:
            pass
        flow.export("SGDClassifier", export_folder="temp")
        shutil.rmtree("temp")
Code example #2
from os.path import dirname, join

from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import SVC

from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from languageflow.validation.validation import TrainTestSplitValidation

from load_data import load_dataset

if __name__ == '__main__':
    # path to the fb_bank training corpus, relative to this script
    data_file = join(dirname(dirname(dirname(dirname(__file__)))), "data", "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.data(X, y)

    # binarize the multi-label targets and extract unigram/bigram counts
    transformer = CountVectorizer(ngram_range=(1, 2), max_df=0.5, min_df=8)
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    # one-vs-rest linear SVC, evaluated on a 10% hold-out split
    flow.add_model(Model(OneVsRestClassifier(SVC(kernel='linear')), "SVC"))
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    # flow.train()
    flow.export(model_name="SVC", export_folder="model")
Code example #3
File: main.py  Project: anhlbt/ner
    # for quick experiment
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt")
    # sentences = vlsp2016.load_data(file)

    # for evaluation
    # file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt")
    # file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt")
    # sentences = vlsp2016.load_data(file)

    # for saving model
    sentences = []
    for f in ["train.txt", "dev.txt", "test.txt"]:
        file = join(dirname(__file__), "corpus", "vlsp2016", f)
        sentences += vlsp2016.load_data(file)

    flow.data(sentences=sentences)

    # =========================================================================#
    #                                Transformer
    # =========================================================================#
    template = [
        # lowercased word and title-case features in a five-token window
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower", "T[2].lower",
        "T[0].istitle", "T[-1].istitle", "T[1].istitle", "T[-2].istitle", "T[2].istitle",
        # word unigram and bigram
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
        # pos unigram and bigram
        "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]",
        "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]",
        # ner
        "T[-3][3]", "T[-2][3]", "T[-1][3]",