def test_export(self):
    """Export an SGD classifier added to a flow into a scratch folder.

    Builds a four-sample, two-label dataset, adds an ``SGDClassifier`` to
    the flow under the name ``"SGDClassfier"``, exports it into the
    ``temp`` folder and removes that folder afterwards.
    """
    flow = Flow()
    X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    y = np.array([1, 1, 2, 2])
    flow.data(X=X, y=y)
    # The same (misspelled) name is used when adding the model and when
    # exporting it below; keep the two strings in sync.
    model = Model(SGDClassifier(), "SGDClassfier")
    flow.add_model(model)
    try:
        mkdir("temp")
    except OSError:
        # Best effort: the folder may be left over from an earlier run.
        # Only OS-level errors are ignored; anything else still propagates.
        pass
    try:
        flow.export("SGDClassfier", export_folder="temp")
    finally:
        # Clean up even when the export raises, so the scratch folder does
        # not leak into later runs.
        shutil.rmtree("temp")
Example #2
0
from os.path import dirname, join
import sys
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.svm import SVC
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset

if __name__ == '__main__':
    # Walk four directory levels up from this script; the corpus lives under
    # "data/fb_bank/corpus" relative to that folder.
    base_dir = __file__
    for _ in range(4):
        base_dir = dirname(base_dir)
    corpus_path = join(base_dir, "data", "fb_bank", "corpus", "train.xlsx")
    features, labels = load_dataset(corpus_path)

    experiment = Flow()
    experiment.data(features, labels)

    # Both transformers are created first and then attached in this order:
    # label binarizer, then the count vectorizer.
    ngram_counter = CountVectorizer(ngram_range=(1, 2), max_df=0.5, min_df=8)
    label_binarizer = MultiLabelBinarizer()
    for step in (label_binarizer, ngram_counter):
        experiment.transform(step)

    # One-vs-rest wrapper around a linear-kernel SVC, added as "SVC".
    classifier = OneVsRestClassifier(SVC(kernel="linear"))
    experiment.add_model(Model(classifier, "SVC"))

    validation = TrainTestSplitValidation(test_size=0.1)
    experiment.set_validation(validation)

    # NOTE: the training call is disabled in this script, so the export
    # below runs without a preceding train().
    # experiment.train()
    experiment.export(model_name="SVC", export_folder="model")
Example #3
0
import os
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.model.crf import CRF
from languageflow.transformer.tagged import TaggedTransformer
from languageflow.validation.validation import TrainTestSplitValidation

from load_data import load_data

from utils.scorer import iob_score

if __name__ == '__main__':
    # =========================================================================#
    # Start an experiment with flow
    # =========================================================================#
    flow = Flow()
    # This script only has ``import os``; bare ``join``/``dirname`` are not
    # defined here, so the path helpers are reached through ``os.path``.
    script_dir = os.path.dirname(__file__)
    flow.log_folder = os.path.join(script_dir, "logs")

    # =========================================================================#
    #                               Data
    # =========================================================================#

    # for saving model
    # The corpus folder sits three directory levels above this file, under
    # data/vlsp2016/corpus.
    corpus_dir = os.path.join(
        os.path.dirname(os.path.dirname(script_dir)),
        "data", "vlsp2016", "corpus")
    # One entry per split, in the order train, dev, test.
    sentences = [
        load_data(os.path.join(corpus_dir, split_name))
        for split_name in ("train.txt", "dev.txt", "test.txt")
    ]
    # Train and dev splits are concatenated for training (assumes load_data
    # returns a list-like supporting ``+`` -- TODO confirm); the test split
    # is kept separate.
    train_sentences = sentences[0] + sentences[1]
    test_sentences = sentences[2]
Example #4
0
File: main.py Project: anhlbt/ner
from os.path import dirname, join
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.model.crf import CRF
from languageflow.transformer.tagged import TaggedTransformer
from languageflow.validation.validation import TrainTestSplitValidation

from preprocess import vlsp2016

if __name__ == '__main__':
    # -------------------------------------------------------------------------
    # Experiment setup
    # -------------------------------------------------------------------------
    flow = Flow()
    # Logs go into a "logs" folder next to this script.
    script_dir = dirname(__file__)
    flow.log_folder = join(script_dir, "logs")

    # -------------------------------------------------------------------------
    # Data
    # -------------------------------------------------------------------------
    # Corpus-loading variants kept for reference (all currently disabled):
    #
    #   quick experiment:
    #     file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt")
    #     sentences = vlsp2016.load_data(file)
    #
    #   evaluation:
    #     file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt")
    #     file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt")
    #     sentences = vlsp2016.load_data(file)
    #
    #   saving model:
    #     (nothing follows this heading in the visible part of the script)
Example #5
0
import sys
from languageflow.flow import Flow
from languageflow.model import Model
from languageflow.transformer.count import CountVectorizer
from sklearn.linear_model import LogisticRegression
from languageflow.validation.validation import TrainTestSplitValidation
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset

if __name__ == '__main__':
    # ``join`` and ``dirname`` are not imported at the top of this script;
    # bring them into scope here so the path construction below does not
    # fail with a NameError.
    from os.path import dirname, join

    # The corpus lives four directory levels above this file, under
    # data/fb_bank/corpus.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))), "data",
                     "fb_bank", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.data(X, y)

    # The vectorizer is created before the transforms are attached; the
    # label binarizer is attached first, then the vectorizer.
    transformer = CountVectorizer(ngram_range=(1, 3),
                                  max_df=0.8,
                                  min_df=8,
                                  max_features=5000)
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)
    # One-vs-rest wrapper around logistic regression, added under the same
    # name that is passed to ``flow.export`` below.
    flow.add_model(
        Model(OneVsRestClassifier(LogisticRegression()), "LogisticRegression"))
    # presumably holds out 10% of the data for validation -- confirm against
    # TrainTestSplitValidation.
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    flow.train()
    flow.export(model_name="LogisticRegression", export_folder="model")