Exemple #1
0
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset
from transformer import TfidfVectorizer
from sklearn.linear_model import SGDClassifier

if __name__ == '__main__':
    # `join` and `dirname` are used below but are not imported at the top of
    # this script; import them here so the first statement does not fail
    # with NameError.
    from os.path import dirname, join

    # Locate the corpus relative to this file: two directories above the
    # script's own directory, then data/fb_bank_category/corpus/train.xlsx.
    data_file = join(dirname(dirname(dirname(__file__))), "data",
                     "fb_bank_category", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"

    flow.data(X, y)

    # Register the transformers with the flow: a label binarizer and a
    # TF-IDF vectorizer over 1- to 3-grams.
    # NOTE(review): presumably the binarizer is applied to y and the
    # vectorizer to X -- confirm against Flow.transform.
    transformer = TfidfVectorizer(ngram_range=(1, 3))
    flow.transform(MultiLabelBinarizer())
    flow.transform(transformer)

    # One-vs-rest wrapper around an SGD classifier, registered as "SGD".
    flow.add_model(Model(OneVsRestClassifier(SGDClassifier()), "SGD"))

    # flow.set_learning_curve(0.7, 1, 0.3)
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    flow.train()
    # NOTE(review): export_folder is assigned only after train() and no
    # export call follows in this script -- confirm whether that is intended.
    flow.export_folder = "model"
Exemple #2
0
from os.path import dirname, join
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.model.crf import CRF
from underthesea_flow.transformer.tagged import TaggedTransformer
from underthesea_flow.validation.validation import TrainTestSplitValidation

from preprocess import vlsp2016

if __name__ == '__main__':
    # ---------------------------------------------------------------------
    # Experiment setup: create the flow and point its log folder at a
    # "logs" directory located beside this script.
    # ---------------------------------------------------------------------
    flow = Flow()
    log_dir = join(dirname(__file__), "logs")
    flow.log_folder = log_dir

    # ---------------------------------------------------------------------
    # Data: three alternative sources; only the last one is active.
    # ---------------------------------------------------------------------
    #
    # 1) Quick experiment on the sample test split:
    #      file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "test.txt")
    #      sentences = vlsp2016.load_data(file)
    #
    # 2) Evaluation on a training split (full or sample corpus):
    #      file = join(dirname(__file__), "corpus", "vlsp2016", "train.txt")
    #      file = join(dirname(__file__), "corpus", "sample_vlsp_2016", "train.txt")
    #      sentences = vlsp2016.load_data(file)
    #
    # 3) Saving a model: start from an empty sentence list.
    sentences = []
Exemple #3
0
from os.path import dirname, join
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation

from load_data import load_dataset
from model.model_fasttext import FastTextClassifier

if __name__ == '__main__':
    # Name the model once and reuse it both when registering the model and
    # when saving it, so the two cannot drift apart.
    model_name = "FastText"
    model_filename = join("model", "fasttext.model")

    # Locate the dataset relative to this file: three directories above the
    # script's own directory, then data/fb_bank_act_2/corpus/data.xlsx.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))), "data",
                     "fb_bank_act_2", "corpus", "data.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"

    flow.data(X, y)

    flow.add_model(Model(FastTextClassifier(), model_name))

    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    # flow.validation()

    # Train, then save the trained model under model_filename.
    flow.train()
    flow.save_model(model_name=model_name, model_filename=model_filename)