Exemple #1
0
    # word unigram and bigram
    "T[-2]",
    "T[-1]",
    "T[0]",
    "T[1]",
    "T[2]",
    "T[-2,-1]",
    "T[-1,0]",
    "T[0,1]",
    "T[1,2]",
    # pos unigram and bigram
    "T[-3][1]",
    "T[-2][1]",
    "T[-1][1]",
    "T[-3,-2][1]",
    "T[-2,-1][1]",
]
# Wrap the feature template in the project's TaggedTransformer.
# NOTE(review): `template` is presumably the list of feature strings that
# ends on the line above -- its opening line is not visible here; confirm.
transformer = TaggedTransformer(template)

# ============================================================================ #
#                                Model
# ============================================================================ #
# Hyper-parameters handed to the CRF estimator via its ``params`` argument.
crf_params = {
    # L1 regularisation coefficient
    "c1": 1.0,
    # L2 regularisation coefficient
    "c2": 0.001,
    # iteration limit passed to the CRF trainer
    "max_iterations": 1000,
    # also generate transition features for label pairs that are possible
    # but never observed in the training data
    "feature.possible_transitions": True,
}
# Wrap a CRF estimator configured with `crf_params` in the project's Model
# class, labelled "CRF".
model = Model(CRF(params=crf_params), "CRF")
Exemple #2
0
        "T[-1][3]",
    ]
    # Wrap the feature template in a TaggedTransformer and hand it to the flow.
    # NOTE(review): `template` and `flow` are created earlier in this function,
    # outside the visible lines -- confirm their setup there.
    transformer = TaggedTransformer(template)

    flow.transform(transformer)

    # =========================================================================#
    #                               Models
    # =========================================================================#
    # Hyper-parameters passed to the CRF estimator through `params`.
    crf_params = {
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 1000,  # iteration limit passed to the CRF trainer
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    }
    # Register the CRF with the flow under the name "CRF".
    flow.add_model(Model(CRF(params=crf_params), "CRF"))

    # =========================================================================#
    #                              Evaluation
    # =========================================================================#
    # Two scores are requested by name: 'f1_chunk' and 'accuracy_chunk'.
    flow.add_score('f1_chunk')
    flow.add_score('accuracy_chunk')

    # Validation uses a train/test split with test_size=0.1; the commented-out
    # line below is an alternative setting with test_size=0.3.
    flow.set_validation(TrainTestSplitValidation(test_size=0.1))
    # flow.set_validation(TrainTestSplitValidation(test_size=0.3))

    flow.train()

    # Saving the trained model is currently disabled.
    # flow.save_model("CRF", filename="ner_crf_20171006_template_2.model")
Exemple #3
0
from sklearn.multiclass import OneVsRestClassifier
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset
from transformer import TfidfVectorizer

if __name__ == '__main__':
    # The corpus sits under the directory three levels above this script.
    root_dir = dirname(dirname(dirname(__file__)))
    data_file = join(
        root_dir, "data", "fb_bank_act_3", "corpus", "train.xlsx")
    X, y = load_dataset(data_file)

    # Set up the flow and give it the loaded data.
    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)

    # Transformers are registered in this order: MultiLabelBinarizer first,
    # then the TF-IDF vectorizer over unigrams and bigrams.
    transformer = TfidfVectorizer(ngram_range=(1, 2))
    label_binarizer = MultiLabelBinarizer()
    flow.transform(label_binarizer)
    flow.transform(transformer)

    # One-vs-rest logistic regression, registered as "LogisticRegression".
    classifier = OneVsRestClassifier(LogisticRegression())
    flow.add_model(Model(classifier, "LogisticRegression"))

    # flow.set_learning_curve(0.7, 1, 0.3)
    validation = TrainTestSplitValidation(test_size=0.1)
    flow.set_validation(validation)

    # Train, then export the model registered above into the "model" folder.
    flow.train()
    flow.export_folder = "model"
    flow.export(model_name="LogisticRegression")
Exemple #4
0
from sklearn.multiclass import OneVsRestClassifier
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation
from sklearn.preprocessing import MultiLabelBinarizer
from load_data import load_dataset
from transformer import TfidfVectorizer
from sklearn.linear_model import SGDClassifier

if __name__ == '__main__':
    # The corpus folder sits under the directory three levels above this
    # script.
    corpus_dir = join(dirname(dirname(dirname(__file__))), "data",
                      "fb_bank_category", "corpus")
    data_file = join(corpus_dir, "train.xlsx")
    X, y = load_dataset(data_file)

    # Set up the flow and give it the loaded data.
    flow = Flow()
    flow.log_folder = "log"
    flow.data(X, y)

    # Transformers are registered in this order: MultiLabelBinarizer first,
    # then the TF-IDF vectorizer over 1- to 3-grams.
    transformer = TfidfVectorizer(ngram_range=(1, 3))
    label_binarizer = MultiLabelBinarizer()
    flow.transform(label_binarizer)
    flow.transform(transformer)

    # One-vs-rest SGD classifier, registered under the name "SGD".
    classifier = OneVsRestClassifier(SGDClassifier())
    flow.add_model(Model(classifier, "SGD"))

    # flow.set_learning_curve(0.7, 1, 0.3)
    validation = TrainTestSplitValidation(test_size=0.1)
    flow.set_validation(validation)

    # Train, then export the model registered above into the "model" folder.
    flow.train()
    flow.export_folder = "model"
    flow.export(model_name="SGD")
Exemple #5
0
from os.path import dirname, join
from underthesea_flow.flow import Flow
from underthesea_flow.model import Model
from underthesea_flow.validation.validation import TrainTestSplitValidation

from load_data import load_dataset
from model.model_fasttext import FastTextClassifier

if __name__ == '__main__':
    # The corpus sits under the directory four levels above this script.
    data_file = join(dirname(dirname(dirname(dirname(__file__)))), "data",
                     "fb_bank_act_2", "corpus", "data.xlsx")
    X, y = load_dataset(data_file)

    flow = Flow()
    flow.log_folder = "log"

    flow.data(X, y)

    # One name is used both to register the model and to save it, so the two
    # calls cannot drift apart.  (Previously `model_name` was assigned but
    # never used, and the literal "FastText" was repeated in both calls.)
    model_name = "FastText"
    model_filename = join("model", "fasttext.model")

    flow.add_model(Model(FastTextClassifier(), model_name))

    flow.set_validation(TrainTestSplitValidation(test_size=0.1))

    # flow.validation()

    flow.train()
    # NOTE(review): presumably save_model looks the model up by the name it
    # was registered under -- confirm against Flow.save_model.
    flow.save_model(model_name=model_name, model_filename=model_filename)