Ejemplo n.º 1
0
from Data.DataManager import DataManager
from ScriptToolkit import ScriptToolkit
from Preprocessing.DataReader import DataReader
from Preprocessing import ProcessorFactory
from Model.ConditionalRandomField import CRF

# Script entry point: sets up the sales-module corpus and starts a repeated
# CRF preprocessing experiment. Only the beginning of the loop body is
# visible in this excerpt.
if __name__ == '__main__':
    # Create the data manager; it supplies the file names used below
    # (source_data_file, train_file, test_file, log_wrong_sentences).
    DM = DataManager()
    # NOTE(review): presumably switches the working directory -- confirm.
    DM.change_pwd()
    DM.source_data_file = 'CorpusLabelData_SalesModule.txt'
    # NOTE(review): presumably deletes a leftover wrong-sentence log from an
    # earlier run -- confirm against DataManager.remove.
    DM.remove(DM.log_wrong_sentences)

    # Create the reader for the source corpus. This happens once, outside the
    # loop, and DR.Datums is handed to the processor on every cycle.
    # NOTE(review): standard_read() presumably populates DR.Datums -- confirm.
    DR = DataReader(source_data_file=DM.source_data_file)
    DR.standard_read()

    # Create the toolkit bound to the data manager. Note that
    # get_demo_features is called on the class, not on the ST instance.
    ST = ScriptToolkit(DM)
    features = ScriptToolkit.get_demo_features()

    # Result accumulators for the experiment; nothing in the visible part of
    # the loop appends to them yet.
    sent_accuracys, train_times, test_times = [], [], []
    cycle_times = 30  # number of times the experiment loop runs
    for i in range(cycle_times):
        # Data preprocessing: build a new CRF processor on every cycle from
        # the data manager's source/train/test file names.
        crf_processor = ProcessorFactory.CRFProcessorFactory().produce(
            source_data_file=DM.source_data_file,
            train_file=DM.train_file,
            test_file=DM.test_file)
        # Pass the datums read above to the processor.
        # NOTE(review): presumably produces the training split -- confirm.
        crf_processor.get_train_data(DR.Datums)
Ejemplo n.º 2
0
from Data.DataManager import DataManager
from Preprocessing import ProcessorFactory
from Model.ConditionalRandomField import CRF
from ScriptToolkit import ScriptToolkit
from Preprocessing.DataReader import DataReader

# Script entry point: sets up the merged/filtered corpus and starts a
# single-cycle CRF preprocessing run. Only the beginning of the loop body is
# visible in this excerpt.
if __name__ == '__main__':
    # Create the data manager; it supplies the file names used below
    # (source_data_file, train_file, test_file, log_wrong_sentences).
    DM = DataManager()
    # NOTE(review): presumably switches the working directory -- confirm.
    DM.change_pwd()
    DM.source_data_file = 'CorpusLabelData_MergedFilter_Update.txt'
    # NOTE(review): presumably deletes a leftover wrong-sentence log from an
    # earlier run -- confirm against DataManager.remove.
    DM.remove(DM.log_wrong_sentences)

    # Create the reader for the source corpus. This happens once, outside the
    # loop, and DR.Datums is handed to the processor inside it.
    # NOTE(review): standard_read() presumably populates DR.Datums -- confirm.
    DR = DataReader(source_data_file=DM.source_data_file)
    DR.standard_read()

    # Create the toolkit bound to the data manager. Note that
    # get_demo_features is called on the class, not on the ST instance.
    ST = ScriptToolkit(DM)
    features = ScriptToolkit.get_demo_features()

    # Result accumulator for the run; nothing in the visible part of the
    # loop appends to it yet.
    sent_accuracys = []
    cycle_times = 1  # the loop body runs exactly once with this setting
    for i in range(cycle_times):
        # Data preprocessing: build a CRF processor from the data manager's
        # source/train/test file names.
        crf_processor = ProcessorFactory.CRFProcessorFactory().produce(
            source_data_file=DM.source_data_file,
            train_file=DM.train_file,
            test_file=DM.test_file)
        # Pass the datums read above to the processor.
        # NOTE(review): presumably produces the training split -- confirm.
        crf_processor.get_train_data(DR.Datums)