# Analysis driver for the 'CorpusLabelData_MergedFilter.txt' corpus.
#
# NOTE(review): this script was stored collapsed onto one physical line, which
# cannot parse (statements ran together and the first `#` swallowed the rest
# of the line).  Statement boundaries below were restored from syntax alone.
from Data.DataManager import DataManager
from ScriptToolkit import ScriptToolkit
from Preprocessing.DataReader import DataReader
from Preprocessing import ProcessorFactory
from Model.ConditionalRandomField import CRF

if __name__ == '__main__':
    # create data manager
    DM = DataManager()
    DM.change_pwd()
    DM.source_data_file = 'CorpusLabelData_MergedFilter.txt'
    # presumably clears log output left over from an earlier run --
    # confirm against DataManager.remove
    DM.remove(DM.log_wrong_sentences)
    DM.remove(DM.log_best_dataset)
    DM.remove(DM.log_worst_dataset)

    # create toolkits
    ST = ScriptToolkit(DM)
    features = ScriptToolkit.get_demo_features()

    # create datums
    DR = DataReader(source_data_file=DM.source_data_file)
    DR.standard_read()

    # analysis
    sent_accuracys = []
    cycle_times = 1
    # Running extremes start at opposite bounds (0.0 / 1.0); the code that
    # updates them is not visible here -- presumably inside the loop below.
    max_accuracy, min_accuracy = 0.0, 1.0
    max_data, min_data = None, None
    for i in range(cycle_times):
        # data preprocessing
        # TODO(review): the loop body is missing from the source as received.
        # `pass` only keeps the file parseable; restore the real body.
        pass
# Repeated-cycle driver for the 'CorpusLabelData_SalesModule.txt' corpus:
# reads the corpus once, then builds a fresh CRF processor and feeds it the
# parsed datums on each of `cycle_times` iterations.
#
# NOTE(review): this script was stored collapsed onto one physical line, which
# cannot parse (statements ran together and the first `#` swallowed the rest
# of the line).  Statement boundaries below were restored from syntax alone.
from Data.DataManager import DataManager
from ScriptToolkit import ScriptToolkit
from Preprocessing.DataReader import DataReader
from Preprocessing import ProcessorFactory
from Model.ConditionalRandomField import CRF

if __name__ == '__main__':
    # create data manager
    DM = DataManager()
    DM.change_pwd()
    DM.source_data_file = 'CorpusLabelData_SalesModule.txt'
    # presumably clears the wrong-sentence log from an earlier run --
    # confirm against DataManager.remove
    DM.remove(DM.log_wrong_sentences)

    # create datums
    DR = DataReader(source_data_file=DM.source_data_file)
    DR.standard_read()

    # create toolkits
    ST = ScriptToolkit(DM)
    features = ScriptToolkit.get_demo_features()

    # analysis
    # Per-cycle result accumulators.  Nothing visible here appends to them;
    # NOTE(review): the remainder of the loop body appears to be cut off.
    sent_accuracys, train_times, test_times = [], [], []
    cycle_times = 30
    for i in range(cycle_times):
        # data preprocessing
        # A new processor is produced on every cycle rather than reused.
        crf_processor = ProcessorFactory.CRFProcessorFactory().produce(
            source_data_file=DM.source_data_file,
            train_file=DM.train_file,
            test_file=DM.test_file,
        )
        crf_processor.get_train_data(DR.Datums)
# Feature-set driver for the 'CorpusLabelData_SalesModule.txt' corpus:
# prepares one or more feature configurations and iterates over them.
#
# NOTE(review): this script was stored collapsed onto one physical line, which
# cannot parse (statements ran together and the first `#` swallowed the rest
# of the line).  Statement boundaries below were restored from syntax alone.
import os

from Data.DataManager import DataManager
from Preprocessing import ProcessorFactory
from Model.ConditionalRandomField import CRF
from Scripts.ScriptToolkit import ScriptToolkit
from Preprocessing.DataReader import DataReader

if __name__ == '__main__':
    # create data manager
    DM = DataManager()
    DM.change_pwd()
    DM.source_data_file = 'CorpusLabelData_SalesModule.txt'
    # presumably clears feature dumps left over from an earlier run --
    # confirm against DataManager.remove
    DM.remove(DM.features_train)
    DM.remove(DM.features_test)

    # create datums
    DR = DataReader(source_data_file=DM.source_data_file)
    DR.standard_read()

    # create toolkits
    ST = ScriptToolkit(DM)
    features = ScriptToolkit.get_demo_features()

    # feature setting
    # The flag value is the string '1', not an int or bool.
    features['printFeatures'] = '1'
    feature_sets = [features]
    # Alternative: load the feature sets from a file instead of the demo set.
    # feature_sets = ScriptToolkit.get_custom_features('custom_features.txt')

    train_times = []
    sent_accuracys = []
    # The loop variable rebinds `features`, shadowing the demo set above.
    for features in feature_sets:
        # TODO(review): the loop body is missing from the source as received.
        # `pass` only keeps the file parseable; restore the real body.
        pass