def to_xbill(self) -> XBill:
    """Convert this bill record into a unified XBill object.

    Status text is mapped onto the BillStatus enum, placeholder '/'
    fields are dropped from the remarks, and the final status/category
    is assigned by the Classifier.
    """

    def unify_status(fund_status):
        # Map the Chinese fund-status text onto the BillStatus enum;
        # anything that is neither payout nor income is treated as an
        # internal transfer.
        if '支出' in fund_status:
            return BillStatus.PAYOUT
        if '收入' in fund_status:
            return BillStatus.INCOME
        return BillStatus.INTERNAL_TRANS

    def format_remarks(*parts) -> str:
        # Drop placeholder '/' entries and join the rest with ';'.
        return ';'.join(p for p in parts if p != '/')

    bill = XBill()
    bill.account = self.account
    bill.amount = self.amount
    bill.currency = '人民币'
    bill.trans_time = self.trans_time
    bill.status = unify_status(self.fund_status)
    bill.trader_name = self.trader_name
    bill.product_name = self.product_name
    bill.remarks = format_remarks(self.pay_type, self.trans_type,
                                  self.trans_status, self.remarks)
    bill.associate_id = -1
    bill.status, bill.category, bill.subcategory = Classifier().classify(bill)
    return bill
def to_xbill(self) -> XBill:
    """Convert this bill record into a unified XBill object.

    The bill amount includes the service fee, create_time is used as the
    transaction time, and the source is prefixed onto the remarks.
    """

    def unify_status(fund_status):
        # Map the Chinese fund-status text onto the BillStatus enum.
        # NOTE(review): unrecognized statuses are passed through unchanged
        # (a raw string, not a BillStatus member) — confirm downstream
        # consumers handle that case.
        if '支出' in fund_status:
            return BillStatus.PAYOUT
        if '收入' in fund_status:
            return BillStatus.INCOME
        if '资金转移' in fund_status:
            return BillStatus.INTERNAL_TRANS
        return fund_status

    bill = XBill()
    bill.account = self.account
    bill.amount = self.amount + self.service_fee
    bill.currency = '人民币'
    bill.trans_time = self.create_time
    bill.status = unify_status(self.fund_status)
    bill.trader_name = self.trader_name
    bill.product_name = self.product_name
    bill.remarks = self.source + ';' + self.remarks
    bill.associate_id = -1
    bill.status, bill.category, bill.subcategory = Classifier().classify(bill)
    return bill
def Train(self, _Classifier, trainFeaturesExtractor, Full):
    """Create self.classifier for the named classifier type and, when
    Full is true, run its training pass.

    _Classifier            -- classifier name used to select the XML config.
    trainFeaturesExtractor -- extractor providing .features and .labels.
    Full                   -- when true, train immediately after construction.
    """
    # Per-classifier configuration file and model serialization target.
    configFileClassifier = os.path.join(
        self.basePath, "Classifier", "Configurations",
        "Configurations-" + _Classifier + ".xml")
    modelSerializationFile = os.path.join(
        self.basePath, "Classifier", "Output", "classifier_model.bin")

    # Start the Classifier (no test features/labels at construction time).
    self.classifier = Classifier(configFileClassifier,
                                 modelSerializationFile,
                                 trainFeaturesExtractor.features,
                                 trainFeaturesExtractor.labels,
                                 [], [])
    if Full == True:
        self.classifier.Train()
def to_xbill(self):
    """Convert this record into a unified XBill object, delegating the
    final status/category assignment to the Classifier."""
    bill = XBill()
    bill.account = self.account
    bill.amount = self.amount
    bill.currency = self.currency
    bill.trans_time = self.trans_time
    bill.status = self.status
    # NOTE(review): summary feeds product_name while product_name feeds
    # remarks — looks deliberate but verify against the XBill schema.
    bill.product_name = self.summary
    bill.remarks = self.product_name
    bill.associate_id = -1
    bill.status, bill.category, bill.subcategory = Classifier().classify(bill)
    return bill
# Optionally reload previously saved labels for the test set.
if LOAD_LABELS:
    testFeaturesExtractor.LoadLabels()

# Optionally dump extracted features to text files for inspection.
if DUMP_FEATURES:
    trainFeaturesExtractor.DumpFeaturesToTxt(trainExportFileName)
    testFeaturesExtractor.DumpFeaturesToTxt(testExportFileName)

if CLASSIFIER:
    # The serialization file to save the trained model.
    # FIX: the original literal contained an invalid "\c" escape sequence
    # (single backslash before "classifier_model.bin"); same runtime value,
    # but now a well-formed string literal.
    modelSerializationFile = ".\\Classifier\\Output\\classifier_model.bin"

    # Start the Classifier:
    # ---------------------
    if SVM_CLASSIFIER:
        classifier = Classifier(modelSerializationFile, 'SVM',
                                trainFeaturesExtractor.features,
                                trainFeaturesExtractor.labels,
                                testFeaturesExtractor.features,
                                testFeaturesExtractor.labels)
        if not LOAD_MODEL:
            # Train a fresh model and persist it.
            classifier.Train()
            classifier.SaveModel()
        else:
            classifier.LoadModel()

        # Test
        labels, acc, val = classifier.Test()

        # Build the confusion matrix from true vs. predicted labels.
        (mConfusionMatrix, mNormalConfusionMatrix,
         vNumTrainExamplesPerClass, vAccuracyPerClass,
         nOverallAccuracy) = classifier.BuildConfusionMatrix(
            testFeaturesExtractor.labels, labels)
def __init__(self):
    """Constructor.

    Builds the training dataset from XLSX, then constructs and trains
    two parallel pipelines (lexicon and Tasi): language model, numeric
    TF feature extractor, and classifier.
    """
    # Start the DatasetBuilder
    # -------------------------
    configFileDatasetBuilder = os.path.join('DatasetBuilder', 'Configurations', 'Configurations.xml')
    datasetSerializationFile = os.path.join('DatasetBuilder', 'Output', 'dataset.bin')
    xlsxTrainFileName = os.path.join('DatasetBuilder', 'Input', 'train')
    datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [], datasetSerializationFile)
    datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(xlsxTrainFileName)

    # Inputs/outputs shared by both language models.
    stopWordsFileName = os.path.join('LanguageModel', 'Input', 'stop_words.txt')
    linksDBFile = os.path.join('LanguageModel', 'Output', 'links_database.txt')
    languageModelSerializationFile = os.path.join('LanguageModel', 'Output', 'language_model.bin')

    def build_language_model(configFile):
        # Construct and build one LanguageModel over the training set.
        model = LanguageModel(configFile, stopWordsFileName,
                              languageModelSerializationFile, linksDBFile,
                              datasetBuilder.trainSet)
        model.BuildLanguageModel()
        return model

    self.languageModel_lexicon = build_language_model(
        os.path.join('LanguageModel', 'Configurations', 'Configurations-lexicon.xml'))
    self.languageModel_Tasi = build_language_model(
        os.path.join('LanguageModel', 'Configurations', 'Configurations-Tasi.xml'))

    # Serialization targets shared by both feature extractors.
    trainFeaturesSerializationFile = os.path.join('FeaturesExtractor', 'Output', 'train_features.bin')
    trainLabelsSerializationFile = os.path.join('FeaturesExtractor', 'Output', 'train_labels.bin')

    def extract_features(configFile, languageModel):
        # Construct one FeaturesExtractor and extract numeric TF features.
        extractor = FeaturesExtractor(configFile,
                                      trainFeaturesSerializationFile,
                                      trainLabelsSerializationFile,
                                      languageModel,
                                      datasetBuilder.trainSet)
        extractor.ExtractNumTfFeatures()
        return extractor

    trainFeaturesExtractor_Lexicon = extract_features(
        os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-lexicon.xml'),
        self.languageModel_lexicon)
    trainFeaturesExtractor_Tasi = extract_features(
        os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-Tasi.xml'),
        self.languageModel_Tasi)

    # Start the Classifier:
    # ---------------------
    configFileClassifier_Lexicon = os.path.join('Classifier', 'Configurations', 'Configurations-lexicon.xml')
    configFileClassifier_Tasi = os.path.join('Classifier', 'Configurations', 'Configurations-Tasi.xml')
    modelSerializationFile = os.path.join('Classifier', 'Output', 'classifier_model.bin')

    print(trainFeaturesExtractor_Tasi.labels[:4])
    print([i['label'] for i in trainFeaturesExtractor_Lexicon.dataSet[:4]])

    self.classifier_Lexicon = Classifier(configFileClassifier_Lexicon, modelSerializationFile,
                                         trainFeaturesExtractor_Lexicon.features,
                                         trainFeaturesExtractor_Lexicon.labels, [], [])
    self.classifier_Tasi = Classifier(configFileClassifier_Tasi, modelSerializationFile,
                                      trainFeaturesExtractor_Tasi.features,
                                      trainFeaturesExtractor_Tasi.labels, [], [])

    # Train
    self.classifier_Lexicon.Train()
    self.classifier_Tasi.Train()
class Filter(object):
    """Sentiment filter: trains a lexicon and a Tasi classifier at
    construction time, then classifies incoming text per stock."""

    def __init__(self):
        """Constructor.

        Builds the training dataset from XLSX, then constructs and trains
        both pipelines (lexicon and Tasi): language model, numeric TF
        feature extractor, and classifier.
        """
        # Start the DatasetBuilder
        # -------------------------
        configFileDatasetBuilder = os.path.join('DatasetBuilder', 'Configurations', 'Configurations.xml')
        datasetSerializationFile = os.path.join('DatasetBuilder', 'Output', 'dataset.bin')
        xlsxTrainFileName = os.path.join('DatasetBuilder', 'Input', 'train')
        datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [], datasetSerializationFile)
        datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(xlsxTrainFileName)

        # Language models: both share stop words, links DB and serialization.
        configFileLanguageModel_lexicon = os.path.join('LanguageModel', 'Configurations', 'Configurations-lexicon.xml')
        configFileLanguageModel_Tasi = os.path.join('LanguageModel', 'Configurations', 'Configurations-Tasi.xml')
        stopWordsFileName = os.path.join('LanguageModel', 'Input', 'stop_words.txt')
        linksDBFile = os.path.join('LanguageModel', 'Output', 'links_database.txt')
        languageModelSerializationFile = os.path.join('LanguageModel', 'Output', 'language_model.bin')

        self.languageModel_lexicon = LanguageModel(configFileLanguageModel_lexicon, stopWordsFileName,
                                                   languageModelSerializationFile, linksDBFile,
                                                   datasetBuilder.trainSet)
        self.languageModel_lexicon.BuildLanguageModel()
        self.languageModel_Tasi = LanguageModel(configFileLanguageModel_Tasi, stopWordsFileName,
                                                languageModelSerializationFile, linksDBFile,
                                                datasetBuilder.trainSet)
        self.languageModel_Tasi.BuildLanguageModel()

        # Feature extraction (numeric TF) for both language models.
        configFileFeaturesExtractor_Lexicon = os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-lexicon.xml')
        configFileFeaturesExtractor_Tasi = os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-Tasi.xml')
        trainFeaturesSerializationFile = os.path.join('FeaturesExtractor', 'Output', 'train_features.bin')
        trainLabelsSerializationFile = os.path.join('FeaturesExtractor', 'Output', 'train_labels.bin')

        trainFeaturesExtractor_Lexicon = FeaturesExtractor(configFileFeaturesExtractor_Lexicon,
                                                           trainFeaturesSerializationFile,
                                                           trainLabelsSerializationFile,
                                                           self.languageModel_lexicon,
                                                           datasetBuilder.trainSet)
        trainFeaturesExtractor_Lexicon.ExtractNumTfFeatures()
        trainFeaturesExtractor_Tasi = FeaturesExtractor(configFileFeaturesExtractor_Tasi,
                                                        trainFeaturesSerializationFile,
                                                        trainLabelsSerializationFile,
                                                        self.languageModel_Tasi,
                                                        datasetBuilder.trainSet)
        trainFeaturesExtractor_Tasi.ExtractNumTfFeatures()

        # Start the Classifier:
        # ---------------------
        configFileClassifier_Lexicon = os.path.join('Classifier', 'Configurations', 'Configurations-lexicon.xml')
        configFileClassifier_Tasi = os.path.join('Classifier', 'Configurations', 'Configurations-Tasi.xml')
        modelSerializationFile = os.path.join('Classifier', 'Output', 'classifier_model.bin')

        print(trainFeaturesExtractor_Tasi.labels[:4])
        print([i['label'] for i in trainFeaturesExtractor_Lexicon.dataSet[:4]])

        self.classifier_Lexicon = Classifier(configFileClassifier_Lexicon, modelSerializationFile,
                                             trainFeaturesExtractor_Lexicon.features,
                                             trainFeaturesExtractor_Lexicon.labels, [], [])
        self.classifier_Tasi = Classifier(configFileClassifier_Tasi, modelSerializationFile,
                                          trainFeaturesExtractor_Tasi.features,
                                          trainFeaturesExtractor_Tasi.labels, [], [])

        # Train
        self.classifier_Lexicon.Train()
        self.classifier_Tasi.Train()

    def Classify(self, text, stockName):
        """Classify each item of *text* with the stock-appropriate
        classifier and return the predicted labels.

        text      -- iterable of text strings to classify.
        stockName -- 'Tasi' selects the Tasi pipeline; anything else
                     selects the lexicon pipeline.
        """
        testSet = [{'label': '', 'text': t} for t in text]
        if stockName == 'Tasi':
            # Configurations file xml of the features extractor
            configFileFeaturesExtractor = os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-Tasi.xml')
            testFeaturesExtractor = FeaturesExtractor(configFileFeaturesExtractor, None, None,
                                                      self.languageModel_Tasi, testSet)
            testFeaturesExtractor.ExtractNumTfFeatures()
            self.classifier_Tasi.testFeatures = testFeaturesExtractor.features
            # Dummy targets: Test() requires targets but true labels are unknown.
            self.classifier_Tasi.testTargets = [1] * len(self.classifier_Tasi.testFeatures)
            label, acc, val = self.classifier_Tasi.Test()
        else:
            configFileFeaturesExtractor = os.path.join('FeaturesExtractor', 'Configurations', 'Configurations-lexicon.xml')
            testFeaturesExtractor = FeaturesExtractor(configFileFeaturesExtractor, None, None,
                                                      self.languageModel_lexicon, testSet)
            # FIX: the original never extracted features in this branch
            # before reading .features; mirror the Tasi branch and __init__,
            # which both use ExtractNumTfFeatures.
            testFeaturesExtractor.ExtractNumTfFeatures()
            self.classifier_Lexicon.testFeatures = testFeaturesExtractor.features
            # FIX: the original indexed into a just-emptied list
            # (testTargets[i] = 1 -> IndexError); build the list instead.
            self.classifier_Lexicon.testTargets = [1] * len(self.classifier_Lexicon.testFeatures)
            label, acc, val = self.classifier_Lexicon.Test()
        return label
class Filter(object):
    """Selects, trains and applies the best classifier for one stock.

    The classifier type per stock ('Lexicon', 'DT' or 'SVM') is loaded
    from the pickled StockToClassifier mapping under basePath.
    """

    def __init__(self, basePath, stockName, Retrain):
        """Constructor.

        basePath  -- root directory of the model artifacts.
                     NOTE(review): when None the original falls back to
                     self.basePath, which is unset at this point and would
                     raise AttributeError — confirm the intended default.
        stockName -- stock whose configured classifier will be used.
        Retrain   -- when False, train immediately from the XLSX train set;
                     when True, only a DatasetBuilder is created.
        """
        if basePath is None:
            # Preserved from the original (see NOTE above).
            self.basePath = self.basePath
        else:
            self.basePath = basePath
        self.stockName = stockName

        # FIX: close the pickle file deterministically (it was left open).
        with open(os.path.join(self.basePath, 'StockToClassifier.bin'),
                  'rb') as serializationFile:
            self.StockToClassifier = pickle.load(serializationFile)
        self.usedClassifier = self.StockToClassifier[self.stockName]

        # Start the DatasetBuilder
        # -------------------------
        configFileDatasetBuilder = os.path.join(self.basePath, "DatasetBuilder",
                                                "Configurations", "Configurations.xml")
        datasetSerializationFile = os.path.join(self.basePath, "DatasetBuilder",
                                                "Output", "dataset.bin")
        if not Retrain:
            # The XLSX file name for the train set.
            xlsxTrainFileName = os.path.join(self.basePath, "DatasetBuilder",
                                             "Input", "train")
            datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                            datasetSerializationFile)
            datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(
                xlsxTrainFileName)
            self.RunLanguageModel(self.usedClassifier, datasetBuilder.trainSet)
            trainFeaturesExtractor = self.RunFeatureExtractor(
                self.usedClassifier, datasetBuilder.trainSet)
            self.Train(self.usedClassifier, trainFeaturesExtractor, True)
        else:
            # Retraining path: only the DatasetBuilder is prepared here.
            datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                            datasetSerializationFile)

    def Classify(self, text):
        """Extract features for *text* with the configured classifier's
        extractor and return the predicted labels."""
        testSet = [{'label': '', 'text': t} for t in text]
        # Configurations file xml of the features extractor.
        configFileFeaturesExtractor = os.path.join(
            self.basePath, "FeaturesExtractor", "Configurations",
            "Configurations-" + self.usedClassifier + ".xml")
        testFeaturesExtractor = FeaturesExtractor(configFileFeaturesExtractor,
                                                  None, None,
                                                  self.languageModel, testSet)
        if self.usedClassifier == "Lexicon":
            testFeaturesExtractor.ExtractLexiconFeatures()
        else:
            testFeaturesExtractor.ExtractNumTfFeatures()
        self.classifier.testFeatures = testFeaturesExtractor.features
        # Dummy targets: Test() requires targets but true labels are unknown.
        self.classifier.testTargets = [1] * len(self.classifier.testFeatures)
        label, acc, val = self.classifier.Test()
        return label

    def RunLanguageModel(self, _Classifier, trainSet):
        """Build self.languageModel for the named classifier; the Lexicon
        classifier additionally loads its synonym lexicon from text."""
        configFileLanguageModel = os.path.join(
            self.basePath, "LanguageModel", "Configurations",
            "Configurations-" + _Classifier + ".xml")
        stopWordsFileName = os.path.join(self.basePath, "LanguageModel",
                                         "Input", "stop_words.txt")
        linksDBFile = os.path.join(self.basePath, "LanguageModel",
                                   "Output", "links_database.txt")
        # The serialization file to save the model.
        languageModelSerializationFile = os.path.join(
            self.basePath, "LanguageModel", "Output", "language_model.bin")

        self.languageModel = LanguageModel(configFileLanguageModel,
                                           stopWordsFileName,
                                           languageModelSerializationFile,
                                           linksDBFile, trainSet)
        self.languageModel.BuildLanguageModel()
        if _Classifier == "Lexicon":
            langModelTxtLoadFile = os.path.join(
                self.basePath, "LanguageModel", "Input",
                "language_model_lexicon_synonyms.txt")
            self.languageModel.LoadModelFromTxtFile(langModelTxtLoadFile)

    def RunFeatureExtractor(self, _Classifier, trainSet):
        """Extract training features with the extractor matching the
        classifier name; return the configured extractor."""
        configFileFeaturesExtractor = os.path.join(
            self.basePath, "FeaturesExtractor", "Configurations",
            "Configurations-" + _Classifier + ".xml")
        trainFeaturesSerializationFile = os.path.join(
            self.basePath, "FeaturesExtractor", "Output", "train_features.bin")
        trainLabelsSerializationFile = os.path.join(
            self.basePath, "FeaturesExtractor", "Output", "train_labels.bin")

        trainFeaturesExtractor = FeaturesExtractor(
            configFileFeaturesExtractor, trainFeaturesSerializationFile,
            trainLabelsSerializationFile, self.languageModel, trainSet)
        if _Classifier == "Lexicon":
            trainFeaturesExtractor.ExtractLexiconFeatures()
        else:
            trainFeaturesExtractor.ExtractNumTfFeatures()
        return trainFeaturesExtractor

    def Train(self, _Classifier, trainFeaturesExtractor, Full):
        """Create self.classifier; when Full is true, train it."""
        configFileClassifier = os.path.join(
            self.basePath, "Classifier", "Configurations",
            "Configurations-" + _Classifier + ".xml")
        modelSerializationFile = os.path.join(self.basePath, "Classifier",
                                              "Output", "classifier_model.bin")
        self.classifier = Classifier(configFileClassifier,
                                     modelSerializationFile,
                                     trainFeaturesExtractor.features,
                                     trainFeaturesExtractor.labels,
                                     [], [])
        if Full:
            self.classifier.Train()

    def GetBestClassifier(self, trainSet):
        """Cross-validate Lexicon, DT and SVM on trainSet and record the
        best-scoring classifier name in StockToClassifier for this stock."""
        accuracies = {}
        for name in ("Lexicon", "DT", "SVM"):
            self.RunLanguageModel(name, trainSet)
            extractor = self.RunFeatureExtractor(name, trainSet)
            self.Train(name, extractor, False)
            accuracies[name] = self.classifier.getCrossValidationAccuarcy()
        # max() over insertion order resolves ties Lexicon > DT > SVM,
        # matching the original if/elif chain.
        self.StockToClassifier[self.stockName] = max(accuracies,
                                                     key=accuracies.get)
# Collective lexicon-sentiment features for the test set.
testFeatures, testLabels = featuresExtractor.ExtractCollectiveLexiconSentimentFeatures(testSet)

# Alternative feature pipelines, kept for reference:
#   BoW = featuresExtractor.ConstructBowWithSentiWordNet(dataSet)
#   trainFeatures, trainLabels = featuresExtractor.ExtractBoWSentiWordNetFeatures(trainSet, BoW)
#   testFeatures, testLabels = featuresExtractor.ExtractBoWSentiWordNetFeatures(testSet, BoW)
#   trainFeatures, trainLabels = featuresExtractor.ExtractBoWSentiWordNetCollectiveFeatures(trainSet, BoW)
#   testFeatures, testLabels = featuresExtractor.ExtractBoWSentiWordNetCollectiveFeatures(testSet, BoW)

# Initialize Classifier
#######################
classifier = Classifier()

# Lexicon-based scoring requires no training step.
print('Training accuracy = ' + str(classifier.LexiconTest(trainFeatures, trainLabels)))
print('Test accuracy = ' + str(classifier.LexiconTest(testFeatures, testLabels)))

# Neural-network classifier: train, then derive accuracy from error counts.
train_err = classifier.NNTrain(trainFeatures, trainLabels, plot=True)
test_err = classifier.NNTest(testFeatures, testLabels, plot=True)
print('Training accuracy = ' + str((len(trainFeatures) - train_err) / len(trainFeatures)))
print('Test accuracy = ' + str((len(testFeatures) - test_err) / len(testFeatures)))
def init(cls, save_path, use_backend=True, pre_stocks=None):
    """Build, train and persist one sentiment model per stock.

    save_path   -- directory handed to cls.save for persisting each model.
    use_backend -- when True, training data is loaded from the backend;
                   otherwise from the XLSX train file.
    pre_stocks  -- optional stock list overriding the module-level `stocks`.
    """
    global stocks
    if pre_stocks:
        stocks = pre_stocks
    for stock in stocks:
        # FIX: corrected typo in the log message ("Buildind" -> "Building").
        print('Building model for %s' % stock)
        stock_model = {}

        # Start the DatasetBuilder
        # -------------------------
        configFileDatasetBuilder = os.path.join('DatasetBuilder',
                                                'Configurations',
                                                'Configurations.xml')
        datasetSerializationFile = os.path.join('DatasetBuilder', 'Output',
                                                'dataset.bin')
        xlsxTrainFileName = os.path.join('DatasetBuilder', 'Input', 'train')
        datasetBuilder = DatasetBuilder(configFileDatasetBuilder, [],
                                        datasetSerializationFile)
        if use_backend:
            datasetBuilder.trainSet = datasetBuilder.GetDatasetFromBackend(
                stock)
        else:
            datasetBuilder.trainSet = datasetBuilder.GetDatasetFromXLSXFile(
                xlsxTrainFileName, stock)

        # Skip stocks without enough labeled samples.
        if len(datasetBuilder.trainSet) < NMIN_SET:
            print("Not enough data: ", len(datasetBuilder.trainSet))
            continue
        # Drop the first NVALID samples — presumably a validation holdout;
        # TODO confirm against the consumer of these models.
        datasetBuilder.trainSet = datasetBuilder.trainSet[NVALID:]

        # Language model (built from the Tasi configuration).
        configFileLanguageModel_lexicon = os.path.join(
            'LanguageModel', 'Configurations', 'Configurations-Tasi.xml')
        stopWordsFileName = os.path.join('LanguageModel', 'Input',
                                         'stop_words.txt')
        linksDBFile = os.path.join('LanguageModel', 'Output',
                                   'links_database.txt')
        languageModelSerializationFile = os.path.join(
            'LanguageModel', 'Output', 'language_model.bin')
        stock_model['languageModel_lexicon'] = LanguageModel(
            configFileLanguageModel_lexicon, stopWordsFileName,
            languageModelSerializationFile, linksDBFile,
            datasetBuilder.trainSet)
        stock_model['languageModel_lexicon'].BuildLanguageModel()

        # Feature extraction (sparse numeric TF features).
        configFileFeaturesExtractor_Lexicon = os.path.join(
            'FeaturesExtractor', 'Configurations', 'Configurations-Tasi.xml')
        trainFeaturesSerializationFile = os.path.join(
            'FeaturesExtractor', 'Output', 'train_features.bin')
        trainLabelsSerializationFile = os.path.join(
            'FeaturesExtractor', 'Output', 'train_labels.bin')
        trainFeaturesExtractor_Lexicon = FeaturesExtractor(
            configFileFeaturesExtractor_Lexicon,
            trainFeaturesSerializationFile, trainLabelsSerializationFile,
            stock_model['languageModel_lexicon'], datasetBuilder.trainSet)
        trainFeaturesExtractor_Lexicon.ExtractNumTfFeatures(sparse=True)

        # Classifier built on the sparse features, then trained.
        configFileClassifier_Lexicon = os.path.join(
            'Classifier', 'Configurations', 'Configurations-Tasi.xml')
        modelSerializationFile = os.path.join('Classifier', 'Output',
                                              'classifier_model.bin')
        stock_model['classifier_Lexicon'] = Classifier(
            configFileClassifier_Lexicon, modelSerializationFile,
            trainFeaturesExtractor_Lexicon.sparse_features,
            trainFeaturesExtractor_Lexicon.labels, [], [])

        # Train
        stock_model['classifier_Lexicon'].Train()
        stock_model['training_samples'] = len(datasetBuilder.trainSet)
        cls.save(save_path, stock, stock_model)
        print("----------------------------------------------------")
import argparse import pandas as pd import time import numpy as np import multiprocessing as mp from sklearn.model_selection import KFold from sklearn.metrics import accuracy_score, precision_recall_fscore_support from Classifier.Classifier import Classifier, Feature NO_PARALLELIZED_PROCESSES_DYNAMIC = 5 NO_PARALLELIZED_PROCESSES_STATIC = 5 RANDOMIZER_SEED = 1 CLF_OBJ = Classifier() DATA_FILE_PATH = '' def main(): start = time.time() ## Get Command-line Arguments ################# parser = argparse.ArgumentParser() parser.add_argument('-d', '--data', default='../../data', help='') opts = parser.parse_args() ############################################### global DATA_FILE_PATH DATA_FILE_PATH = opts.data InitializeOutputFile() global RANDOMIZER_SEED
import statistics import time import multiprocessing as mp from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_recall_fscore_support from sklearn import model_selection from scipy.sparse import coo_matrix from Classifier.Classifier import Classifier from Helper.DebugPrint import DebugPrint import matplotlib.pyplot as plt plt.style.use('ggplot') clf = Classifier() RANDOMIZER_SEED = 1 def main(): start = time.time() ## Get Command-line Arguments ################# parser = argparse.ArgumentParser() opts = parser.parse_args() ############################################### ## Build the Training Set and Testing Set ##### training_data_dict = LoadDataSet('data/train_data.xlsx') testing_data_dict = LoadDataSet('data/test_data.xlsx') unlabeled_data_dict = LoadDataSet('data/unlabeled_data.xlsx')
# Extract lexicon features for the test set (alternative extractors and
# save/dump steps kept for reference).
#testFeaturesExtractor.ExtractTFIDFFeatures()
#testFeaturesExtractor.ExtractNumTfFeatures()
#testFeaturesExtractor.ExtractKLFeatures()
testFeaturesExtractor.ExtractLexiconFeatures()
#testFeaturesExtractor.SaveFeatures()
#testFeaturesExtractor.SaveLabels()
#testFeaturesExtractor.DumpFeaturesToTxt(testExportFileName)

# Classifier configuration and model serialization paths.
configFileClassifier = ".\\Classifier\\Configurations\\Configurations.xml"
# FIX: the original literal contained an invalid "\c" escape sequence
# (single backslash before "classifier_model.bin"); same runtime value,
# but now a well-formed string literal.
modelSerializationFile = ".\\Classifier\\Output\\classifier_model.bin"

# Start the Classifier:
# ---------------------
classifier = Classifier(configFileClassifier, modelSerializationFile,
                        trainFeaturesExtractor.features,
                        trainFeaturesExtractor.labels,
                        testFeaturesExtractor.features,
                        testFeaturesExtractor.labels)

# Train (disabled in this run)
#classifier.Train()

# Test
labels, acc, val = classifier.Test()

# Build the confusion matrix from true vs. predicted labels.
(mConfusionMatrix, mNormalConfusionMatrix, vNumTrainExamplesPerClass,
 vAccuracyPerClass, nOverallAccuracy) = classifier.BuildConfusionMatrix(
    testFeaturesExtractor.labels, labels)
print(mConfusionMatrix)