def __init__(self):
    classifier = Classifier()
    classifier.Build()

    # Trainer, Evaluator
    print("Reading Training set...")
    # self.setdata('something')
    self.trainer = Trainer(classifier)
    self.trainEvaluator = Evaluator("train", dataSettings.PATH_TO_TRAIN_SET_CATELOG, classifier)
    print("\t Done.\n")

    print("Reading Validation set...")
    self.validationEvaluator = Evaluator("validation", dataSettings.PATH_TO_VAL_SET_CATELOG, classifier)
    print("\t Done.\n")

    print("Reading Test set...")
    self.testEvaluator = Evaluator("test", dataSettings.PATH_TO_TEST_SET_CATELOG, classifier)
    print("\t Done.\n")

    # Summary
    summaryOp = tf.summary.merge_all()
    self.trainer.SetMergedSummaryOp(summaryOp)
    self.trainEvaluator.SetMergedSummaryOp(summaryOp)
    self.validationEvaluator.SetMergedSummaryOp(summaryOp)
    self.bestThreshold = None
    self.testEvaluator.SetMergedSummaryOp(summaryOp)

    # Time
    self._startTrainEpochTime = time.time()
    self._trainCountInOneEpoch = 0

    # Saver
    self.modelSaver = tf.train.Saver(max_to_keep=trainSettings.MAX_TRAINING_SAVE_MODEL)

    # Session
    self.session = tf.Session()
    init = tf.global_variables_initializer()
    self.session.run(init)

    self.trainer.SetGraph(self.session.graph)
    self.validationEvaluator.SetGraph(self.session.graph)
def detect(transport_type: int, route_name: str, connector: Connector, logger=None):
    # Load the classifier
    classifier = Classifier.load(transport_type=transport_type, connector=connector, logger=logger)

    # Load the route data
    route_df = connector.getRouteData(route_name)
    data = route_df[[
        "ON_DAY",
        "SUM_VALUES",
        "MAX(AGE1_SEX0)",
        "MAX(AGE2_SEX0)",
        "MAX(AGE3_SEX0)",
        "MAX(AGE1_SEX1)",
        "MAX(AGE2_SEX1)",
        "MAX(AGE3_SEX1)",
    ]].values
    route_nums = route_df[["ROUTE_NUMBER"]].values[:, 0]

    # Predict class labels
    labels = classifier.classify(data)

    # Extract the anomalous records
    anomaly_df = classifier.getAnomalys(data, route_nums, labels)
    if logger is not None:
        logger.info('Route ' + route_name + ' detected ' + str(len(anomaly_df)) + ' anomalies in data')
    return anomaly_df
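A minimal usage sketch for detect, reusing the Connector setup that appears in the sample script later in this collection; the route name "42" and the logger name are illustrative assumptions, not values from the source:

import json
import logging

from src.DBConnector import Connector

# Connector construction as in the sample script below.
with open('./connection.config') as config_json:
    config = json.load(config_json)
connector = Connector(config)

logger = logging.getLogger("anomaly-detection")
# Hypothetical route name; detect() returns the anomalous rows.
anomaly_df = detect(transport_type=1, route_name="42", connector=connector, logger=logger)
print(len(anomaly_df), "anomalies found")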
def classify(self, entry):
    predictions = {}
    # Collect one vote per tree in the forest (classify the entry
    # once per tree, rather than re-running it on a dict miss).
    for tree in self.Forest:
        prediction = Classifier(entry, tree)
        predictions[prediction] = predictions.get(prediction, 0) + 1

    # Return the most voted prediction.
    max_votes = 0
    max_prediction = None
    for p in predictions:
        if predictions[p] > max_votes:
            max_votes = predictions[p]
            max_prediction = p
    return max_prediction
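The same majority vote can be written more compactly with collections.Counter; a sketch, assuming (as the dict version above already does) that Classifier(entry, tree) returns a hashable label:

from collections import Counter

def classify(self, entry):
    # Majority vote over the forest; most_common(1) yields [(label, count)].
    votes = Counter(Classifier(entry, tree) for tree in self.Forest)
    return votes.most_common(1)[0][0] if votes else None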
def btnDiagnose_clicked(self):
    ip = ImageProcessing()
    ip.main()
    bve = BloodVesselsExtract()
    bve.main()
    mex = MaExtract()
    mex.main()
    hd = HemorrhageDetection()
    hd.main()
    cl = Classifier()
    cl.training()

    df = pd.read_csv('records.csv')
    status = df["Diabetic_retinopathy_status"].values[0]
    grade = str(df["Diabetic_retinopathy_grade"].values[0])
    severity = df["Severity"].values[0]
    # print(status)
    # print(grade)
    # print(severity)
    self.txtDr.setText(status)
    self.txtGrade.setText(grade)
    self.txtSeverity.setText(severity)
    os.remove("records.csv")
class Pipeline:
    def __init__(self, pipeline_dir, prepared_dir, classifier_dir, pretrained,
                 normalizer_dir, model_yaml_path, encoder_level='char',
                 decoder_level='char', onmt_dir='./OpenNMT-py', language='en'):
        """
        pretrained = None to disable the classifier
        model_yaml_path = None to use the rule-based normalizer
        """
        self.pipeline_dir = pipeline_dir
        self.prepared_dir = prepared_dir
        self.classifier_dir = classifier_dir
        self.pretrained = pretrained
        self.normalizer_dir = normalizer_dir
        self.encoder_level = encoder_level
        self.decoder_level = decoder_level
        self.onmt_dir = onmt_dir

        check_folder(self.pipeline_dir)
        check_folder(self.pipeline_dir + '/tmp')

        self.Classifier = Classifier(pretrained, prepared_dir, classifier_dir)
        self.Normalizer = Normalizer(model_yaml_path, prepared_dir, normalizer_dir,
                                     norm_only=False if pretrained else True,
                                     onmt_dir=onmt_dir,
                                     encoder_level=encoder_level,
                                     decoder_level=decoder_level,
                                     language=language)

    def train(self, num_train_epochs=10, learning_rate=1e-5, weight_decay=1e-2,
              per_device_train_batch_size=16, per_device_eval_batch_size=16,
              mode='pipeline'):
        if mode == 'pipeline':
            print("---pipeline mode---")
            print("Start training classifier...")
            self.Classifier.train(num_train_epochs, learning_rate, weight_decay,
                                  per_device_train_batch_size, per_device_eval_batch_size)
            print("Start training normalizer...")
            self.Normalizer.train()
        elif mode == 'normalizer':
            print("---normalizer mode---")
            print("Start training normalizer...")
            self.Normalizer.train()
        elif mode == 'classifier':
            print("---classifier mode---")
            print("Start training classifier...")
            self.Classifier.train(num_train_epochs, learning_rate, weight_decay,
                                  per_device_train_batch_size, per_device_eval_batch_size)
        else:
            print("Mode Error! Skip all training!")

    def eval(self, key='test', normalizer_step=-1, use_gpu=True):
        # Rebuild the test data as sentences.
        df = read_sentence_from_csv("{}/{}.csv".format(self.prepared_dir, key))
        input_path = '{}/{}_input.txt'.format(self.pipeline_dir, key)
        target_path = '{}/{}_target.txt'.format(self.pipeline_dir, key)
        cls_path = '{}/{}_classified.csv'.format(self.pipeline_dir, key)
        tbn_path = '{}/{}_TBNormed.txt'.format(self.pipeline_dir, key)
        norm_path = '{}/{}_normed.txt'.format(self.pipeline_dir, key)
        output_path = '{}/{}_output.txt'.format(self.pipeline_dir, key)
        df[['src_token']].to_csv(input_path, header=False, index=False)
        df[['tgt_token']].to_csv(target_path, header=False, index=False)

        print("Start evaluating classifier...")
        df_cls = self.Classifier.predict(input_path, cls_path)
        df_TBNorm = df_cls[df_cls.tag != 'O']
        df_TBNorm['token'].to_csv(tbn_path, header=False, index=False)

        print("Start evaluating normalizer...")
        df_nor = self.Normalizer.predict(input_path=tbn_path, output_path=norm_path,
                                         normalizer_step=normalizer_step, use_gpu=use_gpu)

        # Stitch normalized tokens back into their sentences.
        df_cls['pred'] = df_cls['token']
        id_TBNorm = df_cls.index[df_cls['tag'] == 'B'].tolist()
        df_cls.loc[id_TBNorm, 'pred'] = df_nor['pred'].tolist()
        result = df_cls.groupby(['sentence_id']).agg({'pred': ' '.join})
        result[['pred']].to_csv(output_path, header=False, index=False)

        command_wer = 'wer {ref_path} {hyp_path}'.format(ref_path=target_path, hyp_path=output_path)
        print(command_wer)
        print(os.popen(command_wer).read())

    def predict(self, input_path, output_path, normalizer_step=-1, use_gpu=True):
        tmp_dir = self.pipeline_dir + '/tmp'
        key = 'tmp'
        cls_path = '{}/{}_classified.csv'.format(tmp_dir, key)
        tbn_path = '{}/{}_TBNormed.txt'.format(tmp_dir, key)
        norm_path = '{}/{}_normed.txt'.format(tmp_dir, key)

        print("Start predicting classifier...")
        df_cls = self.Classifier.predict(input_path, cls_path)
        df_TBNorm = df_cls[df_cls.tag != 'O']
        df_TBNorm['token'].to_csv(tbn_path, header=False, index=False)

        print("Start predicting normalizer...")
        df_nor = self.Normalizer.predict(input_path=tbn_path, output_path=norm_path,
                                         normalizer_step=normalizer_step, use_gpu=use_gpu)

        df_cls['pred'] = df_cls['token']
        id_TBNorm = df_cls.index[df_cls['tag'] == 'B'].tolist()
        df_cls.loc[id_TBNorm, 'pred'] = df_nor['pred'].tolist()
        result = df_cls.groupby(['sentence_id']).agg({'pred': ' '.join})
        result[['pred']].to_csv(output_path, header=False, index=False)
        print("Prediction saved to: ", output_path)
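A minimal usage sketch for the Pipeline class above. The directory layout, the checkpoint name, and the file names are assumptions for illustration; only the constructor and method signatures come from the code itself:

# Hypothetical paths; adjust to the actual project layout.
pipeline = Pipeline(
    pipeline_dir='./pipeline',
    prepared_dir='./prepared',
    classifier_dir='./classifier',
    pretrained='bert-base-uncased',   # assumption: a checkpoint id; None disables the classifier
    normalizer_dir='./normalizer',
    model_yaml_path='./normalizer/model.yaml',  # None selects the rule-based normalizer
)

pipeline.train(num_train_epochs=10, mode='pipeline')   # trains classifier, then normalizer
pipeline.eval(key='test')                              # writes {test}_output.txt and reports WER
pipeline.predict('input.txt', 'output.txt')            # one-shot normalization of a token file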
              floatPrecision.format(videoAccuracy_),
              " duration:", "{0:.2f}".format(duration_) + "(s)\n")
    else:
        print("\t loss:", floatPrecision.format(loss_),
              " frame accuracy:", floatPrecision.format(frameAccuracy_),
              " given frame threshold:", threshold_,
              " video accuracy:", floatPrecision.format(videoAccuracy_),
              " duration:", "{0:.2f}".format(duration_) + "(s)\n")


if __name__ == '__main__':
    numberOfArguments = len(sys.argv)
    if (numberOfArguments == 2) or (numberOfArguments == 3):
        PATH_TO_DATA_SET_CATELOG = sys.argv[1]
        classifier = Classifier()
        classifier.Build()
        evaluator = Evaluator("evaluate", PATH_TO_DATA_SET_CATELOG, classifier)
        with tf.Session() as session:
            init = tf.global_variables_initializer()
            session.run(init)
            print("Load Model from: ", evalSettings.PATH_TO_MODEL_CHECKPOINTS)
            modelLoader = tf.train.Saver()
            modelLoader.restore(session, evalSettings.PATH_TO_MODEL_CHECKPOINTS)
            startEvaluateTime = time.time()
            if numberOfArguments == 2:
                print("Start evaluate: ", PATH_TO_DATA_SET_CATELOG,
import json

from src.Classifier import Classifier
from src.DBConnector import Connector

# Use a context manager so the config file handle is closed.
with open('./connection.config') as config_json:
    config = json.load(config_json)

connector = Connector(config)
classifier = Classifier(1, connector, './samples/transport_1.csv')
import os
import sys

lib_path = os.path.abspath(os.path.join(sys.path[0], '..'))
sys.path.append(lib_path)

from src.Classifier import Classifier
from src.PreProcessing import PreProcessing
import numpy as np
import matplotlib.pyplot as plt
import time
from multiprocessing import Queue

classifier_classes = Classifier.classifier_dict()
abspath = os.path.abspath(sys.path[0])

CONFIG = {
    'frame size': 512,
    'overlap': 128,
    'sample rate': 44100,
    'is training': True,
    'is streaming': True,
    'continuous stream': False,
    'data list': '../DataSet/DataList_all.txt',
    'classifier': ['SVM', 'KNN'],
    'argumentation': True,
    'debug tool': False,
    'error stat': False
}

frame_count = np.zeros((101,))
print(frame_count.shape, frame_count)
from src.LocalBinaryPatternUtil.Params import Params
from src.PatternBuilder import PatternBuilder
from src.Classifier import Classifier
import src.LogoExtractor as extractor
import os

where = os.path.dirname(os.path.realpath(__file__))
logoDictionary = {'ChangAn': 1, 'VolksWagen': 2, 'JiangHuai': 3, 'JinBei': 4,
                  'KaiRui': 5, 'QiRui': 6, 'Hyundai': 7}
sourcePath = where + "/src/resources/logo_template"
histogramPath = where + "/src/resources/lbph.dat"
params = Params(1, 6, 4, 4)

# TODO ::: make input image configurable. later...
logo = extractor.logoExtraction(where + "/src/resources/testImages/volkswagen1.jpg")

patternBuilder = PatternBuilder(params, logoDictionary, sourcePath, histogramPath)
patternBuilder.buildAll()
patternBuilder.cache.saveHistogramData()

classifier = Classifier(patternBuilder.cache, params, logoDictionary)
classifier.guessLogo(logo)
from src.Type import Type
from src.Classifier import Classifier
from src.utils import run_experiment

if __name__ == '__main__':
    nb_classifier = Classifier(class_type=Type.NB, feature_cut_off=0)
    svm_classifier = Classifier(class_type=Type.SVM, feature_cut_off=0)

    print("unigrams")
    # unigrams
    run_experiment(nb_classifier, svm_classifier, "unigrams")

    # unigrams + stemming
    nb_classifier.stemming = True
    svm_classifier.stemming = True
    run_experiment(nb_classifier, svm_classifier, "unigrams and stemming")

    # unigrams + stemming + frequency
    nb_classifier.frequency = True
    svm_classifier.frequency = True
    run_experiment(nb_classifier, svm_classifier, "unigrams, stemming and frequency")

    # unigrams + frequency
    nb_classifier.stemming = False
    svm_classifier.stemming = False
    run_experiment(nb_classifier, svm_classifier, "unigrams and frequency")

    print("bigrams")
    # bigrams + stemming
    nb_classifier.stemming = True
import pandas
from src.Classifier import Classifier

classifier = Classifier(1, None, "./samples/transport_1.csv")
print(classifier.accuracy)