def main():
    clf = Classify()
    while True:
        word = input('Word: ')
        classification = clf.classifyWord(word)
        if classification[0] > classification[1]:
            print("Telugu, with prob matrix")
        else:
            print("English, with prob matrix")
        print(classification)
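# A minimal, hypothetical stand-in for the Classify class used above, so the
# REPL loop is runnable locally; the real implementation is not shown here.
# Assumption: classifyWord returns [P(Telugu), P(English)].
class Classify:
    TELUGU_RANGE = ('\u0c00', '\u0c7f')  # Telugu Unicode block

    def classifyWord(self, word):
        telugu = sum(1 for ch in word
                     if self.TELUGU_RANGE[0] <= ch <= self.TELUGU_RANGE[1])
        p_telugu = telugu / max(len(word), 1)
        return [p_telugu, 1.0 - p_telugu]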
def activity(self):
    self.output.setText("请稍等几秒钟......")  # "Please wait a few seconds..."
    sentence = self.input.text()
    print(self.input.text())
    args = parse_args()
    print("ok")
    IE = TripleIE(sentence, args.ltp, args.clean)
    out = IE.run()
    # "句子结构不完整" means "sentence structure is incomplete"; only classify
    # the sentence type when extraction succeeded.
    if re.search(r"句子结构不完整", out, flags=0) is None:
        CL = Classify(sentence, out)
        out += "句子类型:" + CL.SentenceType()  # "Sentence type:"
    self.output.setText(out)
def __init__(self, type):
    if type == 'train':
        self.c = Classify(train=False, type='train')
    else:
        self.c = Classify(train=False, type='valid')
    self.c.load_data()
    self.labels = self.c.temp
    self.tgt_sentences = self.c.sentences
    self.src_sentences = self.c.raw_sentence
    self.trainData = []
def selectMode(self, configDict, automatic):
    mode = ""
    if automatic:
        mode = getattr(self, "mode")
    else:
        while not mode:
            mode = input("Enter mode: 'prepare', 'classify', 'train' or 'predict'\n")
            if mode in ("classify", "train", "predict", "prepare"):
                print("Correct mode chosen.")
            else:
                print("Wrong mode!")
                mode = ""
    if mode == "classify":
        from classify import Classify
        classify = Classify(**dict(configDict["Classify"]))
        classify.classifyMultipleImages()
    elif mode == "train":
        from trainModel import TrainModel
        train = TrainModel(**dict(configDict["Train"]))
        train.createModel()
    elif mode == "predict":
        from predictService import PredictService
        predict = PredictService(**dict(configDict["Predict"]))
        predict.downloadImage()
        predict.predict()
    elif mode == "prepare":
        args = {"pathToConfig": configDict["Prepare"]["configPath"]}
        self.GlobalServices(**args).prepareFolders(
            input("Enter root folder name. (Contains default folder structure.)\n"))
def classify(self, options):
    """Determine taxonomic classification of genomes."""
    check_dir_exists(options.align_dir)
    make_sure_path_exists(options.out_dir)

    genomes = self._genomes_to_process(options.genome_dir,
                                       options.batchfile,
                                       options.extension)

    classify = Classify(options.cpus)
    classify.run(genomes,
                 options.align_dir,
                 options.out_dir,
                 options.prefix,
                 options.debug)

    self.logger.info('Done.')
def main(): print "Creating Train File..." Data.CreateDataFile("mails-train", "mails-train.txt") print "Initializing Train File..." trainingSet = Data.LoadFile("mails-train.txt") print "Creating Test File..." Data.CreateDataFile("mails-test", "mails-test.txt") print "Initializing Test File..." testSet = Data.LoadFile("mails-test.txt") print "Extracting Classes.." classes = Train.ExtractClasses(trainingSet) print "Training NBC..." vocabulary, prior, condprob = Train.TrainMultinomialNaiveBayes( classes, trainingSet) print "Testing Accuracy..." percentage = Test.Accuracy(classes, vocabulary, prior, condprob, testSet) print "The percentage of correct predictions is ", 100 * percentage, "percent." print "Get Random Document..." testDocument = Data.GetDocument(testSet) print "Do A Time Measurement of the Application of the NBC..." print "The time is took to do a single application of the NBC on a document is", Test.TimeMeasure( classes, vocabulary, prior, condprob, testDocument), "seconds." print "Applying NBC on Document..." topClass, score = Classify.ApplyMultinomialNaiveBayes( classes, vocabulary, prior, condprob, testDocument['document'])
def main(test_file, threshold):
    # Extract PDFs (probability density functions) from the training data.
    train = Train(1)
    # print(train.tot_mean)
    # print(train.tot_std)

    # Compute probabilities for the test data.
    ai = Classify(test_file, train, threshold)
    return 0
def TimeMeasure(classes, vocabulary, prior, condprob, document):
    # Takes a timestamp before and after running ApplyMultinomialNaiveBayes to
    # measure how long it takes to execute on the given document.
    start = time.time()
    Classify.ApplyMultinomialNaiveBayes(classes, vocabulary, prior, condprob, document)
    end = time.time()
    return end - start
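# For interval timing, time.perf_counter() is a better clock than time.time():
# it is monotonic and has higher resolution. A drop-in variant (sketch, using
# the same `time` and `Classify` imports as the function above):
def TimeMeasurePrecise(classes, vocabulary, prior, condprob, document):
    start = time.perf_counter()
    Classify.ApplyMultinomialNaiveBayes(classes, vocabulary, prior, condprob, document)
    return time.perf_counter() - start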
def make_forest(n=10):
    forest = []
    for i in range(n):
        train = load.load_train(2000)
        train_labels = np.argmax(train[1], axis=1)
        classify = Classify(6, train[0], train_labels)
        forest.append(classify)
    return forest
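# Hypothetical companion showing how such an ensemble is typically used:
# majority vote across the individual classifiers. Assumes each Classify
# instance exposes a predict(sample) method returning a class index, which
# is not shown in the snippet above.
import numpy as np

def forest_predict(forest, sample):
    votes = [clf.predict(sample) for clf in forest]
    return np.bincount(np.asarray(votes)).argmax()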
def predict_accident(self):
    insert_into_DB = 1
    db = DBConnection()
    conn = db.get_connection()
    mycursor = conn.cursor()
    mycursor.execute("SELECT path FROM buffer")
    buffer_items = mycursor.fetchall()
    for path_row in buffer_items:
        path = path_row[0]
        clf = Classify(path)
        class_name, percentage = clf.classify_image()
        # Use == rather than `is`: `is` tests object identity, which is not
        # guaranteed for equal strings or ints.
        if (class_name[0] == 'a' or class_name[0] == 'A') and insert_into_DB == 1:
            insert_into_DB = 0
            print('accident detected')
            Camera_id = 'CAM001'
            db1 = DBConnection()
            conn1 = db1.get_connection()
            mycursor1 = conn1.cursor()
            mycursor1.execute("SELECT count(path) FROM Accident")
            count_row = mycursor1.fetchone()
            new_path = '../accident/Accident' + str(count_row[0]) + '.jpg'
            copyfile(path, new_path)
            date_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            timestamp = time.time()
            sql1 = ("insert into Accident(Camera_id,path,date_time,timestampAcc) "
                    "values(%s,%s,%s,%s);")
            mycursor1.execute(sql1, [Camera_id, new_path, date_time, int(timestamp)])
            conn1.commit()
            mycursor1.execute("UPDATE flag set flag_var = 1 where flag_key = 1;")
            conn1.commit()
            mycursor1.execute("UPDATE smbool set continue_buffer = 0 where flag_var = 0")
            conn1.commit()
        if insert_into_DB == 0:
            print('skipping database entry')
        sql = "DELETE FROM buffer WHERE path = %s"
        mycursor.execute(sql, [path])
        conn.commit()
        os.remove(path)
def generate_model_output(self):
    self.classifier = Classify(self.weights,
                               self.root_dir,
                               self.emails_threshold,
                               self.results_size,
                               results_dir=self.result_path_out,
                               serial_path=self.model_path_out,
                               memlog_freq=self.memlog_classify_frequency,
                               debug_training=self.debug_training,
                               filterRecipients=self.filter_recipients,
                               recipientTargetFile=self.recipients)
    logs.Watchdog.reset()
    self.classifier.generate_training()
    logs.Watchdog.reset()
    self.classifier.train_clf()
    logs.Watchdog.reset()
    self.classifier.cross_validate()
    logs.Watchdog.reset()
    self.classifier.test_and_report()
    logs.Watchdog.reset()
def testSingleFile(self):
    f = askopenfile(mode='r', defaultextension=".txt")
    if f is None:  # askopenfile returns None if the dialog is closed with "cancel".
        return
    lines = f.read()
    bagOfWords = re.split(' ', lines)
    singleFile = Data.Normalize(bagOfWords)
    print("Loaded.")
    print("Calculating...")
    topClass, score = Classify.ApplyMultinomialNaiveBayes(
        self.classes, self.vocabulary, self.prior, self.condprob, singleFile)
    print("This document belongs to", topClass)
    print("Done.")
    f.close()
def Accuracy(classes, vocabulary, prior, condprob, dataset):
    # Runs ApplyMultinomialNaiveBayes on a series of documents whose classes
    # are known, and counts how often the prediction matches the true value.
    # The returned value is a fraction between 0 and 1 giving the share of
    # correct predictions.
    correct = 0
    totaal = 0
    for c in dataset:
        for d in dataset[c]:
            topClass, score = Classify.ApplyMultinomialNaiveBayes(
                classes, vocabulary, prior, condprob, dataset[c][d])
            if topClass == c:
                correct += 1
            totaal += 1
    print("The total number of tested documents is", totaal)
    print("The number of correct predictions is", correct)
    # True division (Python 3) yields the fraction described above.
    return correct / totaal
def blind_test(feature_sets):
    classifier = Classify(init=False)
    classifier.network_filename = 'hl1__sizes-200-__acfn-rel-__dr-0.2-0.5-__lr0.1__mb100__mom0.9__seed1.hdf5'
    classifier.initialize_network()

    labels = []
    for image in feature_sets:
        # Normalize pixel values to [0, 1]. A list comprehension is used
        # instead of map() so the result stays indexable under Python 3.
        image_1d = [value / 255.0 for value in image]
        # Rebuild the flat 784-value vector into a 28x28 image.
        image_2d = []
        for row_idx in range(28):
            image_2d.append([])
            for col_idx in range(28):
                image_2d[row_idx].append(image_1d[row_idx * 28 + col_idx])
        label = classifier.classify(image_2d)
        labels.append(label)
    return labels
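# The nested loops above are equivalent to a NumPy reshape; a one-line
# alternative (sketch, assuming NumPy arrays are acceptable input to
# classifier.classify):
import numpy as np

def to_image_2d(flat_pixels):
    return (np.asarray(flat_pixels, dtype=float) / 255.0).reshape(28, 28)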
import argparse

from ie import TripleIE
from classify import Classify


def parse_args():
    parser = argparse.ArgumentParser('TripleIE')
    parser.add_argument('--ltp', type=str, default=r'E:\python\ltp_data_v3.4.0',
                        help='the path to LTP model')
    parser.add_argument('--clean', action='store_true',
                        help='output the clean relation (no tips)')
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    # sentence = "国务院总理李克强积极地快乐地调研美丽的上海"
    # ("Premier Li Keqiang actively and happily inspects beautiful Shanghai")
    sentence = "什么时候放假呢"  # "When does the holiday start?"
    IE = TripleIE(sentence, args.ltp, args.clean)
    result = IE.run()
    if result != "句子结构不完整":  # "sentence structure is incomplete"
        CL = Classify(sentence, result)
        result += "句子类型:" + CL.SentenceType()  # "Sentence type:"
    print(result)
from classify import Classify
from flask import request, Flask

app = Flask(__name__)
imgdir = './images/'


@app.route('/api/', methods=["POST"])
def classify_img():
    url = request.headers.get('url')
    return str(c.classify(url))


if __name__ == "__main__":
    c = Classify()
    app.run(debug=True)
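# Example client call for the route above (sketch; host, port, and the image
# URL are assumptions). Note that the image URL travels in a request header
# named 'url', not in the request body.
import requests

resp = requests.post('http://127.0.0.1:5000/api/',
                     headers={'url': 'https://example.com/image.jpg'})
print(resp.text)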
class PhishDetector(object):
    def __init__(self):
        # Flag configurations
        self.generate_data_matrix = False
        self.generate_test_matrix = False
        self.generate_model = False
        self.classify = False
        self.config_path = 'config.yaml'

        # Config file configurations
        self.root_dir = None
        self.filename = None
        self.weights = None
        self.sender_profile_percentage = 0
        self.data_matrix_percentage = 0
        self.test_matrix_percentage = 0
        self.emails_threshold = 1000
        self.results_size = 10
        self.model_path_out = './model'
        self.result_path_out = './summary'
        self.detectors = None
        self.parallel = None

        # Generator and classifier
        self.classifier = None

        self.parse_config()
        self.parse_args()

    def parse_args(self):
        """
        Parses command line arguments.
        """
        parser = argparse.ArgumentParser(description='Manage spear phishing detector.')
        parser.add_argument('--all', action='store_true',
                            help='Generate and serialize data matrix, test matrix, and ML model, then run ML model on test matrix')
        parser.add_argument('--gen_all', action='store_true',
                            help='Generate and serialize data matrix, test matrix, and ML model')
        parser.add_argument('--gen_data', action='store_true',
                            help='Generate and serialize data matrix')
        parser.add_argument('--gen_test', action='store_true',
                            help='Generate and serialize test matrix')
        parser.add_argument('--gen_model', action='store_true',
                            help='Generate and serialize ML model')
        parser.add_argument('--classify', action='store_true',
                            help='Run ML model on test matrix')
        parser.add_argument('--debug_training', action='store_true',
                            help='Debug the training step of the pipeline.')
        parser.add_argument('--mbox', action='store_true',
                            help='Use emails from mbox rather than pcaps')
        args = parser.parse_args()

        run = False
        self.debug_training = False
        if args.all:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            self.classify = True
            run = True
        if args.gen_all:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            run = True
        if args.gen_data:
            self.generate_data_matrix = True
            run = True
        if args.gen_test:
            self.generate_test_matrix = True
            run = True
        if args.gen_model:
            self.generate_model = True
            run = True
        if args.classify:
            self.classify = True
            run = True
        if args.debug_training:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            self.classify = True
            self.debug_training = True
            run = True
        if not run:
            parser.error('You must run with at least one flag')

    def parse_config(self):
        """
        Parses configuration file. Assumes configuration is in the same
        directory as this script.
""" try: stream = file(self.config_path, 'r') except IOError: progress_logger.exception("Could not find yaml configuration file.") raise config = yaml.load(stream) expected_config_keys = [ 'root_dir', 'regular_filename', 'phish_filename', 'sender_profile_percentage', 'data_matrix_percentage', 'test_matrix_percentage', 'use_name_in_from', 'model_path_out', 'result_path_out', 'weights', 'detectors', 'emails_threshold', 'batch_threading_size', 'offline', 'results_size', 'parallel', 'num_threads', 'logging_interval', 'memlog_gen_features_frequency', 'memlog_classify_frequency' ] try: for key in expected_config_keys: setattr(self, key, config[key]) except KeyError: progress_logger.exception("Configuration file missing entry") raise detectors = [] for detector, val in self.detectors.items(): if val == 1: detectors.append(getattr(globals()['fc'], detector)) self.detectors = detectors self.root_dir = os.path.abspath(self.root_dir) Lookup.initialize(offline=self.offline) def prep_features(self, directory): regular_path = os.path.join(directory, self.regular_filename) phish_path = os.path.join(directory, self.phish_filename) feature_generator = FeatureGenerator(directory, regular_path, phish_path, self.sender_profile_percentage, self.data_matrix_percentage, self.test_matrix_percentage, self.detectors ) feature_generator.do_generate_data_matrix = self.generate_data_matrix feature_generator.do_generate_test_matrix = self.generate_test_matrix return feature_generator def generate_features(self): if self.use_name_in_from != 0: Detector.USE_NAME = True dir_to_generate = [] progress_logger.info('Starting directory aggregation in feature generation.') start_time = time.time() for dirpath, dirnames, filenames in os.walk(self.root_dir): if ((self.generate_data_matrix and self.regular_filename in filenames and self.phish_filename in filenames) or (self.generate_test_matrix and self.regular_filename in filenames)): dir_to_generate.append(dirpath) logs.Watchdog.reset() end_time = time.time() min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60) progress_logger.info('Finished directory aggregation in feature generation in {} minutes, {} seconds'.format(min_elapsed, sec_elapsed)) BATCH_SIZE = self.batch_threading_size if self.parallel: progress_logger.info('Starting feature generation with {} threads in parallel with batch size {}...'.format(self.num_threads, BATCH_SIZE)) start_time = time.time() feature_generators = [] for directory in dir_to_generate: feature_generator = self.prep_features(directory) feature_generators.append(feature_generator) if len(feature_generators) == BATCH_SIZE: p = Pool(self.num_threads) p.map(run_generator, feature_generators) p.close() p.join() feature_generators = [] if len(feature_generators) > 0: p = Pool(self.num_threads) p.map(run_generator, feature_generators) p.close() p.join() end_time = time.time() min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60) progress_logger.info('Finished feature generation in {} minutes, {} seconds.'.format(min_elapsed, sec_elapsed)) else: progress_logger.info('Starting feature generation serially for {} directories'.format(len(dir_to_generate))) start_time = time.time() last_logged_time = start_time dir_count = 0 end_of_last_memory_track = dt.datetime.now() for directory in dir_to_generate: dir_count += 1 logs.context = {'feature gen': dir_count} curr_time = time.time() if (curr_time - last_logged_time) > self.logging_interval * 60: progress_logger.info('Processing 
directory #{} of {}'.format(dir_count, len(dir_to_generate))) progress_logger.info('Feature generation has run for {} minutes'.format(int((curr_time - start_time) / 60))) last_logged_time = curr_time feature_generator = self.prep_features(directory) feature_generator.run() logs.Watchdog.reset() now = dt.datetime.now() time_elapsed = now - end_of_last_memory_track minutes_elapsed = time_elapsed.seconds / 60.0 if minutes_elapsed > self.memlog_gen_features_frequency: MemTracker.logMemory('After generating features for {}th sender'.format(dir_count)) end_of_last_memory_track = dt.datetime.now() logs.context = {} end_time = time.time() min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60) progress_logger.info('Finished feature generation in {} minutes, {} seconds.'.format(min_elapsed, sec_elapsed)) def generate_model_output(self): self.classifier = Classify(self.weights, self.root_dir, self.emails_threshold, self.results_size, results_dir=self.result_path_out, serial_path=self.model_path_out, memlog_freq=self.memlog_classify_frequency, debug_training=self.debug_training) logs.Watchdog.reset() self.classifier.generate_training() logs.Watchdog.reset() self.classifier.train_clf() logs.Watchdog.reset() self.classifier.cross_validate() logs.Watchdog.reset() self.classifier.test_and_report() logs.Watchdog.reset() def execute(self): detector_names = ', '.join([d.__name__ for d in self.detectors]) progress_logger.info("Config settings: use_name_in_from={}, parallel={}, detectors={}".format(self.use_name_in_from, self.parallel, detector_names)) start_time = time.time() MemTracker.initialize(memory_logger) logs.Watchdog.initialize() logs.context = {'phase': 'generate_features'} if self.generate_data_matrix or self.generate_test_matrix: self.generate_features() logs.context = {} MemTracker.logMemory("After generating features/Before generating model") logs.context = {'phase': 'generate_model_output'} if self.generate_model: self.generate_model_output() logs.context = {} MemTracker.logMemory("After generating model") end_time = time.time() min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60) progress_logger.info("Phish Detector took {} minutes, {} seconds to run.".format(min_elapsed, sec_elapsed)) logs.RateLimitedLog.flushall()
from __future__ import division
import sys
import os

from classify import Classify
from db import Db

classifier = Classify()


def is_doctype_valid(doctype):
    return Db().get_words_count(doctype) > 0


def check_file(f):
    results = []
    for line in open(f, 'r').readlines():
        try:
            classifier.set_text(line)
            results += [classifier.execute()]
        except ValueError:
            pass
    return results


def check_dir(d):
    results = []
    for f in os.listdir(d):
        if f.endswith(".js"):
            results += check_file(os.path.join(d, f))
    return results


def show_results(results):
import cv2

from classify import Classify
from CNN_model import CNNModel
from dataset_creation import CreateTrainData

# "frames" is the path where extracted frames will be saved.
CreateTrainData.createFrames("frames")
CreateTrainData.dataToTrainAndTest()
CNNModel.runModel()
CNNModel.ModelResultsPlot("ggplot")

vs = cv2.VideoCapture('331.mp4')
Classify.classifyVideo(vs)
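# A sketch of the frame-by-frame loop that Classify.classifyVideo presumably
# runs over the VideoCapture handle above (the actual implementation is not
# shown; preprocessing and model calls are placeholders):
while True:
    grabbed, frame = vs.read()
    if not grabbed:  # end of the video stream
        break
    # resized = cv2.resize(frame, (224, 224))
    # prediction = model.predict(resized[None, ...])
vs.release()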
def get_image_data():
    dat = request.form['javascript_data']
    saveToPNG(dat)
    pred = Classify('image2.png')
    return str(pred)
airportList = [
    'ATL', 'LAX', 'ORD', 'DFW', 'DEN', 'JFK', 'SFO', 'LAS', 'SEA', 'CLT'
]
# airportList = ['LAX','BOS','ATL']
# airportList = ['ATL','LAX','ORD','DFW','DEN','JFK','SFO','LAS','SEA','CLT']#, 'EWR','MCO','PHX','MIA','IAH','BOS']#,'MSP','DTW','FLL','LGA']

ad = Preprocess(airportList=airportList)
# ad.parseData('2017.csv')

# Create df for plotting before filtering out the airports.
# ad.createplotdf()
# ad.initialPlots()

# Filter by airport for the ML data.
# ad.filterByAirport()
ad.createMLdf()

cl = Classify()
# Tune the tree - chose depth = 10, minLeaf = 50.
# cl.tuneTree_acc(depth=list(range(1,11)), minLeaf=[2,5,10,20,50,100])
# cl.tuneTree(depth=list(range(10,101,5)))
# cl.runTree(maxDepth=10, minLeaf=50, printRules=False)

# Tune logistic regression - chose C = 1.
# cl.tuneLogistic()
# cl.runLogistic(C=1)

# Tune SVM - chose C = 10.
# cl.tuneSVM()
# cl.runSVM(C=10)

# Tune NN - chose layers=(2,), alpha=0.01.
import cv2
import numpy as np
import os

from classify import Classify

model = "face/face_lady_man.model"
labelbin = "face/face_lady_man"
_classify = Classify(model, labelbin)

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "Cascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
font = cv2.FONT_HERSHEY_SIMPLEX

# Initialize id counter.
id = 0
# Names related to ids: example ==> Marcelo: id=1, etc.
names = ['0', 'AKN', 'ManGate']

# Initialize and start realtime video capture.
cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height

# Define min window size to be recognized as a face.
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)

xx = 0
while True:
    xx = xx + 1
    ret, img = cam.read()
def Classify(self):
    # Note: this method shadows the Classify class it calls. The call below
    # still resolves to the global Classify class, because the method name is
    # bound on the enclosing class, not in the module namespace.
    detection = Classify(self.matrix)
    if detection is None:
        self.detection = '?'
    else:
        self.detection = str(detection)
def post(self):
    data = request.json
    cl = Classify(data)
    return cl.result
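# This handler is presumably a method on a flask-restful Resource; a minimal
# sketch of the surrounding wiring (the class name, route, and app setup are
# assumptions, not part of the original snippet):
from flask import Flask, request
from flask_restful import Api, Resource
from classify import Classify

class ClassifyResource(Resource):  # hypothetical name
    def post(self):
        data = request.json
        cl = Classify(data)
        return cl.result

app = Flask(__name__)
api = Api(app)
api.add_resource(ClassifyResource, '/classify')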
    def evaluate(self, title, abstract, claims):
        """
        Predict group of a single entry
        :param abstract:
        :return:
        """
        self.analyzer.load_model('title')
        title_vector = self.analyzer.transform([title])
        self.analyzer.load_model('abstract')
        abstract_vector = self.analyzer.transform([abstract])
        self.analyzer.load_model('claims')
        claims_vector = self.analyzer.transform([claims])
        feature_vector = hstack([title_vector, abstract_vector])
        feature_vector = hstack([feature_vector, claims_vector])
        return feature_vector

    def predict(self, feature_vector):
        """
        Predict class based on feature vector input
        :param feature_vector:
        :return:
        """
        group = self.classify.predict(feature_vector)
        return group


if __name__ == '__main__':
    config_info = Config()
    f = Factory(config_info)
    file = '2015_2016_Patent_Data_new.csv'
    feature_matrix, response_vector = f.get_all_column_data(file)
    f.classify = Classify(config_info, feature_matrix, response_vector)
    f.full_train()
# -*- coding: utf-8 -*-
from classify import Classify
import numpy as np
import sys

# Python 2 encoding workaround for the Chinese text below.
reload(sys)
sys.setdefaultencoding("utf8")

if __name__ == "__main__":
    # Training data: label 1 = music queries, label 2 = sports statements.
    X_train = np.array([
        u"我想听张学友的歌",    # "I want to hear Jacky Cheung's songs"
        u"周杰伦的龙卷风",      # "Jay Chou's 'Tornado'"
        u"鹿晗有什么歌好听",    # "Which of Lu Han's songs are good?"
        u"姚明打篮球好厉害",    # "Yao Ming is great at basketball"
        u"张继科会打乒乓球",    # "Zhang Jike plays table tennis"
        u"詹姆士是体育明星"     # "James is a sports star"
    ])
    Y_train = np.array([1, 1, 1, 2, 2, 2])
    Test_data = [
        u"我想听薛之谦的演员",  # "I want to hear Joker Xue's 'Actor'"
        "邓亚萍是体育明星",     # "Deng Yaping is a sports star"
        "刘翔是体育明星"        # "Liu Xiang is a sports star"
    ]

    Model = Classify()
    Model.load_W2V_Model("word2vec.model")
    Model.train(X_train, Y_train)
    Model.predict(Test_data)
    Model.save_NBmodel("NB.model")
    del Model

    NBmodel_test = Classify()
    NBmodel_test.load_NBmodel("NB.model")
    NBmodel_test.predict(Test_data)
    del NBmodel_test
def get_wf():
    wf = pe.Workflow(name="svc_workflow")
    wf.base_dir = os.path.join(workingdir, "npairs_IQ_C1e-11")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    # INFOSOURCE ITERABLES
    subject_id_infosource = pe.Node(util.IdentityInterface(fields=['subject_id']),
                                    name="subject_id_infosource")
    subject_id_infosource.iterables = [('subject_id', subjects)]

    scan_id_infosource = pe.Node(util.IdentityInterface(fields=['scan_id']),
                                 name='scan_id_infosource')
    scan_id_infosource.iterables = ('scan_id', scans)

    preproc_id_infosource = pe.Node(util.IdentityInterface(fields=['preproc_id']),
                                    name="preproc_id_infosource")
    preproc_id_infosource.iterables = ('preproc_id', preprocs)

    # DATAGRABBER
    datagrabber = pe.Node(nio.DataGrabber(infields=['subject_id', 'scan_id', 'preproc_id'],
                                          outfields=['falff_files', 'dr_files', 'reho_files']),
                          name='datagrabber')
    datagrabber.inputs.base_directory = '/'
    datagrabber.inputs.template = '*'
    datagrabber.inputs.field_template = dg_template
    datagrabber.inputs.template_args = dg_args
    datagrabber.inputs.sort_filelist = True
    wf.connect(subject_id_infosource, 'subject_id', datagrabber, 'subject_id')
    wf.connect(scan_id_infosource, 'scan_id', datagrabber, 'scan_id')
    wf.connect(preproc_id_infosource, 'preproc_id', datagrabber, 'preproc_id')

    # OUTPUT PATHS & LABELS
    toText = pe.JoinNode(Text_out(), joinsource='subject_id_infosource',
                         joinfield="in_file", name="falff_text_files")
    wf.connect(datagrabber, 'falff_files', toText, 'in_file')

    toText2 = pe.JoinNode(Text_out(), joinsource='subject_id_infosource',
                          joinfield="in_file", name="reho_text_files")
    wf.connect(datagrabber, 'reho_files', toText2, 'in_file')

    toText3 = pe.JoinNode(Text_out(), joinsource='subject_id_infosource',
                          joinfield="in_file", name="dr_text_files")
    wf.connect(datagrabber, 'dr_files', toText3, 'in_file')

    # RUN CLASSIFIERS
    classifier = pe.Node(Classify(), name='SVC_falff')
    classifier.inputs.mask_file = mask_file
    wf.connect(toText, 'label_file', classifier, 'label_file')
    wf.connect(toText, 'data_paths', classifier, 'path_file')

    classifier2 = pe.Node(Classify(), name='SVC_reho')
    classifier2.inputs.mask_file = mask_file
    wf.connect(toText2, 'label_file', classifier2, 'label_file')
    wf.connect(toText2, 'data_paths', classifier2, 'path_file')

    classifier3 = pe.Node(Classify(), name='SVC_dr')
    classifier3.inputs.mask_file = mask_file
    wf.connect(toText3, 'label_file', classifier3, 'label_file')
    wf.connect(toText3, 'data_paths', classifier3, 'path_file')

    # DATASINK
    ds = pe.Node(nio.DataSink(), name='datasink')
    ds.inputs.base_directory = outputdir
    wf.connect(classifier, 'pred', ds, 'prediction_accuracy_falff')
    wf.connect(classifier, 'rep', ds, 'reproducibility_falff')
    wf.connect(classifier, 'imgs', ds, "img_labels_falff")
    wf.connect(classifier, 'splits', ds, "splits_falff")
    wf.connect(classifier, 'sexs', ds, "sex_labels_falff")
    wf.connect(classifier, 'coefs', ds, "coefs_falff")
    wf.connect(classifier, 'datary', ds, "data_array_falff")
    wf.connect(classifier2, 'pred', ds, 'prediction_accuracy_reho')
    wf.connect(classifier2, 'rep', ds, 'reproducibility_reho')
    wf.connect(classifier2, 'imgs', ds, "img_labels_reho")
    wf.connect(classifier2, 'splits', ds, "splits_reho")
    wf.connect(classifier2, 'sexs', ds, "sex_labels_reho")
    wf.connect(classifier2, 'coefs', ds, "coefs_reho")
    wf.connect(classifier2, 'datary', ds, "data_array_reho")
    wf.connect(classifier3, 'pred', ds, 'prediction_accuracy_dr')
    wf.connect(classifier3, 'rep', ds, 'reproducibility_dr')
    wf.connect(classifier3, 'imgs', ds, "img_labels_dr")
    wf.connect(classifier3, 'splits', ds, "splits_dr")
    wf.connect(classifier3, 'sexs', ds, "sex_labels_dr")
    wf.connect(classifier3, 'coefs', ds, "coefs_dr")
    wf.connect(classifier3, 'datary', ds, "data_array_dr")

    wf.config['execution'] = {'plugin': 'Linear',
                              'stop_on_first_rerun': 'False',
                              'hash_method': 'timestamp'}
    return wf
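# Typical usage of the workflow factory above (sketch; nipype runs with the
# Linear plugin configured in wf.config unless overridden in wf.run()):
wf = get_wf()
wf.write_graph(graph2use='flat')  # optional: render the DAG for inspection
wf.run()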
class Factory(object):
    def __init__(self, config):
        self.config = config
        self.analyzer = Analyzer(self.config)
        self.classify = Classify(config)

    def get_all_column_data(self, file):
        """
        Combine all column data into a single feature matrix
        :param file:
        :return:
        """
        # Get all the feature matrices.
        title_matrix, response_vector = self.analyze_column_data(file, 'title')
        abstract_matrix, response_vector = self.analyze_column_data(file, 'abstract')
        claims_matrix, response_vector = self.analyze_column_data(file, 'claims')

        # Stack them all together.
        feature_matrix = hstack([title_matrix, abstract_matrix])
        feature_matrix = hstack([feature_matrix, claims_matrix])
        return feature_matrix, response_vector

    def analyze_column_data(self, filename, column_name):
        """
        Create the feature model and matrix for the given column
        :param filename:
        :return:
        """
        self.analyzer.load_patent_data(filename)
        self.analyzer.extract_data(column_name)
        n_grams = 1
        self.analyzer.extract_features(n_grams, column_name)
        return self.analyzer.feature_matrix, self.analyzer.response

    def compute_heuristics(self, filename, column_name):
        """
        Figure out which words make up the groups in the data
        :param filename:
        :return:
        """
        self.analyze_column_data(filename, column_name)
        self.analyzer.heuristics(column_name)

    def full_train(self):
        """
        Train the classifier
        :return:
        """
        # self.classify.feature_selection()
        self.classify.classifier_selection()
        # self.classify.optimize_classifier()
        self.classify.train()
        self.classify.save_classifier()

    def evaluate(self, title, abstract, claims):
        """
        Predict group of a single entry
        :param abstract:
        :return:
        """
        self.analyzer.load_model('title')
        title_vector = self.analyzer.transform([title])
        self.analyzer.load_model('abstract')
        abstract_vector = self.analyzer.transform([abstract])
        self.analyzer.load_model('claims')
        claims_vector = self.analyzer.transform([claims])
        feature_vector = hstack([title_vector, abstract_vector])
        feature_vector = hstack([feature_vector, claims_vector])
        return feature_vector

    def predict(self, feature_vector):
        """
        Predict class based on feature vector input
        :param feature_vector:
        :return:
        """
        group = self.classify.predict(feature_vector)
        return group
def action(q: str, a1: str = "", a2: str = "", a3: str = ""):
    if q == "login":
        try:
            dt.config["mofid_login"] = int(a1)
        except ValueError:
            return "فقط عدد مجاز است!"  # "Only a number is allowed!"
        dt.config["mofid_pass"] = a2
        dt.save_config()
        return str(dt.init_mofid())
    elif q == "classify":
        global classifier
        if classifier is not None and classifier.active:
            return "already"
        classifier = Classify()
        classifier.start()
        return "started"
    elif q == "reset":
        c = dt.cur(True)
        try:
            for rt in fn.required_tables.keys():
                c.execute("DROP TABLE IF EXISTS " + rt)
        except Exception:
            return "aborted"
        dt.cur(False)
        return "done"
    elif q == "check":
        c = dt.cur(True)
        # Note: a1 is interpolated directly into the SQL string; a
        # parameterized query would be safer.
        c.execute("SELECT auto FROM symbol WHERE id='" + a1 + "' LIMIT 1")
        try:
            stat = c.fetchone()[0]  # int
        except IndexError:
            return "not found"
        binary = fn.auto_to_binary(stat)
        if a2 == "-1":
            binary = "".join([a3 for _ in range(len(dt.config["timeframes"]))])
        else:
            binary = list(binary)
            binary[int(a2)] = a3
            binary = "".join(binary)
        c.execute("UPDATE symbol SET auto = '" + str(int(binary, 2)) + "' WHERE id='" + a1 + "'")
        dt.connect.commit()
        dt.cur(False)
        return binary
    elif q == "analyze":
        ret = fn.persian_board(a3)
        if ret is None:
            return "invalid date"
        a = ret[0]
        b = ret[1]
        Analyzer.put_temp(a1, int(a2), a, b)
        return '<img src="./html/img/indicator_1.png" class="indicator">'
    elif q == "delete":
        a = b = tfr = None
        if a2 != "":
            tfr = dt.config["timeframes"][int(a2)]["value"]
        if a3 != "":
            ret = fn.persian_board(a3)
            if ret is None:
                return "invalid date"
            a = ret[0]
            b = ret[1]
        Analyzer.put_temp(a1, tfr, a, b, "delete")
        return "saved"
    elif q == "update_all":
        global updating
        if updating:
            return "already"
        updating = True
        c = dt.cur(True)
        c.execute("SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%';")
        tbs = fn.tables(c)
        dt.cur(False)
        for tb in tbs:
            if tb not in fn.required_tables:
                update_table(tb)
        updating = False
        return "saved"
    elif q == "update_symbol":
        c = dt.cur(True)
        c.execute("SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%';")
        tbs = fn.tables(c)
        dt.cur(False)
        for tb in tbs:
            if tb.startswith("s" + str(a1) + "_"):
                update_table(tb)
        return "saved"
    elif q == "update_table":
        since = None
        if a3 != "":
            try:
                since = fn.persian_date(a3)
            except ValueError:
                return "invalid date"
        tb = "s" + str(a1) + "_" + dt.config["timeframes"][int(a2)]["name"].lower()
        update_table(tb, since)
        return "saved"
    elif q == "change_timeframe":
        which = -1
        for tfr in range(len(dt.config["timeframes"])):
            if dt.config["timeframes"][tfr]["name"] == a1:
                which = tfr
        if which != -1:
            dt.config["timeframes"].pop(which)
        else:
            for ctf in fn.all_timeframes:
                if ctf["name"] == a1:
                    dt.config["timeframes"].append(ctf)
        dt.config["timeframes"] = sorted(dt.config["timeframes"], key=lambda i: i['value'])
        dt.save_config()
        return str(which == -1)
    elif q == "shutdown":
        mt5.shutdown()
        dt.connect.close()
        os.kill(os.getpid(), signal.SIGTERM)
    else:
        return 500
from imageprocessing.image_processor_predict import ImageProcessorPredict
from classify import Classify
from utils.app_constants import AppConstants
import math

if __name__ == '__main__':
    image_size = 224
    channel = 3
    label_size = 2

    # Predict
    classify = Classify(image_size, label_size, channel, AppConstants.MODEL_PATH_NAME)
    classify.load()
    image_predict = ImageProcessorPredict(image_size)
    image_arr = image_predict.get_image(AppConstants.ROOT_MODEL + 'car/car1.jpg')
    probs = classify.predict(image_arr)[0]
    # Expected count: each class index i is weighted by exp(i + 4) and by its
    # predicted probability, then summed and truncated to an int.
    cnt = int(sum(math.exp(i + 4) * probs[i] for i in range(len(probs))))
    probs = [(i, round(100 * p, 1)) for i, p in enumerate(probs)]
    print(probs)