def setUpTesting(SamplesSize, featurestype, modeltype):
    """Load the stored training data for ``featurestype`` and train a model.

    Every file found under ``./trainingdata/<featurestype>responses`` and
    ``./trainingdata/<featurestype>samples`` is read with ``np.loadtxt`` and
    flattened into one response vector and one sample matrix holding one row
    per response.

    :param SamplesSize: not used by this function; kept for compatibility.
    :param featurestype: path fragment appended verbatim to
        ``./trainingdata/`` (no separator is inserted here).
    :param modeltype: model identifier forwarded to ``Classification``.
    :return: ``[responses, samples, model, responses_order]`` where
        ``responses_order`` is the sorted list of distinct response values.
    """
    dirtocheck = "./trainingdata/" + featurestype
    responsespath = "responses"
    samplespath = "samples"

    responses = []
    samples = []

    for root, _, files in os.walk(dirtocheck + responsespath):
        for f in files:
            newResponses = np.loadtxt(os.path.join(root, f), np.float32)
            newResponses = newResponses.reshape((newResponses.size, 1))
            responses = np.append(responses, [newResponses])

    for root, _, files in os.walk(dirtocheck + samplespath):
        for f in files:
            newSamples = np.loadtxt(os.path.join(root, f), np.float32)
            samples = np.append(samples, [newSamples])

    # Floor division: "/" produces a float on Python 3, which is neither a
    # valid reshape dimension nor a sensible input count.
    ninputs = len(samples) // len(responses)
    samples = np.array(samples.reshape(np.size(responses), ninputs),
                       np.float32)
    responses = np.array(responses, np.float32)

    # Distinct labels, computed once and reused for the output count.
    responses_order = sorted(set(responses))
    noutputs = len(responses_order)

    model = Classification.createModel(modeltype, ninputs, noutputs)
    Classification.trainModel(modeltype, model, responses, samples,
                              responses_order)
    return [responses, samples, model, responses_order]
class MainWindow(QMainWindow):
    """Start window with project fields, a dataset loader and a PROCEED button."""

    def __init__(self):
        super(MainWindow, self).__init__()
        self.setGeometry(450, 250, 500, 350)
        #self.setFixedSize(self.size())
        self.setWindowTitle('Main')
        self.main()

    def main(self):
        """Create and position every child widget, then show the window."""
        # Labels
        label = QLabel('Project Name:', self)
        label.move(40, 20)
        label2 = QLabel('File Path:', self)
        label2.move(70, 60)

        # Text input boxes (disabled, so display only)
        text = QLineEdit(self)
        text.resize(335, 25)
        text.move(150, 25)
        text.setDisabled(True)
        text2 = QLineEdit(self)
        text2.resize(335, 25)
        text2.move(150, 65)
        text2.setDisabled(True)

        # Buttons: PROCEED stays disabled until LOAD DATASET has been clicked
        btn3 = QPushButton('PROCEED', self)
        btn3.setEnabled(False)
        btn3.resize(150, 45)
        btn3.move(190, 280)
        btn3.clicked.connect(self.nextWindow)
        btn = QPushButton('LOAD DATASET', self)
        btn.resize(150, 45)
        btn.move(335, 150)
        btn.clicked.connect(self.file_open)
        btn.clicked.connect(lambda: btn3.setEnabled(True))

        # Progress bar
        self.progress = QProgressBar(self)
        self.progress.setGeometry(150, 110, 335, 25)
        self.show()

    def nextWindow(self):
        """Start the classification UI and close this window."""
        self.window = QMainWindow()
        self.ui = Classification()
        self.ui.classification()
        self.close()

    def file_open(self, btn):
        """Show the file dialog, then run the progress bar up to 100.

        :param btn: value delivered by the ``clicked`` signal; not used.
        """
        # The dialog's return value was never used by the original code.
        QFileDialog.getOpenFileName(self, 'Open File')
        # Progress for uploading file
        self.completed = 0
        while self.completed < 100:
            self.completed += 0.00005
            # setValue() takes an int; newer PyQt releases reject a float.
            self.progress.setValue(int(self.completed))
def calculate_classification(self):
    """Classify every cross-validation fold and gather the per-fold results.

    For each fold index below ``self.number_cross_validation`` the data set
    is split, classified, and wrapped in a ``ClassificationUnit`` that is
    added to a fresh ``Classification`` container.

    :return: the populated ``Classification`` container.
    """
    collected = Classification()
    for fold in range(self.number_cross_validation):
        training_part, testing_part = self.split_data_set(fold)
        predicted = self.__classify(training_part, testing_part)
        collected.add_unit(
            ClassificationUnit(training_part, testing_part, predicted))
    return collected
def train(model, path_source_documents, path_system_summaries,
          path_reference_summaries, path_system_summary_scores):
    """Train ``model`` on the data at the given paths and save it.

    The trained model is written to ``Models/<model>.pkl``.

    :param model: model name; also used to build the output file name.
    :param path_source_documents: location of the source documents.
    :param path_system_summaries: location of the system summaries.
    :param path_reference_summaries: location of the reference summaries.
    :param path_system_summary_scores: location of the summary scores.
    """
    loaded = loader.loadTrainingData(path_source_documents,
                                     path_system_summaries,
                                     path_reference_summaries,
                                     path_system_summary_scores)
    documents, summaries, references, scores = loaded
    fitted = classification.trainModel(model, documents, summaries,
                                       references, scores)
    target = "Models/" + model + ".pkl"
    classification.saveModel(fitted, target)
def Test_Acc():
    """Self-check for ``Cl.Accuracy`` with the last flag set to True and False.

    :return: ``"Funguje"`` when both calls give the expected pair,
        otherwise ``"Nefunguje"``.
    """
    def first_labels():
        # 10 ones, 15 twos, 5 zeros
        return np.hstack((np.ones(10), 2 * np.ones(15), np.zeros(5)))

    def second_labels():
        # 10 ones, 15 zeros, 5 twos
        return np.hstack((np.ones(10), np.zeros(15), 2 * np.ones(5)))

    acc, mis = Cl.Accuracy(first_labels(), second_labels(), 3, True)
    acc1, mis1 = Cl.Accuracy(first_labels(), second_labels(), 3, False)
    #print(acc, mis, acc1, mis1)
    passed = acc == 1.0 and mis == 0 and acc1 == 1 / 3 and mis1 == 20
    return "Funguje" if passed else "Nefunguje"
def __init__(self):
    """Build parent and child lookup tables from the classification rules.

    Top-level categories are attached to the artificial root ``'x'``;
    sub-categories are attached to the category returned by
    ``Classification.subcatToCat``.
    """
    self.childlist = {}
    self.parentlist = {}
    self.categorylist = Classification.getclassificationrule()
    self.childlist['x'] = []

    for node in self.categorylist:
        if Classification.isCat(node):
            # Top-level category: child of the root, with its own child list.
            self.parentlist[node] = 'x'
            self.childlist['x'].append(node)
            self.childlist[node] = []
            continue
        if Classification.isSubcat(node):
            parent = Classification.subcatToCat(node)
            self.parentnode = parent
            self.parentlist[node] = parent
            self.childlist[parent].append(node)
def PrincipalComponentAnalysis(model, train_data, test_data, n_comp, configg,
                               model_nm, dataset_nm):
    """Score ``model`` on PCA-reduced features over ten folds.

    For each of the ten folds the features are standardised, projected to
    ``n_comp`` principal components, classified with a copy of ``model``,
    scored, and recorded in a scoring table that is saved at the end.

    :param model: estimator to copy for every fold.
    :param train_data: per-fold training data, indexed by fold number.
    :param test_data: per-fold test data, indexed by fold number.
    :param n_comp: ``n_components`` passed to ``PCA``.
    :param configg: feature configuration passed to ``CL.prepare_features``.
    :param model_nm: model name used in the table file name.
    :param dataset_nm: data set name used in the table file name.
    :return: the value of ``table.return_table()``.
    """
    table = CL.ScoringTable(name=f"{model_nm}_{dataset_nm}_dataset.csv",
                            location=valid_path)

    for fold in range(10):
        began = time()
        fold_train = train_data[fold]
        fold_test = test_data[fold]

        train_matrix = CL.prepare_features(data=fold_train, config=configg)
        test_matrix = CL.prepare_features(data=fold_test, config=configg)
        labels = d.merge_labels(d.get_layer(fold_train, 2))
        clf = copy(model)

        # Standardise using statistics of the training fold only.
        scaler = StandardScaler()
        train_matrix = scaler.fit_transform(X=train_matrix, y=labels)
        test_matrix = scaler.transform(X=test_matrix)

        reducer = PCA(n_components=n_comp)
        train_matrix = reducer.fit_transform(X=train_matrix)
        test_matrix = reducer.transform(X=test_matrix)

        states = CL.train_and_predict(clf, train_matrix, test_matrix, [],
                                      labels, unsupervised=False,
                                      HMMmodified=False)
        s_a, s_m, s_f, s_fa, s_p, s_r = CL.score(states, fold_test[0][2],
                                                 unsupervised=False)
        table.add(scores=[s_a, s_m, s_f, s_fa, s_p, s_r],
                  n_estim=clf.n_estimators,
                  configg={"Komb": "pca", "Param": "pca"})
        print(fold + 1, '. -> ', round(time() - began), "seconds")

    table.save_table()
    return table.return_table()
def start(self):
    """Run every stage of the ML job and report whether it succeeded.

    The stages run in this order: read user data, feature generation,
    preprocessing, feature selection, feature extraction, classification,
    cross validation, test-set generation, test-set preprocessing and
    test-set prediction. If any stage raises, the job is marked
    ``"Errored"`` and the traceback is logged.

    :return: ``True`` when all stages completed, ``False`` otherwise.
    """
    # perform some logging
    self.jlogger.info("Starting job with job id {}".format(self.job_id))
    self.jlogger.debug("Job Config: {}".format(self.config))
    self.jlogger.debug("Job Other Data: {}".format(self.job_data))
    try:
        rud.ReadUserData(self)
        fg.FeatureGeneration(self, is_train=True)
        pp.Preprocessing(self, is_train=True)
        fs.FeatureSelection(self, is_train=True)
        fe.FeatureExtraction(self, is_train=True)
        clf.Classification(self)
        cv.CrossValidation(self)
        tsg.TestSetGeneration(self)
        tspp.TestSetPreprocessing(self)
        tsprd.TestSetPrediction(self)
        job_success_status = True
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not mistaken for a failed job.
        job_success_status = False
        helper.update_running_job_status(self.job_id, "Errored")
        self.jlogger.exception("Exception occurred in ML Job {} ".format(
            self.job_id))
    return job_success_status
def parse(e):
    """Copy symbol identities and relations onto ``e`` from its best match.

    The first expression returned by
    ``Classification.getMatchingExpression`` is used as the template. Each
    symbol of ``e`` takes the identity of the nearest not-yet-used template
    symbol, measured between symbol centres with ``euc_dist``. If anything
    fails, ``e.relations`` is set to an empty list instead.

    :param e: expression whose ``symbols`` are relabelled in place.
    :return: the same object ``e``.
    """
    try:
        match = Classification.getMatchingExpression(e)[0]

        # Index the template symbols by an "x y" string of their centre.
        centers = {}
        for symbol in match.symbols:
            center = symbol.center()
            centers[str(center[0]) + ' ' + str(center[1])] = symbol
        keys = [[float(x.split(' ')[0]), float(x.split(' ')[1])]
                for x in centers]

        for symb in e.symbols:
            center = symb.center()
            distances = {}
            for key in keys:
                distances[euc_dist(center, key)] = key
            closestDist = distances[min(distances)]
            closest = centers[str(closestDist[0]) + ' ' + str(closestDist[1])]
            symb.ident = closest.ident
            # Each template symbol may be assigned only once.
            keys.remove(closestDist)

        e.relations = match.relations
        return e
    except Exception:
        # Best-effort fallback kept from the original, narrowed from a bare
        # except so interpreter-exit signals are no longer swallowed.
        e.relations = []
        return e
def train_ANN(self):
    """Train the neural network and dump it to ``classification_weights.pkl``.

    Per the original comment, training uses examples from chars74k-lite.
    The new classifier is stored on ``self.classification``.
    """
    classifier = Classification.Classifier()
    self.classification = classifier
    classifier.train_neural_network()
    joblib.dump(classifier.network,
                "classification_weights.pkl",
                compress=9)
def Test_Derivace():
    """Self-check: ``Cl.Derivace(values, 1)`` must agree with ``np.gradient``.

    :return: ``"Funguje"`` on agreement, otherwise ``"Nefunguje"``.
    """
    values = [1, 2, 4, 7, 11, 16]
    computed = Cl.Derivace(values, 1)
    agrees = np.allclose(computed, np.gradient(values))
    return "Funguje" if agrees else "Nefunguje"
def classify(model, path_source_documents, path_system_summaries,
             path_reference_summaries):
    """Load a saved model, classify the given data and print the prediction.

    :param model: file name of the model inside ``Models/``; must be listed
        in ``modelFiles``.
    :param path_source_documents: location of the source documents.
    :param path_system_summaries: location of the system summaries.
    :param path_reference_summaries: location of the reference summaries.
    :raises Exception: if more than two system summaries were loaded, or if
        ``model`` is not a known model file.
    """
    loaded = loader.loadClassificationData(path_source_documents,
                                           path_system_summaries,
                                           path_reference_summaries)
    documents, summaries, references = loaded

    # Guard clauses, checked in the same order as before.
    if len(summaries) > 2:
        raise Exception('More than two system summaries provided')
    if model not in modelFiles:
        raise Exception('Model does not exist')

    restored = classification.loadModel('Models/' + model)
    prediction = classification.classifyData(restored, documents, summaries,
                                             references)
    print(prediction)
def crossvalidation(df):
    """Run unshuffled 10-fold cross validation with the SVM classifier.

    The last column of ``df`` supplies the labels and all other columns the
    features.

    :param df: data frame with features first and the label column last.
    :return: ``(mean precision, mean recall, mean f-measure)`` over folds.
    """
    # Label column matching the extracted data.
    labels = df.iloc[:, -1:].values.ravel()
    features = df.iloc[:, 0:-1].values

    splitter = KFold(n_splits=10, shuffle=False)
    precisions, recalls, f_measures = [], [], []

    for fold_no, (train_index, test_index) in enumerate(
            splitter.split(features), start=1):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        print("Fold Ke-", fold_no)

        # Classification happens here (the Naive Bayes variant is disabled).
        precision, recall, f_measure = cl.klasifikasi_svm(
            X_train, X_test, y_train, y_test)
        precisions.append(precision)
        recalls.append(recall)
        f_measures.append(f_measure)

    return np.mean(precisions), np.mean(recalls), np.mean(f_measures)
# getTestAccuracyData: builds argument and link feature sets from the
# documents "essay80" .. "essay89" of the module-level `reader`, using a fresh
# Classification.Classification() and nltk.classify.apply_features, and
# returns [ArgumentTesting_set, LinksTesting_set].
# NOTE(review): the body was collapsed onto one line, so the original loop
# nesting and the position of `return` cannot be recovered from here. No
# accumulator is visible, so the returned sets presumably describe a single
# essay only -- TODO confirm against the original file before reformatting.
def getTestAccuracyData(self): classification = Classification.Classification() for i in range(80, 90): doc = reader.documents["essay" + str(i)] dataObjectList = [] annotatedData = set(doc.annotations) for annotation in annotatedData: dataObject = {"annotation": annotation.repr, "labels": annotation.labels.items(), "links": annotation.links} dataObjectList.append(dataObject) data = self.ExtractDataFeatures(dataObjectList, doc.key) preTrainingData = classification.prepareTrainingData(data) # arguments and links Arguments = preTrainingData[0] Links = preTrainingData[1] Arg_word_features = classification.getWordFeatures(Arguments) Link_word_features = classification.getWordFeatures(Links) classification.setWordfeatureSet(Arg_word_features) ArgumentTesting_set = nltk.classify.apply_features(classification.extract_features, Arguments) classification.setWordfeatureSet(Link_word_features) LinksTesting_set = nltk.classify.apply_features(classification.extract_features, Links) return [ArgumentTesting_set,LinksTesting_set]
def GroupClassification(train, test, classifier, param, test_ids, eval_source):
    '''
    Train a classifier, then classify and post-process every test recording.

    :param train: train[0] --> list of NO-FATIGUE samples,
                  train[1] --> list of FATIGUE samples
    :param test: test[0] --> list of NO-FATIGUE samples,
                 test[1] --> list of FATIGUE samples (one entry per recording)
    :param classifier: classifier name, e.g. 'svm'
    :param param: classifier parameter
    :param test_ids: per-recording ids passed to CompareToInitialStudy
    :param eval_source: passed through to CompareToInitialStudy
    :return: confusion matrix, confusion matrix after post-processing, and
             the raw and post-processed predictions of the last recording
    '''
    CM = numpy.zeros((2, 2))
    CM_post = numpy.zeros((2, 2))

    # From lists to matrices, then normalise (zero mean, unit std).
    no_fatigue_train = numpy.concatenate(train[0])
    fatigue_train = numpy.concatenate(train[1])
    features_norm, MEAN, STD = clf.normalizeFeatures(
        [no_fatigue_train, fatigue_train])

    # Train the classifier.
    model = Classify(classifier, features_norm, param)

    # Test, one recording at a time.
    for recording in range(len(test[0])):
        predictions = []
        probs = []
        rec_nf = test[0][recording]
        rec_f = test[1][recording]
        test_labels = [0] * rec_nf.shape[0] + [1] * rec_f.shape[0]
        vectors = numpy.concatenate((rec_nf, rec_f))

        for row in range(vectors.shape[0]):
            # Apply the training-set normalisation to each feature vector.
            fV = (vectors[row, :] - MEAN) / STD
            Result, P = clf.classifierWrapper(model, classifier, fV)
            probs.append(numpy.max(P))
            predictions.append(Result)

        for gtlabel, predicted in zip(test_labels, predictions):
            CM[int(gtlabel), int(predicted)] += 1

        post_predictions = postProcessing(predictions)
        for gtlabel, predicted in zip(test_labels, post_predictions):
            CM_post[int(gtlabel), int(predicted)] += 1

        CompareToInitialStudy(post_predictions, test_ids[recording],
                              eval_source)

    return CM, CM_post, predictions, post_predictions
def main():
    """Run all four classifier experiments, then the sliding-window detector."""
    # Classification part
    classifier = Classification.Classifier()
    for step in ("do_knn", "do_svm", "do_random_forest", "do_neural_network"):
        getattr(classifier, step)()

    # Sliding window and classification
    detector = Detection.Detector()
    detector.run()
def __init__(self):
    """Create an empty record: blank text fields, empty lists, new Classification."""
    self.primary_id = None
    self.other_ids = []
    # Text fields start out as empty strings.
    self.name = self.description = self.indication = self.pharmacodynamics = ''
    self.classification = Classification()
    # Every list field gets its own fresh list.
    for list_field in ('synonyms', 'international_brands', 'categories',
                       'sequences'):
        setattr(self, list_field, [])
    self.molecular_weight = self.molecular_formula = ''
    for list_field in ('pathways_drugs', 'pathways_enzymes', 'atc_codes'):
        setattr(self, list_field, [])
def Test_Conf_Mat():
    """Self-check of ``Cl.Confusion_Matrix`` against a hand-written 3x3 matrix.

    :return: ``"Funguje"`` when the matrices agree, otherwise ``"Nefunguje"``.
    """
    res = np.array([0, 1, 2, 0, 1, 2])
    data = np.array([0, 1, 1, 0, 0, 1])
    expected = np.array([[2, 0, 0],
                         [1, 1, 0],
                         [0, 2, 0]])
    computed = Cl.Confusion_Matrix(res, data, 3, False)
    return "Funguje" if np.allclose(computed, expected) else "Nefunguje"
def Test_RF():
    """Self-check of ``Cl.Moving_Variance(data, 5)`` against a stored result.

    Uses the module-level ``data`` and the reference array stored at
    ``way + "Unit_test_pro_RF.npy"``.

    :return: ``"Funguje"`` on agreement, otherwise ``"Nefunguje"``.
    """
    computed = Cl.Moving_Variance(data, 5)
    expected = np.load(way + "Unit_test_pro_RF.npy")
    verdict = "Funguje" if np.allclose(computed, expected) else "Nefunguje"
    return verdict
def __init__(self, filename="input.in"):
    """Set up the input state, then build the genetic-tree model and its view.

    :param filename: input file name stored on the instance; defaults to
        ``"input.in"``.
    """
    self.__filename = filename
    self.__inputs = []
    self.__inputTerminals = None
    self.__initialData = None
    # NOTE(review): presumably fills the attributes above from the file --
    # confirm in __readInputs.
    self.__readInputs()

    model = Classification.GeneticTreeModel(self.__initialData,
                                            self.__inputTerminals)
    self.__system = model
    self.__view = View.View(model)
# SensorSignals: endless accept loop on self.s. Each connection delivers one
# "@"-separated ASCII message of at most 1024 bytes:
#   'true@<playerID>@<sessionStart>'  starts a session and sets KinectFlag,
#   'false@<sessionEnd>'              ends it and reports through Session and
#                                     Classification.sendSession(True),
#   anything else while started       is treated as a sensor frame; its first
#                                     five fields are collected into frames and
#                                     strokes according to the flags returned
#                                     by IRCameraSignals.SensorCutStroke().
# NOTE(review): the body was collapsed onto one line, so the nesting of the
# `if` blocks cannot be recovered from here -- confirm against the original.
# NOTE(review): the final `s.close()` uses a bare `s` while the socket is
# `self.s`, and it follows a `while 1:` with no visible `break`; `conn` is
# never closed in the visible code. Worth fixing once the layout is restored.
def SensorSignals(self): SingleFrame = [] while 1: conn, addr = self.s.accept() buf = conn.recv(1024) x = buf.decode("ascii").split("@") if x[0] == 'true': #start getting data from sensor self.start = True self.playerID = int(x[1]) #playerID from android self.SessionStart = int(x[2]) self.KinectFlag = True #flag sent to kinect server to start getting data continue if self.start == True: #Flag from Kinect to cut and flag for stroke is going forward self.CutStroke, self.Forward = IRCameraSignals.SensorCutStroke( ) sensor = buf.decode("ascii").split("@") if self.Forward is True: for i in range(5): SingleFrame.append(sensor[i]) self.MultiFrames.append(SingleFrame) SingleFrame = [] if self.CutStroke is True: self.FullStrokes.append(self.MultiFrames) Preprocessing.SignalsIntake(self.MultiFrames, self.KinectStroke, self.playerID) self.MultiFrames = [] self.Forward = False if x[0] == 'false': self.start = False self.SessionEnd = int(x[1]) self.KinectFlag = False Session.GetInfoFromSensors(self.SessionStart, self.SessionEnd, self.playerID) Classification.sendSession(True) s.close()
def Test_srovnej():
    """Self-check of ``Cl.Srovnej`` against stored reference arrays.

    :return: ``"Funguje"`` when the first result element is 197 and the
        second matches the stored control array, otherwise ``"Nefunguje"``.
    """
    res_data = np.load(way + 'Unit_test_pro_srovnej_result.npy')
    data = np.load(way + 'Unit_test_pro_srovnej_stavy.npy')
    expected = np.load(way + 'Unit_test_pro_srovnej_kontrola.npy')
    # Originally this was CL.srovnej(res_data, data); after a bug fix in
    # srovnej the arguments have to be passed in this order.
    outcome = Cl.Srovnej(data, res_data)
    passed = outcome[0] == 197 and np.allclose(outcome[1], expected)
    return "Funguje" if passed else "Nefunguje"
def Test_SZL():
    """Self-check of ``Cl.Exp_Moving_Mean(np.ones(20), 10)`` against a stored result.

    :return: ``"Funguje"`` on agreement with ``Unit_test_pro_SZLF.npy``,
        otherwise ``"Nefunguje"``.
    """
    computed = Cl.Exp_Moving_Mean(np.ones(20), 10)
    expected = np.load(way + "Unit_test_pro_SZLF.npy")
    matches = np.allclose(computed, expected)
    return "Funguje" if matches else "Nefunguje"
def Test_PaR():
    """Self-check of ``Cl.Precision_n_Recall`` against a stored result.

    :return: ``"Funguje"`` on agreement with ``PaR_real_res.npy``,
        otherwise ``"Nefunguje"``.
    """
    res = np.array([0, 1, 2, 0, 1, 2, 2])
    data = np.array([0, 1, 1, 0, 0, 1, 2])
    computed = Cl.Precision_n_Recall(res, data, 3, False)
    expected = np.load(way + "PaR_real_res.npy")
    return "Funguje" if np.allclose(computed, expected) else "Nefunguje"
def main(argv=None):
    """Train a symbol classifier and pickle ``(trained, pca, keys)``.

    ``argv[0]`` selects the model (``-nn``, ``-rf``, ``-et`` or the path of a
    pickled model), ``argv[1]`` is the output file, and ``argv[2]`` (plus
    ``argv[3]`` when given) locate the training symbols.

    :param argv: argument list; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        # Dirty trick to make this convenient in the interpreter.
        argv = sys.argv[1:]

    argc = len(argv)
    if argc < 3 or argc > 4:
        print(("bad number of args:", len(argv)))
        print(usage)
        return

    if argc == 3:
        exprs, keys = SymbolData.unpickleSymbols(argv[2])
    else:
        exprs = SymbolData.readInkmlDirectory(argv[2], argv[3], True)
        keys = SymbolData.defaultClasses

    choice = argv[0]
    if choice == "-nn":
        model = Classification.OneNN()
    elif choice == "-rf":
        model = Classification.makeRF()
    elif choice == "-et":
        model = Classification.makeET()
    else:
        # This had better be a sklearn model or the equivalent; things will
        # break in hard-to-test ways if it isn't.
        with open(choice, 'rb') as model_file:
            model = pickle.load(model_file)

    symbs = SymbolData.allSymbols(exprs)
    trained, pca = Classification.train(model, symbs, keys)
    print("Done training.")

    # Disabled training-set accuracy report, kept as in the original.
    if False:
        f = Features.features(symbs)
        if (pca != None):
            f = pca.transform(f)
        pred = model.predict(f)
        print("Accuracy on training set : ",
              accuracy_score(SymbolData.classNumbers(symbs, keys), pred))

    #joblib.dump((trained, pca), argv[2])
    with open(argv[1], 'wb') as out_file:
        pickle.dump((trained, pca, keys), out_file, pickle.HIGHEST_PROTOCOL)
def main(argv=None):
    """Train a symbol classifier and pickle ``(trained, pca, keys)``.

    ``argv[0]`` selects the model (``-nn``, ``-rf``, ``-et`` or the path of a
    pickled model), ``argv[1]`` is the output file, and ``argv[2]`` (plus
    ``argv[3]`` when given) locate the training symbols.

    :param argv: argument list; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        # Dirty trick to make this convenient in the interpreter.
        argv = sys.argv[1:]

    argc = len(argv)
    if argc < 3 or argc > 4:
        print(("bad number of args:", len(argv)))
        print(usage)
        return

    if argc == 3:
        exprs, keys = SymbolData.unpickleSymbols(argv[2])
    else:
        exprs = SymbolData.readInkmlDirectory(argv[2], argv[3])
        keys = SymbolData.defaultClasses

    choice = argv[0]
    if choice == "-nn":
        model = Classification.OneNN()
    elif choice == "-rf":
        model = Classification.makeRF()
    elif choice == "-et":
        model = Classification.makeET()
    else:
        # This had better be a sklearn model or the equivalent; things will
        # break in hard-to-test ways if it isn't.
        with open(choice, 'rb') as model_file:
            model = pickle.load(model_file)

    symbs = SymbolData.allSymbols(exprs)
    trained, pca = Classification.train(model, symbs, keys)
    print("Done training.")

    # Disabled training-set accuracy report, kept as in the original.
    if False:
        f = Features.features(symbs)
        if (pca != None):
            f = pca.transform(f)
        pred = model.predict(f)
        print("Accuracy on training set : ",
              accuracy_score(SymbolData.classNumbers(symbs, keys), pred))

    #joblib.dump((trained, pca), argv[2])
    with open(argv[1], 'wb') as out_file:
        pickle.dump((trained, pca, keys), out_file, pickle.HIGHEST_PROTOCOL)
def main(argv=("RF20_FullDepthBoxFeat.mdl", "trainImgs.mdl",
               "outPerfectSegBlurred", "test", "testLg", '-pc')):
    """Classify every InkML file of a directory with a pickled model.

    Argument layout as used by the code: ``argv[0]`` pickled
    ``(model, pca, keys)``, ``argv[1]`` pickled training images,
    ``argv[2]`` passed to ``classifyExpressions``, ``argv[3]`` directory of
    InkML files, ``argv[4]`` and ``argv[5]`` passed to
    ``SymbolData.readInkml``.

    :param argv: argument sequence. The default is now an immutable tuple
        instead of a shared mutable list. Passing ``None`` falls back to
        ``sys.argv[1:]``.
    """
    if argv is None:
        # Dirty trick to make this convenient in the interpreter.
        argv = sys.argv[1:]

    if len(argv) < 3 or len(argv) > 6:
        print(("bad number of args:", len(argv)))
        print(usage)
        return

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load model files from trusted sources.
    with open(argv[0], 'rb') as model_file:
        model, pca, keys = pickle.load(model_file)
    Classification.setClassificationModel(model, pca, keys)

    with open(argv[1], 'rb') as train_file:
        trainImgs = pickle.load(train_file)
    Classification.setTrainData(trainImgs)

    # List the directory once and reuse it for the total and the loop.
    inkml_files = SymbolData.filenames(argv[3])
    tot = len(inkml_files)
    for i, inkml_path in enumerate(inkml_files):
        print(i, "/", tot)
        exp = SymbolData.readInkml(inkml_path, argv[4], True, argv[5])
        Classification.classifyExpressions([exp], keys, model, pca, argv[2],
                                           showAcc=True)
def actionRecognition(skeletons, dispInfo):
    """Process one skeleton frame according to the mode flags in ``settings``.

    Depending on ``settings`` the posture vector is saved as training data,
    appended to the activity sequence whose word is then saved, or used to
    detect the current activity.

    :param skeletons: sequence of skeletons; only the first one is used.
    :param dispInfo: passed to ``PostureFeatureExtration.jointsDetection``.
    """
    global activitySequence
    global lastPosture
    global activityDetected

    training = settings.training
    creatingClusters = settings.creatingClusters
    creatingActivitySequence = settings.creatingActivitySequence
    numberOfClubsters = settings.numberOfClubsters
    activity = settings.activity
    activityFile = settings.activity.title().replace(" ", "")
    clusters = settings.clusters
    words = settings.words

    # Joint coordinates, then the normalised joints (the posture vector).
    joints = PostureFeatureExtration.jointsDetection(skeletons[0], dispInfo)
    f = PostureFeatureExtration.jointsNormalization(joints)

    if not training:
        # Detect activity: final phase.
        postureLabel = ActivityFeatureComputation.createPostureLabel(
            clusters, f)
        activitySequence = ActivityFeatureComputation.createActivitySequence(
            activitySequence, postureLabel, numberOfClubsters)
        if settings.monitorActivity:
            lastPosture, settings.counter = Counter.ActivityCounter(
                activity, lastPosture, postureLabel, settings.counter)
        if len(activitySequence) == numberOfClubsters:
            wordForActivity = ActivityFeatureComputation.createWordForActivity(
                activitySequence)
            print('Palabra de actividad a comparar:', wordForActivity)
            activityDetected = Classification.getActivity(
                words, wordForActivity)
            settings.activityDetected = activityDetected
    elif creatingActivitySequence:
        # Create the activity sequence and store its word once complete.
        postureLabel = ActivityFeatureComputation.createPostureLabel(
            clusters, f)
        activitySequence = ActivityFeatureComputation.createActivitySequence(
            activitySequence, postureLabel, numberOfClubsters)
        if len(activitySequence) == numberOfClubsters:
            wordForActivity = ActivityFeatureComputation.createWordForActivity(
                activitySequence)
            ActivityFeatureComputation.saveWords(wordForActivity, activity)
    elif not creatingClusters:
        # Plain training mode: save the posture vector as training data.
        if len(f) > 0:
            PostureFeatureExtration.saveTrainingData(f, activityFile)
def CNN_classification(test_id, valid_id):
    """Split the image set by subject and train the CNN classifier.

    Images are read from ``medfilt5_label_0/*.png`` (label ``'0'``) and
    ``medfilt5_label_1/*.png`` (label ``'1'``). The part of each file name
    before the first ``'E'`` is compared with ``'U<id>'`` to assign the file
    to the test, validation or training split.

    :param test_id: id whose files form the test split.
    :param valid_id: id whose files form the validation split.
    """
    input_size = 100

    def LoadData(fileList_NF, fileList_F):
        """Load both file lists; label the first '0' and the second '1'."""
        data = []
        labels = []
        for im_path in fileList_NF:
            img = numpy.asarray(
                image.load_img(im_path, target_size=(input_size, input_size)))
            data.append(img)
            labels.append('0')
        for im_path in fileList_F:
            img = numpy.asarray(image.load_img(im_path,
                                               target_size=(input_size,
                                                            input_size)),
                                dtype='float64')
            data.append(img)
            labels.append('1')
        return numpy.asarray(data), numpy.asarray(labels)

    def subject_tag(path):
        """Return the file-name part before the first 'E'."""
        return path.split('/')[-1].split('E')[0]

    test_tag = 'U' + str(test_id)
    valid_tag = 'U' + str(valid_id)

    fileList_NF = sorted(glob.glob('medfilt5_label_0/*.png'))
    fileList_F = sorted(glob.glob('medfilt5_label_1/*.png'))

    # Real lists instead of lazy filter objects, so the splits behave the
    # same on Python 2 and Python 3.
    testfileList_NF = [p for p in fileList_NF if subject_tag(p) == test_tag]
    testfileList_F = [p for p in fileList_F if subject_tag(p) == test_tag]
    validfileList_NF = [p for p in fileList_NF if subject_tag(p) == valid_tag]
    validfileList_F = [p for p in fileList_F if subject_tag(p) == valid_tag]
    trainfileList_NF = [p for p in fileList_NF
                        if subject_tag(p) != test_tag
                        and subject_tag(p) != valid_tag]
    trainfileList_F = [p for p in fileList_F
                       if subject_tag(p) != test_tag
                       and subject_tag(p) != valid_tag]

    train_data, train_labels = LoadData(trainfileList_NF, trainfileList_F)
    test_data, test_labels = LoadData(testfileList_NF, testfileList_F)
    valid_data, valid_labels = LoadData(validfileList_NF, validfileList_F)

    # %-formatting gives the same text as the old Python 2 print statement
    # and is also valid Python 3.
    print("%s %s %s" % (train_data.shape, test_data.shape, valid_data.shape))

    clf.CNNclassifier(train_data, train_labels, valid_data, valid_labels,
                      input_size)
def getFunction(mlalgorithm, pDict):
    """Return the fitting method that corresponds to ``mlalgorithm``.

    A new ``Classification(pDict)`` is created and one of its bound ``fit*``
    methods is returned.

    :param mlalgorithm: one of ``"SVM"``, ``"RandomForest"``,
        ``"DecisionTree"``, ``"LogisticRegression"`` or
        ``"KNearstNeighbors"`` (spelling as used by callers).
    :param pDict: parameters forwarded to ``Classification``.
    :return: the bound method for the requested algorithm.
    :raises KeyError: if ``mlalgorithm`` is not one of the names above.
    """
    cl = Classification(pDict)
    options = {
        "SVM": cl.fitSVM,
        "RandomForest": cl.fitRandomForest,
        "DecisionTree": cl.fitDecisionTree,
        "LogisticRegression": cl.fitLogisticRegression,
        "KNearstNeighbors": cl.fitKNearNeighbors,
    }
    return options[mlalgorithm]
def main(argv=None):
    """Classify expressions with a pickled model and write LG files.

    ``argv[0]`` is the pickled ``(model, pca, keys)`` file and ``argv[1]``
    the LG output location. With three arguments ``argv[2]`` is a pickled
    ``(exprs, keys)`` file; with four, ``argv[2]`` and ``argv[3]`` are passed
    to ``SymbolData.readInkmlDirectory``.

    :param argv: argument list; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        # Dirty trick to make this convenient in the interpreter.
        argv = sys.argv[1:]

    if len(argv) < 3 or len(argv) > 4:
        print(("bad number of args:", len(argv)))
        print(usage)
        return

    if len(argv) == 3:
        # Expressions come from argv[2]. The original opened argv[0] here,
        # which is the model file and unpickles to a 3-tuple, so the
        # two-name unpacking could not succeed.
        with open(argv[2], 'rb') as expr_file:
            exprs, _ = pickle.load(expr_file)
    else:
        exprs = SymbolData.readInkmlDirectory(argv[2], argv[3])

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load model files from trusted sources.
    with open(argv[0], 'rb') as model_file:
        model, pca, keys = pickle.load(model_file)

    print("Classifying")
    truths, preds = Classification.classifyExpressions(exprs, keys, model,
                                                       pca, showAcc=True)

    print("Writing LG files.")
    for i, expr in enumerate(exprs):
        # A real list rather than a lazy map object, so writeLG receives the
        # same kind of value on Python 2 and Python 3.
        expr.writeLG(argv[1], clss=[keys[p] for p in preds[i]])
def Test_FM():
    """Self-check of ``Cl.F_Measure`` on a two-class example.

    ``FM[0][0]`` and ``FM[0][1]`` are compared after rounding to three
    decimals, and ``FM[1]`` after rounding to five.

    :return: ``"Funguje"`` when all three values match, otherwise
        ``"Nefunguje"``.
    """
    res = np.hstack((np.ones(50), np.zeros(40)))
    data = np.hstack((np.zeros(40), np.ones(50)))
    FM = Cl.F_Measure(res, data, 2)
    real_result = [0.889, 0.889, 0.88889]
    # The second comparison now checks FM[0][1]; it used to repeat FM[0][0],
    # leaving the second class's value unchecked.
    if (round(FM[0][0], 3) == real_result[0]
            and round(FM[0][1], 3) == real_result[1]
            and round(FM[1], 5) == real_result[2]):
        return "Funguje"
    return "Nefunguje"
# updatePhoneModelInput: callback for a new phone-model input. It reads
# phoneName.value, runs Classification.classify on it, builds the donut, bar,
# word-cloud and gapminder plots from the returned analysis dict, and adds an
# image Div, the gapminder plot and the bar/donut row to curdoc().
# It rebinds the module globals analysis, wedgeList, secondRow and slider.
# NOTE(review): the body was collapsed onto one line, so it is not visible
# whether initialize(...) belongs inside `if len(curdoc().roots) > 1:` --
# confirm against the original before reformatting.
# NOTE(review): `wc` is assigned but not used in the visible code.
def updatePhoneModelInput(): """Called when new model input is given. Calls required functions to get the plots. :returns: returns Nothing :rtype: none """ global analysis, wedgeList, secondRow, thirdRow, slider, donutCallBackId print(phoneName.value) analysis = None # nimg = Div(text="<img class='theImage' src='SmartPhoneReview/static/images/loading.gif' style=''>") t = phoneName.value print(len(curdoc().roots)) if len(curdoc().roots) > 1: curdoc().clear() initialize(curdoc(), str(phoneName.value)) # curdoc().add_root(nimg) analysis = Classification.classify(str(phoneName.value)) dPlot = analysis["overall"]["donutPlot"].copy() donutPlot, wedgeList = DonutPlot.plot(dPlot, curdoc(), analysis["overall"]["averageRating"]) barPlot = BarPlot.plot(analysis["overall"]["barPlot"]) secondRow = row(barPlot, donutPlot) wc = wordCloud.wordCloud(phoneName.value, analysis["overall"]["tweets"]) gapMinder, slider = gapminderplot.createGapMinder( analysis["gapminderplot"], features, curdoc()) if len(phoneName.value) > 10: t = t.split()[0] img = Div( text="<img class='theImage' src='SmartPhoneReview/static/images/" + "".join(t) + ".png' style=''>") curdoc().add_root(img) curdoc().add_root(gapMinder) curdoc().add_root(secondRow)
class Drug:
    """Record describing one drug: ids, text fields and related lists."""

    def __init__(self, primary_id=None, other_ids=None, name='',
                 description='', indication='', pharmacodynamics='',
                 classification=None, synonyms=None,
                 international_brands=None, categories=None, sequences=None,
                 molecular_weight='', molecular_formula='',
                 pathways_drugs=None, pathways_enzymes=None, atc_codes=None):
        """Create a drug record.

        The class used to define ``__init__`` twice, so the zero-argument
        version was silently replaced by the sixteen-argument one. Both are
        merged here: every parameter is optional and defaults to the value
        the zero-argument version assigned.

        List parameters left as ``None`` get a fresh empty list, and
        ``classification`` left as ``None`` gets a new ``Classification()``.
        """
        self.primary_id = primary_id
        self.other_ids = [] if other_ids is None else other_ids
        self.name = name
        self.description = description
        self.indication = indication
        self.pharmacodynamics = pharmacodynamics
        self.classification = (Classification() if classification is None
                               else classification)
        self.synonyms = [] if synonyms is None else synonyms
        self.international_brands = ([] if international_brands is None
                                     else international_brands)
        self.categories = [] if categories is None else categories
        self.sequences = [] if sequences is None else sequences
        self.molecular_weight = molecular_weight
        self.molecular_formula = molecular_formula
        self.pathways_drugs = [] if pathways_drugs is None else pathways_drugs
        self.pathways_enzymes = ([] if pathways_enzymes is None
                                 else pathways_enzymes)
        self.atc_codes = [] if atc_codes is None else atc_codes

    def addClassificaion(self, classification):
        """Replace the stored classification."""
        self.classification = classification

    def printout(self):
        """Print every field of the record to standard output.

        Each print call takes one parenthesised string, which produces the
        same output on Python 2 and Python 3.
        """
        print('--------Drug:--------')
        print('Primary id: ' + self.primary_id)
        print('Other ids: ')
        for ids in self.other_ids:
            print('\t> ' + ids)
        print('Name: ' + self.name)
        print('Description: ' + self.description)
        print('Indication: ' + self.indication)
        print('Pharmacodynamics: ' + self.pharmacodynamics)
        self.classification.printout()
        print('Synonyms: ')
        for syn in self.synonyms:
            print('\t> ' + syn)
        print('International brands: ')
        for ib in self.international_brands:
            print('\t> ' + ib)
        print('Categories: ')
        for ct in self.categories:
            print('\t> ' + ct)
        print('Sequences: ')
        for seq in self.sequences:
            seq.printout()
        print('Molecular weight: ' + str(self.molecular_weight))
        print('Molecular formula: ' + self.molecular_formula)
        print('Pathways drugs: ')
        for pt in self.pathways_drugs:
            print('\t> ' + pt)
        print('Pathways enzymes: ')
        for ept in self.pathways_enzymes:
            print('\t> ' + ept)
        print('ATC codes: ')
        for cd in self.atc_codes:
            print('\t> ' + cd)
        print('\n----------------\n')
def load_weights(self):
    """Create a classifier and load its network from ``classification_weights.pkl``.

    Per the original comment, this is used for mobility in testing.
    """
    classifier = Classification.Classifier()
    self.classification = classifier
    classifier.network = joblib.load("classification_weights.pkl")
# NOTE(review): whitespace-collapsed, truncated script fragment. Because the
# line starts with '#', Python currently reads all of it as one comment.
# The text describes a loop over query_ref.iter() that, at each '$' marker,
# prints a "[n/10388]" counter and the buffer `buff`, takes the entries before
# '#' as candidate classes, and picks `bookclass` from values returned by
# Classification.classtrim. It ends at a dangling `except ValueError as ve:`,
# so the rest of the loop is not visible here.
# NOTE(review): `cls == ValueError` compares against the exception class
# itself; presumably classtrim returns it as a sentinel -- confirm.
#get each book book_count = 1 for typecode, output in query_ref.iter(): if(output=='$'): print '' print '['+str(book_count)+'/10388]' book_count = book_count+1 print buff #print '---------------------------' # sign '#' means end of language section of the book classes = buff[0:buff.index('#')] bookclass="" #print classes for cls in classes: try: cls = Classification.classtrim(cls) if(cls == ValueError): continue try: k = cls.index('.') if(k>2): continue try: k = cls.index('-') for j in cls: if(j=='-'): j = ' ' bookclass = cls break except ValueError as ve: bookclass = cls break except ValueError as ve:
# NOTE(review): whitespace-collapsed, truncated script header. It imports
# BaseXClient, Classification, Tree, math, xml.dom.minidom, array and
# defaultdict, then inside a `try:` opens two BaseX sessions on
# localhost:1984, opens the database 'keywordXML', creates the database
# 'distinctness' and loads the category list.
# The text after "# Prepare dictionaries" is currently read as a comment. It
# describes creating the kf / siblingcount / siblingkeyword / distinctness
# dictionaries, building Tree.Categorytree() and counting each category's
# siblings. No handler for the `try:` is visible in this fragment.
# NOTE(review): the session credentials ('admin', 'admin') are hard-coded.
import BaseXClient import Classification import Tree import math import xml.dom.minidom as xmldom from array import * from collections import defaultdict try: session1 = BaseXClient.Session('localhost',1984,'admin','admin') session2 = BaseXClient.Session('localhost',1984,'admin','admin') session1.execute('open keywordXML') session2.execute('create db distinctness') categorylist = Classification.getclassificationrule() # Prepare dictionaries kf = {} siblingcount = {} siblingkeyword = {} distinctness = {} # Create a Category Tree. this will be later used for finding sibling of the category. cattree = Tree.Categorytree() # Count the amount of sibling categories for each category for cat in categorylist: siblingcount[cat] = len(cattree.getSibling(cat)) # Calculate keyword frequency in a category, store it to the kf table
# NOTE(review): whitespace-collapsed, truncated script fragment. It loops over
# query_ref.iter() and, at each '$' marker, reads the candidate classes (the
# entries of `buff` before '#') plus the two entries after '#' as `noticekoha`
# and `title`.
# The text after "# Clean book's class" is currently read as a comment. It
# describes choosing `bookclass` from values returned by
# Classification.classtrim, preferring the 995/k form that contains '.'.
# The fragment ends at a dangling `except ValueError as ve:`.
# NOTE(review): `cls == ValueError` compares against the exception class
# itself; presumably classtrim returns it as a sentinel -- confirm.
for typecode, ref in query_ref.iter(): if(ref=='$'): classes = buff[0:buff.index('#')] bookclass = "" noticekoha = buff[buff.index('#')+1] title = buff[buff.index('#')+2] # Clean book's class by comparing the 930/a subfields with 995/k subfields ( both 930/a and 995/k can be replicated) # because sometimes these fields are correspond to different value, even though it should always be the same # if they are different, finding the more suitable one to be used as the book's class # for example, sometimes 930/a = D8 ENG/S, while 995/k = D 8.02 SIPP for the same book # from the above example, the 995/k subfield is preferred because it is in the right format (having '.') for cls in classes: try: cls = Classification.classtrim(cls) if(cls == ValueError): continue try: k = cls.index('.') if(k>2): continue try: k = cls.index('-') for j in cls: if(j=='-'): j = ' ' bookclass = cls break except ValueError as ve: bookclass = cls break except ValueError as ve:
def grid_search_for_neighbor(class_num, subsample_size, window_size,
                             cluster_num, max_iter, rnd_number,
                             neighbor_num_seq):
    """Run the classifier once for every neighbour count in a sequence.

    The data set is generated once with ``FetchFile.gen_data`` and reused
    for every value of ``neighbor_num_seq``.

    :param class_num: forwarded to data generation and classification.
    :param subsample_size: forwarded to data generation and classification.
    :param window_size: forwarded to data generation and classification.
    :param cluster_num: forwarded to classification.
    :param max_iter: forwarded to classification.
    :param rnd_number: forwarded to data generation and classification.
    :param neighbor_num_seq: iterable of neighbour counts to try.
    """
    generated = FetchFile.gen_data(class_num, subsample_size, window_size,
                                   rnd_number)
    train_X, test_X, train_y, test_y = generated

    for neighbours in neighbor_num_seq:
        Classification.classifiy(class_num, subsample_size, window_size,
                                 cluster_num, max_iter, rnd_number,
                                 neighbours, train_X, train_y, test_X, test_y)