def build():
    """Load the structure and training files and build the classifier.

    Reads ``Structure.txt`` (tab separated) and ``train.csv`` from the
    directory typed into ``entry1``, passes the training data through
    ``Classifier.cleanData`` and publishes the cleaned frame as the
    module-level ``trainSet``.  Nothing happens when ``entry1`` is empty.
    When either file cannot be read, an error pop-up is shown and the build
    is aborted (previously execution continued and failed with a NameError).
    """
    if entry1.get() == "":
        return
    checkBinNum()

    # get structure file
    pathToStructure = (entry1.get() + "\\Structure.txt").replace('/', '\\')
    try:
        structure = pd.read_csv(pathToStructure, index_col=False, sep='\t')
    except Exception:
        popErrorMessage("Error- Empty Files!")
        return  # nothing to build from without the structure file

    # get train set
    pathToTrainSet = (entry1.get() + "\\train.csv").replace('/', '\\')
    try:
        _trainSet = pd.read_csv(pathToTrainSet)
    except Exception:
        popErrorMessage("Error- Empty Files!")
        return  # nothing to build from without the training data
    df_trainSet = pd.DataFrame(_trainSet)

    classifier = Classifier(structure, entry2)
    updateTrainSet, numericFeaturesArr = classifier.cleanData(
        pathToTrainSet, df_trainSet)
    globals()['trainSet'] = updateTrainSet

    # release the Classify button now that a model exists
    classifyBut.config(state="normal")
    popErrorMessage("Building classifier using train-set is done!")
def run_fbk(seq_record):
    """Annotate repeat information on the CDS features of ``seq_record``.

    Every feature whose ``type`` is ``"CDS"`` is scanned, checked against the
    known classes and searched for top k-mers.  Features with at least one
    top k-mer hit get a table built and expanded; when the first table row
    is longer than 3 the feature is recorded as a repeat region and a
    pattern is made for it.  Tables containing a row with at most two
    distinct symbols are discarded again.

    NOTE(review): the nesting below was reconstructed from a flattened
    source line - confirm the block boundaries against the original file.
    """
    repeat_regions = []
    for feat in seq_record.features:
        if feat.type == "CDS":
            scan_seqfeature_translation(seq_record, feat)
            Classifier.check_known_classes(feat)
            find_top_kmer(feat)
            if len(feat.qualifiers["top_kmer_hits"]) > 0:
                build_table(feat)
                expand_table(feat)
                # NOTE(review): the counter is reset for every feature that
                # reaches this point, so as written it never exceeds 1.
                seq_record.annotations["repetitive_sequence_number"] = 0
                if len(
                        feat.qualifiers["table"][0]
                ) > 3:  #TODO replace this with assesment module as bool check
                    seq_record.annotations["repetitive_sequence_number"] += 1
                    # Record the gene name when present, a placeholder otherwise.
                    if "gene" in feat.qualifiers:
                        repeat_regions.append(feat.qualifiers["gene"])
                    else:
                        repeat_regions.append("unknown region")
                    make_pattern(feat)
                    feat.qualifiers["has_repeat"] = True
                    seq_record.annotations["repeat_regions"] = repeat_regions
                    # A row with <= 2 distinct symbols invalidates the table.
                    should_delete = False
                    for t in feat.qualifiers["table"]:
                        if len(set(list(t))) <= 2:
                            should_delete = True
                    if should_delete:
                        feat.qualifiers["table"] = None
                        feat.qualifiers["has_repeat"] = False
                        # presumably "pattern" was set by make_pattern above - verify
                        del (feat.qualifiers["pattern"])
def buildClicked(self):
    """Build the classifier from ``Structure.txt`` and ``train.csv``.

    Both files are read from ``self.path``.  The resulting classifier is
    stored on ``self.classifier``, the classify button is enabled and a
    confirmation dialog is shown.
    """
    attrs = Data.getAttributesDictionary(self.path + "\\Structure.txt")
    # DataFrame.from_csv was deprecated and later removed from pandas;
    # read_csv with index_col=None loads the same table.
    trainData = pandas.read_csv(self.path + "\\train.csv", index_col=None)
    processedData = Data(trainData=trainData,
                         attributes=attrs,
                         numOfBins=self.numOfBins)
    self.classifier = Classifier(data=processedData)
    self.classifyButton['state'] = 'normal'
    tkMessageBox.showinfo("Naive Bayes Classifier",
                          "Building classifier using train-set is done!")
def saveClf(): clf = Classifier() mass = 125 clf.loadData("heavyTrainSet_DS_mass{}.npy".format(mass)) # Extracting features. nComps = 50 print "Extracting features from training data.." startExtractTime = time.time() percentVarCovered = clf.extractFeatsPCA(nComps) print "Original Image Size:", clf.imSet[0].shape print "Number of selected principal components:", nComps print "Percentage of variance covered:", percentVarCovered endExtractTime = time.time() extractTime = endExtractTime - startExtractTime print "Training data feature extraction time:", extractTime, "sec" print # Obtain classifier model and print the classification results on # training data. clf.model.fit(clf.featSet, clf.labelSet) predicts = clf.model.predict(clf.featSet) print "Classification results on training data (mass = {}):".format(mass) getScores(predicts, clf.labelSet, ["Double Chirp", "Not Double Chirp"]) # Save model joblib.dump(clf, "svm_mass{}.joblib".format(mass))
def eval_classifier(G, subs_coo, word_vec):
    """Evaluate the embeddings with ``Classifier.evaluate`` and print scores.

    For every classifier returned by the evaluation, the micro and macro F1
    are printed per training size, followed by their averages, and the
    results are handed to ``Classifier.plot_graph``.

    Original author's note: sometimes the model predicts nothing at all for
    some inputs (either a model fault or a user without subscriptions); a
    zero output would raise inside sklearn's F1 score function.  Currently
    evaluated with OVR Logistic Regression.

    NOTE(review): nesting reconstructed from a flattened source line -
    confirm that ``plot_graph`` belongs inside the per-classifier loop.
    """
    print("\t**Evaluating classifier performance with the embeddings**")
    results = Classifier.evaluate(G, subs_coo, word_vec)
    trainsize = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    print("\n Evaluation completed using the following:")
    for clf_name in results:
        print("--> ",clf_name)

    print("\nPrinting evaluation results : ")
    for name, res in results.items():
        print("\n\nClassifier : ",name)
        for tr_size, scores in zip(trainsize, res):
            print("\tTraining size : ",tr_size)
            print("\t\tMicro F1: ",scores[0])
            print("\t\tMacro F1: ",scores[1])
        avg = np.average(res, axis=0)
        print("\t---------------------------------------")
        print("\t Average Micro F1 : ",avg[0])
        print("\t Average Macro F1 : ",avg[1])
        Classifier.plot_graph(trainsize, res)
def _predictAnnotated(self, input):
    """Rank candidate words for the token at ``input.index``.

    Words before the target are scored by their position, words after it by
    their (negative) distance from the end.  Candidates that match
    ``input.input.base`` are kept and padded with words from ``self.words``
    until more than 50 results exist.  The ranking is descending when the
    nearest other occurrence of the target word lies at or before the target
    position, ascending otherwise.

    NOTE(review): nesting reconstructed from a flattened source line.
    """
    scores = defaultdict(lambda: -sys.maxint)
    actual = input.words[input.index][0]
    best_distance = sys.maxint
    prev_tokens = True

    for pos, (word, loc) in enumerate(input.words):
        if word == actual and pos != input.index:
            dist = input.index - pos
            if abs(dist) < best_distance:
                best_distance = abs(dist)
                prev_tokens = dist >= 0
        if pos < input.index:
            scores[word] = max(pos, scores[word])
        elif pos > input.index:
            scores[word] = max(pos - len(input.words), scores[word])

    results = {}
    for word, score in scores.items():
        if Classifier._matchesBase(input.input.base, word):
            results[word] = score

    # Pad with vocabulary words at the lowest possible score.
    for w in self.words:
        if len(results) > 50:
            break
        if Classifier._matchesBase(input.input.base, w) and w not in results:
            results[w] = -sys.maxint

    return sorted(results.items(), key=lambda pair: pair[1],
                  reverse=prev_tokens)
def Train(data, epochs, batch_size):
    """Train a GRU or CNN text classifier and return the trained model.

    The architecture is selected by the module-level ``use_rnn`` flag.  The
    model is optimised with Adam (lr=0.001) against cross-entropy loss for
    ``epochs`` passes over ``data.train_iter``; the summed loss of every
    epoch is printed.

    Args:
        data: object exposing ``embedding`` and ``train_iter``.
        epochs: number of passes over the training iterator.
        batch_size: batch size handed to the GRU classifier.

    Returns:
        The trained model, moved to ``device``.
    """
    if use_rnn:
        model = C.GRUClassifier(embedding_dim=data.embedding.shape[1],
                                hidden_dim=hidden_dim,
                                label_size=nlabel,
                                batch_size=batch_size,
                                embedding_weights=data.embedding,
                                bidirectional=BiDirection)
    else:
        model = C.CNNClassifier(DIM_EMB=data.embedding.shape[1],
                                NUM_CLASSES=nlabel,
                                NUM_FILTERS=nfilter,
                                embedding_weights=data.embedding)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_function = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        total_loss = 0.0
        for batch in data.train_iter:
            (feature, batch_length), label = batch.sentence, batch.label
            optimizer.zero_grad()
            output = model(feature, batch_length)
            loss = loss_function(output, label)
            loss.backward()
            optimizer.step()
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd graph alive for the whole epoch.
            total_loss += loss.item()
        print(f"loss on epoch {epoch} = {total_loss}")
    return model
def runTest(self): Classifier.classify_edits(self.edits) predictions = [edit.lexical_entailment for edit in self.edits] for prediction, edit in zip(predictions, self.edits): if prediction != 3: print prediction, edit self.assertEqual(predictions, self.target)
def __getClassifier(self):
    """Return a classifier that logs through this object.

    The stored model is loaded when ``__isClassifierExists`` reports one,
    otherwise a fresh ``Classifier`` is created.
    """
    classifier = (self.__loadModel()
                  if self.__isClassifierExists() else Classifier())
    classifier.SetLogger(self)
    return classifier
def show(request, filter_id=0):
    """Rank all items with the user's default filter and render the top 20.

    Only the default classifier of the current user is supported.  When the
    user has no default filter, the page is rendered with an error message
    instead.

    NOTE(review): nesting reconstructed from a flattened source line; as
    written, a non-zero ``filter_id`` reaches ``classifier.load()`` without a
    classifier being created - confirm against the original file.
    """
    context = {
        'username': request.user.username,
        'filter_id': filter_id,
    }
    errors = []

    if filter_id == 0:
        user = auth.get_user(request)
        if user.profile.default_filter is None:
            errors.append('plase train a filter first!')
            return render_to_response('show.html', {
                'username': request.user.username,
                'errors': errors
            })
        classifier = Classifier(user=auth.get_user(request),
                                id=user.profile.default_filter.id)

    classifier.load()
    items = Item.objects.all()
    p_label, p_acc, p_val = classifier.predict(items)

    # Pair every item with its predicted value, highest value first.
    ranked = sorted(map(lambda x, y: [x, y], items, p_val),
                    key=itemgetter(1),
                    reverse=True)
    context['items'] = map(lambda x: x[0], ranked[:20])
    return render_to_response('show.html', context)
def predict(data, samples, classifier='SVM', classification='combined',
            selectFeatures=('CUK', 10)):
    """Learn ``data`` with the chosen classifier and predict every sample.

    Args:
        data: training data handed to the classifier factory.
        samples: samples to predict; converted with ``clf.extractData``.
        classifier: one of ``"SVM"``, ``"DT"``, ``"KNN"``, ``"LogReg"``.
        classification: ``"trained"``, ``"surface"``, or anything else for
            both.
        selectFeatures: feature-selection setting passed through unchanged.

    Returns:
        A list with one prediction per sample.

    Raises:
        ValueError: if ``classifier`` is not a supported name (previously
            the function printed a message and then failed on an unbound
            local).
    """
    factory_names = {
        "SVM": "classifyDataSVM",
        "DT": "classifyDataDT",
        "KNN": "classifyDataKNN",
        "LogReg": "classifyDataLR",
    }
    # Validate before touching anything else so a bad name fails cleanly.
    try:
        factory_name = factory_names[classifier]
    except (KeyError, TypeError):
        message = str(classifier) + " is not a valid option"
        print(message)
        raise ValueError(message)

    classifyTrained = classification != 'surface'
    classifySurface = classification != 'trained'

    clf = getattr(cl, factory_name)(data, classifyTrained, classifySurface,
                                    selectFeatures, scaling=False)
    [samples, _, _, _] = clf.extractData(samples, scaling=False)
    predictions = [clf.predict(s) for s in samples]
    return predictions
def runTest(self): Classifier.classify_edits(self.edits) predictions = [edit.lexical_entailment for edit in self.edits] for prediction, edit in zip(predictions, self.edits): if prediction != 3: print prediction, edit self.assertEqual(predictions, self.target)
def main(): """Main handler for running the experiments. Command line args: 1: The organism to run the experiments on. """ positiveSamples, negativeSamples = SampleParser.parseTrainingSamples() print "Number of positive training samples: " + str(len(positiveSamples)) print "Number of negative training samples: " + str(len(negativeSamples)) if len(sys.argv) != 2: sys.exit("Wrong number of arguments") organism = sys.argv[1] methods = ('SVM', 'NB') pos = [] neg = [] for sign in ['pos', 'neg']: for i in range(0, len(methods)): classifier, count = Classifier.train(positiveSamples, negativeSamples, methods[i]) seqs = SampleParser.readFile(Constants.getTestData(organism, sign)) if sign == 'pos': print "Number of positive testing samples: " + " " + str( len(seqs)) else: print "Number of negative testing samples: " + " " + str( len(seqs)) predicted = Classifier.predict(classifier, seqs, count) incorrect = 0 correct = 0 for p in predicted: if sign == "pos": if p == 1: correct = correct + 1 elif p == 0: incorrect = incorrect + 1 elif sign == "neg": if p == 0: correct = correct + 1 elif p == 1: incorrect = incorrect + 1 if sign == "pos": pos.append(Decimal(correct) / Decimal(correct + incorrect)) elif sign == "neg": neg.append(Decimal(correct) / Decimal(correct + incorrect)) tot = [] for i in range(0, len(methods)): tot.append((pos[i] + neg[i]) / Decimal(2)) print "--- SUMMARY ---" for i in range(0, len(methods)): print "--- RESULTS for " + str(methods[i] + " ---") print "Positive: " + str(pos[i]) print "Negative: " + str(neg[i]) print "Total: " + str(tot[i]) ResultPrinter.plot(pos, neg, tot, methods, organism)
def ShowResultsTab4(self):
    """Run all classifiers on the selected ``.mat`` file and show accuracies.

    The fold count, iteration count, number of weak learners and subspace
    dimension are read from the line edits; an empty field falls back to its
    default (5, 5, 30 and 128).  Results are appended to ``self.textEdit``
    and the returned matrices are stored on ``self``.  ``.xls`` and ``.csv``
    files are recognised but not processed.
    """

    def _int_or_default(line_edit, default):
        # Empty field -> default.  Compare the length by value: the original
        # ``.__len__() is 0`` relied on integer identity.
        text = line_edit.text()
        return default if len(text) == 0 else int(text)

    if (not self.listWidget_2.currentItem()):
        return
    fileName = self.listWidget_2.currentItem().text()
    fileURL = self.Tab4DirPath + '/' + fileName
    extension = os.path.splitext(fileName)[1]

    if extension == ".mat":
        kflod = _int_or_default(self.lineEdit_4, 5)
        numWave = _int_or_default(self.lineEdit_5, 128)
        numLearn = _int_or_default(self.lineEdit_6, 30)
        iterNum = _int_or_default(self.lineEdit_3, 5)
        self.textEdit.clear()

        accLDA, self.matLDA = Classifier.LDA(fileURL, iterNum, kflod)
        text = 'LDA,{0}折交叉验证,迭代{1}次,准确率为:{2}'.format(
            kflod, iterNum, accLDA)
        self.textEdit.append(text)

        accSVM, self.matSVM = Classifier.SVM(fileURL, iterNum, kflod)
        text = 'SVM,{0}折交叉验证,迭代{1}次,准确率为:{2}'.format(
            kflod, iterNum, accSVM)
        self.textEdit.append(text)

        accKNN, self.matKNN = Classifier.KNN(fileURL, iterNum, kflod)
        text = 'KNN,{0}折交叉验证,迭代{1}次,准确率为:{2}'.format(
            kflod, iterNum, accKNN)
        self.textEdit.append(text)

        accDT, self.matDT = Classifier.DT(fileURL, iterNum, kflod)
        text = 'DecisionTree,{0}折交叉验证,迭代{1}次,准确率为:{2}'.format(
            kflod, iterNum, accDT)
        self.textEdit.append(text)

        accSDE, self.matSDE = Classifier.SDE(fileURL, kflod, numLearn,
                                             numWave)
        text = 'SDE,{0}折交叉验证,{1}个弱学习器,子空间维数为{2}时,准确率为:{3}'.format(
            kflod, numLearn, numWave, accSDE)
        self.textEdit.append(text)
    if extension == ".xls":
        pass
    if extension == ".csv":
        pass
def PCA_func(): X, y = load_svmlight_file('./resource/向量化后_带上下文信息_everyline.svmdata') print Classifier.trainTestReportRF(X.toarray(),y,0.70) for ite in range(1,31): print 'ite:'+str(ite)+'\t', pca = PCA(n_components=ite) newX = pca.fit_transform(X.toarray()) Classifier.trainTestReportRF(newX,y,0.70)
def runOneTrial(books):
    """Train on one split of ``books``, test on the rest, return accuracy.

    The accuracy and the number of books used are printed as a side effect.
    """
    trainingList, testingList = splitBooksIntoTrainingAndTestingSet(books)
    Classifier.train(trainingList)
    accuracy = Classifier.test(testingList)

    accuracy_line = "\nclassifier accuracy: " + str(accuracy)
    print(accuracy_line)
    count_line = "\nnumber of books used: " + str(len(books))
    print(count_line)
    return accuracy
def get_tags():
    """Predict tags for the ``title`` and ``body`` query parameters.

    The two parameters are stringified and concatenated, cleaned and
    stemmed, vectorised with ``cl.TFIDF`` and classified with ``cl.lr``.
    The tags returned by ``fb.get_ferq_with_txt`` are sent back as JSON
    together with the original title and body.
    """
    title = request.args.get('title')
    body = request.args.get('body')

    cleaned = cl.stemm_stop(cl.clean(str(title) + str(body)))
    features = cl.TFIDF.transform([cleaned]).toarray()
    predicted = cl.lr.predict(features)[0]
    tags = list(fb.get_ferq_with_txt(cleaned, [predicted]))

    return jsonify({'title': title, 'body': body, 'tags': tags})
def train(self):
    """Train (or reuse) the trainer for this data set and wrap it.

    Trainers are cached in the module-level ``Models`` dict under the name
    of the evaluation data; a cached trainer is reused without retraining.

    Returns:
        ``self``, so calls can be chained.
    """
    key = self.eval_data.name
    if key not in Models:
        # First time for this data set: register the trainer, then train it.
        Models[key] = self.trainer
        self.trainer.train(self.train_data, persist=False)
        self.trainer.train_gist(self.train_data, persist=False)
    else:
        self.trainer = Models[key]
    self.classifier = Classifier(self.trainer)
    return self
def Main():
    """Evaluate the classifier for dimension deviations 1 through 20.

    For every deviation each of the 50 users is taken in turn as the owner:
    the owner's shuffled data is split into a 300-row ``DataCluster`` and a
    remainder used as genuine test data, while the data of all other users
    is concatenated as impostor test data.  The two rates printed are one
    minus the mean of the collected ``compare_all`` results.

    NOTE(review): the nesting below was reconstructed from a flattened
    source line - confirm the block boundaries against the original file.
    """
    dataReader = DataReader()
    allUserData = dataReader.loadData(
        "DSL-StrongPasswordData")  #loads all users data
    classifier = Classifier()
    scalar = 1.0
    scalarCap = 1.6
    dimDeviation = 1
    dimCap = 21
    while (dimDeviation < dimCap):
        print "testing dims: " + str(dimDeviation)
        for k in range(0, 50):
            # NOTE(review): both lists are re-created for every owner, so
            # with this nesting the means printed below only cover the last
            # owner - confirm intended placement.
            correct_person_accuracy = []
            wrong_person_accuracy = []
            owner_index = k  # index for the user that is to be tested
            first_time = True  # temp variable for checking if first time creating test_data_wrong
            #print "testing for person "+str(k)+" created!"
            for i in range(0, 50):
                userDataRaw = allUserData[i]  #data from 1 user
                userData = dataReader.formatData(
                    userDataRaw
                )  #formats data (strips user and session ids etc), returns Matrix.
                if i == owner_index:
                    np.random.shuffle(
                        userData
                    )  # Shuffle to get data from different sessions
                    person1 = DataCluster(
                        userData[0:300],
                        scalar)  # creates the person to be tested
                    test_data_right = userData[300:]
                    # print test_data_right
                else:
                    # Stack the data of every non-owner into one matrix.
                    if first_time:
                        test_data_wrong = userData
                        first_time = False
                    else:
                        test_data_wrong = np.concatenate(
                            (test_data_wrong, userData), axis=0)
            correct_person_accuracy.append(
                classifier.compare_all(person1, test_data_right, True,
                                       dimDeviation))
            wrong_person_accuracy.append(
                classifier.compare_all(person1, test_data_wrong, False,
                                       dimDeviation))
        print "False recognition rate: " + str(
            1 - np.mean(correct_person_accuracy))
        print "False acceptance rate: " + str(1 -
                                              np.mean(wrong_person_accuracy))
        # scalar += 0.1
        dimDeviation += 1
class CarbonaraBros():
    """Scores the tables and lists of a DOM as relevant or not relevant."""

    def __init__(self, relevant_threshold=0.8):
        """Load both classifiers and remember the relevance threshold."""
        self.fe = FeaturesExtractor()
        self.relevant_threshold = relevant_threshold
        self.tableClassifier = Classifier('models/table_classifier.h5')
        self.listClassifier = Classifier('models/list_classifier.h5')

    def _sortNodes(self, nodes, selected, descriptor, classifier, buckets):
        # Classify every node and file it, with its score, under
        # 'relevant' or 'not_relevant' depending on the threshold.
        for node in nodes:
            features = self.fe.extract(node,
                                       selected=selected,
                                       features_descriptor=descriptor)
            probabilities = classifier.classify(self.fe.toArray(features))
            score = probabilities[1]
            key = ('relevant'
                   if score >= self.relevant_threshold else 'not_relevant')
            buckets[key].append((score, node))

    def processDom(self, dom):
        """Return the scored ``<table>`` and ``<ul>/<ol>/<dl>`` nodes of ``dom``.

        The result maps ``'table'`` and ``'list'`` to a dict holding two
        lists of ``(score, node)`` tuples, ``'relevant'`` and
        ``'not_relevant'``.
        """
        analysis = {
            'table': {'relevant': [], 'not_relevant': []},
            'list': {'relevant': [], 'not_relevant': []},
        }

        # tables
        self._sortNodes(dom.xpath("//table"),
                        DefaultFeatures.table_selected,
                        DefaultFeatures.table,
                        self.tableClassifier,
                        analysis['table'])

        # lists: unordered, ordered and definition lists, in that order
        list_nodes = dom.xpath("//ul") + dom.xpath("//ol") + dom.xpath("//dl")
        self._sortNodes(list_nodes,
                        DefaultFeatures.list_selected,
                        DefaultFeatures.list,
                        self.listClassifier,
                        analysis['list'])
        return analysis
def testPCAFit(): dat = np.load("heavyTrainSet_noDS.npy") # dat = dat[: 600] # clf = Classifier(svm.SVR(kernel="linear", gamma="auto")) clf = Classifier(svm.SVC(kernel="linear", gamma="auto", probability=True)) # Loading data print "Loading training data.." clf.imSet, clf.labelSet = DataFactory.getTrainableArrays(dat) # Extracting features ncomps = 30 print "Extracting features from training data.." startExtractTime = time.time() percentVarCovered = clf.extractFeatsPCA(ncomps) endExtractTime = time.time() extractTime = endExtractTime - startExtractTime print "Original Image Size:", clf.imSet[0].shape print "Number of selected principal components:", ncomps print "Percentage of variance covered:", percentVarCovered print "Training data feature extraction time:", extractTime, "sec" print numIns = len(clf.featSet) shuffIndices = range(numIns) # np.random.shuffle(shuffIndices) shuffFeats = clf.featSet[shuffIndices] shuffLabels = clf.labelSet[shuffIndices] confMat = np.array([[0, 0], [0, 0]]) print "Start training.." clf.model.fit(shuffFeats, shuffLabels) print "Start predicting.." probs = clf.model.predict_proba(shuffFeats) assert probs.shape == (numIns, 2) for i, prob in enumerate(probs): if dat[shuffIndices[i]].hasDoubleChirp: if prob[0] > 0.5: confMat[0, 0] += 1 else: confMat[0, 1] += 1 else: if prob[0] <= 0.5: confMat[1, 1] += 1 else: confMat[1, 0] += 1 print "Training accuracy:", 1.0 * (confMat[0, 0] + confMat[1, 1]) / numIns print "Total number of fails:", confMat[0, 1] + confMat[1, 0] print "Confusion Matrix" printConfMat(confMat, ["DoubleChirp", "NotDoubleChirp"])
def __init__(self, config):
    """Parse ``config`` and create the classifier it names.

    ``self.classifier`` is only set when ``classifier_method`` is one of the
    known names; any other value leaves the attribute untouched.
    """
    self.config = self._Parameters(config)

    # Method name -> attribute of Classifier that builds it.  Looked up
    # lazily so only the selected builder is ever accessed.
    builders = {
        'svc': 'SVC',
        'sgd': 'SGD',
        'random forest': 'RandomForest',
        'bagging svc': 'BaggingSVC',
        'logistic regression': 'LogisticRegression',
    }
    for method, builder_name in builders.items():
        if self.config.classifier_method == method:
            self.classifier = getattr(Classifier, builder_name)(config)
            break
def build_handler():
    """Validate the inputs, preprocess the train set and build the model.

    The number of discretization bins is read from ``e2`` and must be an
    integer of at least 2 that does not exceed the number of training
    records.  ``Structure.txt`` must not be empty and ``train.csv`` must be
    readable.  Every failed check shows an info dialog and aborts the build.
    On success the module-level ``numOfIntervals`` and ``dfTrainFinal`` are
    updated, the model is prepared and the classify button is enabled.
    """
    global numOfIntervals
    global dfTrainFinal

    toCheck = e2.get()
    if toCheck == "":
        showinfo(
            "Naive Bayes Classifier",
            "Please insert an integer for the Discretization bins attribute"
        )
        return
    try:
        numOfIntervals = int(toCheck)
    except ValueError:
        showinfo("Naive Bayes Classifier",
                 "Discretization bins must be an integer")
        return
    if numOfIntervals < 2:
        showinfo("Naive Bayes Classifier",
                 "Discretization bins must be at least 2")
        return

    if os.stat(pathToStructure).st_size == 0:
        showinfo("Naive Bayes Classifier",
                 "The file Structure.txt is empty. Please load valid files")
        return

    try:
        dfTrain = pd.read_csv(pathToTrain)
    except Exception as e:
        if str(e) == "No columns to parse from file":
            showinfo("Naive Bayes Classifier",
                     "The file train.csv is empty. Please load valid files")
        else:
            showinfo("Naive Bayes Classifier",
                     "The file train.csv has errors. Please load valid files")
        # Without a train set there is nothing to build (the original fell
        # through and failed on the undefined dfTrain).
        return

    totalNumOfRecords_train = dfTrain.shape[0]  # num of records
    if numOfIntervals > totalNumOfRecords_train:
        showinfo(
            "Naive Bayes Classifier",
            "Discretization bins must not be grater than the number of train set records"
        )
        return

    # The structure file is read twice from the start; each handle is
    # closed as soon as its consumer is done.
    with open(pathToStructure, "r") as structure_file:
        dfTrainFinal = pp.preProcess(structure_file, dfTrain, numOfIntervals)
    with open(pathToStructure, "r") as structure_file:
        attribute_values_dict = pp.set_attribute_values_dict(structure_file)

    cl.prepareModel(dfTrainFinal, pathToStructure, numOfIntervals,
                    attribute_values_dict)
    classify_Button.config(state='normal')
    showinfo("Naive Bayes Classifier",
             "Building classifier using train-set is done!")
def runTest(self):
    """Classify the edits, report the misses and compare with the target.

    Every prediction different from 2 is printed with its edit, followed by
    the miss count, the total and the percentage of correct predictions.
    The percentage uses float arithmetic (integer division under Python 2
    only ever produced 0 or 100) and is reported as 0.0 for an empty set.
    """
    Classifier.classify_edits(self.edits)
    predictions = [edit.lexical_entailment for edit in self.edits]
    num_incorrect = 0
    for prediction, edit in zip(predictions, self.edits):
        if prediction != 2:
            num_incorrect += 1
            print('Predicted: %s, target: 2' % prediction)
            print(edit)

    total = len(predictions)
    print('%s %s' % (num_incorrect, total))
    percent_correct = 100.0 * (total - num_incorrect) / total if total else 0.0
    print('%s percent correct' % percent_correct)
    self.assertEqual(predictions, self.target)
def runTest(self):
    """Classify the edits, report the misses and compare with the target.

    Every prediction different from 1 is printed with its edit, followed by
    the miss count, the total and the percentage of correct predictions.
    The percentage uses float arithmetic (integer division under Python 2
    only ever produced 0 or 100) and is reported as 0.0 for an empty set.
    """
    Classifier.classify_edits(self.edits)
    predictions = [edit.lexical_entailment for edit in self.edits]
    num_incorrect = 0
    for prediction, edit in zip(predictions, self.edits):
        if prediction != 1:
            num_incorrect += 1
            print('Predicted: %s, target: 1' % prediction)
            print(edit)

    total = len(predictions)
    print('%s %s' % (num_incorrect, total))
    percent_correct = 100.0 * (total - num_incorrect) / total if total else 0.0
    print('%s percent correct' % percent_correct)
    self.assertEqual(predictions, self.target)
def run(pdfpath, toPath):
    """Extract title and author info from a PDF and write them to a text file.

    The random-forest model is generated first when
    ``./RandomForestScikitModel`` does not exist.  The report is printed and
    written to ``toPath`` under the PDF's file name with ``.pdf`` replaced
    by ``.txt``.

    Returns:
        ``(title, dicSet, header, predictLabel)`` where ``dicSet`` holds one
        dict per author.
    """
    if not os.path.exists(r'./RandomForestScikitModel'):
        Classifier.outputModel()
    title, authorInfo, header, predictLabel = PdfProcessor.run(pdfpath)

    dicSet = []
    parts = ['[Title]:' + title + '\n\n']
    for author in authorInfo:
        parts.append(author.toString() + '\n')
        dicSet.append(author.toDic())
    output = ''.join(parts)
    print(output)

    outName = os.path.split(pdfpath)[1].replace('.pdf', '.txt')
    # Context manager so the handle is flushed and closed deterministically.
    with open(toPath + '/' + outName, 'w') as outFile:
        outFile.write(output)
    return title, dicSet, header, predictLabel
def __init__(self): super(VehicleDetector, self).__init__() # Sliding windows self.yStart = 400 self.yStop = 650 self.x_overlap = 0.65 self.y_overlap = 0.75 # Filter self.filterThreshold = 2 self.filter = F.Filter(self.filterThreshold) # Print summary to check correct parameters self.Summary() # Sub-components self.renderer = R.Renderer() self.database = D.Database() cars, notcars = self.database.GetListOfImages() self.classifier = C.Classifier(cars, notcars, loadFromFile=True, database=self.database) # Output video parameters self.outputToImages = 0 self.outputVideoName = self.database.GetOutputVideoPath() # Train classifier ? self.trainClassifier = 1 # TODO: implement the loading # Bounding boxes self.bboxes = self.LoadSlidingWindows()
def test():
    """Filter the Boston data file and write the result to its own folder."""
    FilePathReadStr = 'Data/Classified/'
    FilePathWriteStr = 'Data/DataProcessing/'

    # Filter the raw text
    filtered = Filter(FilePathReadStr + 'Boston.txt')

    # Create the output directory, then rewrite the filtered data into it
    targetDir = FilePathWriteStr + 'Boston/'
    CreateDir(targetDir)
    Classifier.WriteFileLine(targetDir + 'FilterData.txt', filtered, 'w')
def main():
    """Reduce the image data, train the classifier and report test accuracy.

    Loads 110 bitmaps (``Data/s1.bmp`` .. ``Data/s110.bmp``) as rows of
    10000 values and their labels from ``Data/labels.txt``, reduces the data
    with ``mds_func``, trains for 500 rounds on every image whose index is
    not a multiple of 11 and tests on the 10 images at indices 0, 11, ...,
    99.
    """
    print('---正在读取数据并降维---')
    data = np.empty([110, 10000], np.float32)
    for idx in range(110):
        image = Image.open('Data/s' + str(idx + 1) + '.bmp')
        data[idx] = np.reshape(image, [10000])
    # Read the single label line and close the file right away (the handle
    # used to stay open for the rest of the run).
    with open('Data/labels.txt') as label_file:
        label = np.array(label_file.readline().strip('\n').split(','),
                         np.int32)

    # Choice of the reduction algorithm
    data_reduced = mds_func(data)
    # data_reduced = isomap_func(data)
    # data_reduced = le_func(data)
    # data_reduced = lle_func(data)

    classifier = Classifier.Classifier(20)
    for repeat in range(500):
        for idx in range(110):
            if idx % 11 != 0:
                classifier.fit(data_reduced[idx], label[idx])
        sys.stdout.write('\r正在训练,已完成 %.1f%%' % (repeat * 100 / 500))
    sys.stdout.write('\r训练完毕,下面开始测试\n')

    correct_times = 0
    for idx in range(10):
        val = classifier.classify(data_reduced[idx * 11])
        print('第 %2d 次预测值:%d,真实值:%d' % (idx + 1, val, label[idx * 11]))
        if val == label[idx * 11]:
            correct_times += 1
    print('测试完毕,准确率:%.2f%%' % (correct_times * 100 / 10))
def __init__(self):
    """Create the database, classifier, renderer and vehicle detector."""
    super(Test, self).__init__()

    self.database = D.Database()
    image_lists = self.database.GetListOfImages()
    cars, notcars = image_lists
    self.classifier = C.Classifier(cars,
                                   notcars,
                                   database=self.database,
                                   loadFromFile=True)

    self.renderer = R.Renderer()
    self.vehicleDetector = V.VehicleDetector()
def check_success(userid):
    """Predict the priority of the ticket posted in the request form.

    The ticket details are written to the user's ``.data`` file and handed,
    together with the user's feature and model files, to
    ``classifier.check``.

    Returns:
        A JSON response with ``message.id`` and ``message.priority`` on
        success, or with an ``error`` entry when anything fails.
    """
    data = {}
    try:
        print("check" + str(userid))
        ticketId = request.form['id']
        details = request.form['details']
        timestamp = request.form['timestamp']
        # Log the ticket id (the original printed the builtin ``id``).
        print(ticketId)
        print(details)
        print(timestamp)

        # Single-row frame holding the ticket text
        newdata = [[details]]
        df = pd.DataFrame(newdata, columns=['sentence'])

        userDir = uploadDir + '//' + str(userid)
        modelFilePath = userDir + '//' + str(userid) + ".joblib"
        featuresFilePath = userDir + '/features.pkl'
        dataFilePath = userDir + '//' + str(userid) + ".data"
        df.to_csv(dataFilePath, index=False)

        predictedPriority = classifier.check(featuresFilePath, modelFilePath,
                                             dataFilePath)
        print(predictedPriority)
        data['message'] = {}
        data['message']['id'] = ticketId
        data['message']['priority'] = int(predictedPriority)
    except Exception as ex:
        # Best-effort endpoint: report the failure to the client and log the
        # exception type and arguments (previously built but never shown).
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        print(template.format(type(ex).__name__, ex.args))
        data['error'] = "Exception occured"
    return jsonify(data)
def classifier():
    """Classify the text of the request and return the categories as JSON.

    The text comes from the ``text`` field of the JSON body on POST, or from
    the ``text`` query parameter on GET.  The response body has the form
    ``{"simple": <categories>}`` and allows any origin.
    """
    if request.method == 'POST':
        # Parse the request body (the original passed an undefined name,
        # ``data_test``, to json.loads).
        dataDict = json.loads(request.data)
        text = dataDict.get('text')
    elif request.method == 'GET':
        text = request.args.get('text')
    textToSend = [text]

    # Build the Bunch handed to the classifier
    dataToTest = Bunch(data=textToSend, filenames="", target="")
    text_embedded = doc2vec.infer_vector(textToSend[0].split())
    categoriesSimple = Classifier.start(neuralNetworks, dictionaries,
                                        normalizers, dataToTest, 10, "root",
                                        path, text_embedded)

    json_simple = json.dumps(categoriesSimple)
    body = "{\"simple\":" + json_simple + "}"
    response = Response(body, mimetype='application/json')
    response.headers.add('content-length', str(len(body)))
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def build(self):
    """Load the train set and structure file and build the classifier.

    The train set is always read first.  The remaining steps only run when
    the bins entry validates: the structure file is checked for emptiness,
    parsed into a dictionary, the train set is cleaned and the classifier is
    created, followed by a confirmation dialog.

    Raises:
        Exception: if ``Structure.txt`` is empty.

    NOTE(review): nesting reconstructed from a flattened source line.
    """
    folder = self.entryPath.get  # re-read on every use, like the original
    self.train = pd.read_csv(folder() + "/train.csv")
    if not self.validate(self.entryDiscBins.get()):
        return

    # load train file, test file and structure file
    if os.path.getsize(folder() + "/Structure.txt") == 0:
        raise Exception("The structure file is empty")
    self.structureFile = open(folder() + "/Structure.txt")
    self.fileHandler = FilesHandler()
    self.structureDic = self.fileHandler.createStstructureDic(
        self.structureFile)

    self.dataCleaner = DataCleaner(self.structureDic, self.numOfBins)
    self.toLowerCase("train")
    self.train = self.dataCleaner.trainCleaning(self.train)

    self.classifier = Classifier(self.train, folder(), self.structureDic,
                                 self.numOfBins)
    self.wasBuilt = True
    tkMessageBox.showinfo("Build Message",
                          "Building classifier using train-set is done!")
def SVMDraw(data, clf, histogram):
    """Show a 3D scatter plot of ``data`` using the labels from ``clf``.

    Point coordinates, sizes and colors come from ``Utils.getDrawInfo``.
    """
    axes = Axes3D(plt.figure())
    labels = Classifier.getLabels(clf, data)
    Xs, Ys, Zs, sizes, colors = Utils.getDrawInfo(data, histogram, labels)
    axes.scatter(Xs, Ys, Zs, s=sizes, c=colors)
    plt.show()
def train(config):
    """Train a classifier on the configured data set and save it.

    The data root, data set and output location are read from the ``train``
    section of ``config``.
    """
    model = Classifier.classifier(config)
    dataroot = config.get("train", "dataroot")
    dataset = config.get("train", "dataset")
    walker = sutils.dataset_walker(dataset=dataset,
                                   dataroot=dataroot,
                                   labels=True)
    model.train(walker)
    output_path = config.get("train", "output")
    model.save(output_path)
def classify():
    """Classify the test set, append the results to output.txt and exit.

    Every classification is written as ``"<index> <class>"`` (1-based) to
    ``<filename>/output.txt``.  After the confirmation dialog the main
    window is destroyed and the process exits with status 0.
    """
    classifier = Classifier(test=data[3],
                            structure=data[0],
                            train=data[1],
                            meta_data=data[2],
                            bins=bins_num)
    # The context manager closes the file even if classification fails.
    with open(filename + "/output.txt", "a") as output:
        for i, classification in enumerate(classifier.classify(), 1):
            output.write(str(i) + " " + str(classification) + "\n")
    messagebox.showinfo("Naive Bayes Classifier", "Classification is done!")
    root.destroy()
    sys.exit(0)
def main():
    """Filter the classified data of every city and rewrite it per city.

    A failure for one city is printed (error text, tab, city name) and does
    not stop the processing of the remaining cities.
    """
    CityFileNameStr = 'Data/City/City.txt'
    FilePathReadStr = 'Data/Classified/'
    FilePathWriteStr = 'Data/DataProcessing/'

    # Build the list of cities
    cities = (Classifier.CreateCityDict(CityFileNameStr)).keys()

    # Filter each city's file and rebuild its output folder
    for city in cities:
        try:
            filtered = Filter(FilePathReadStr + city + '.txt')
            cityDir = FilePathWriteStr + city + '/'
            CreateDir(cityDir)
            Classifier.WriteFileLine(cityDir + 'FilterData.txt', filtered,
                                     'w')
        except Exception as e:
            print(str(e) + '\t' + city)
def predict(scalers, classifiers, scores, info, output):
    """Classify the live ``arecord`` stream and emit a majority label.

    A reader thread consumes the recorder's stdout (presumably filling the
    module-level ``buf`` - verify against ``readdataThread``).  Whenever
    ``buf`` holds at least one window, the last window is classified and
    counted; after 5 classifications the most frequent label is passed to
    ``output`` and the counters are reset.  The loop never terminates, so
    the final ``join`` is not reached.
    """
    global buf
    info("start predict")
    shcmd = "arecord -t raw -c 2 -r 2000 -f S16_LE - 2>/dev/null"
    proc = subprocess.Popen(shcmd, stdout=subprocess.PIPE, shell=True)
    reader = readdataThread(proc.stdout)
    reader.daemon = True
    reader.start()

    rounds = 0
    votes = [0] * Classifier.NUM_OF_LABELS
    while True:
        if len(buf) < Classifier.WINDOW_SIZE * 4:
            continue
        data1, data2 = getdata(buf[-Classifier.WINDOW_SIZE * 4:])
        # Keep all but 50 samples of the window for the next round.
        buf = buf[-(Classifier.WINDOW_SIZE - 50) * 4:]
        features = extract_feature(data1, data2)
        label = Classifier.multi_classification([features], scalers,
                                                classifiers, scores)[0]
        votes[label] += 1
        rounds += 1
        if rounds >= 5:
            majority = votes.index(max(votes))
            rounds = 0
            votes = [0] * Classifier.NUM_OF_LABELS
            output(majority)
            sys.stdout.flush()
    reader.join()
def evalKNN(data, classifyTrained, classifySurface, selectFeatures):
    """Cross-validate a KNN classifier built on ``data``.

    The decision surface is plotted when the module-level ``plotSurfaces``
    flag is set.
    """
    knn = cl.classifyDataKNN(data, classifyTrained, classifySurface,
                             selectFeatures)
    knn.crossValidation()
    # knn.showKNeighborsGraph()
    if plotSurfaces:
        knn.plotDecisionSurface()
def calc(datasetIndex, multiplierInt):
    """Run ``trials`` experiments for one dataset and bin count.

    The bin count is ``ceil(2 ** ((multiplierInt + 1) / 2))``.  Each trial
    bins the dataset, trains, classifies the test data and records the macro
    F1 average and the zero-one loss (divided by 100).  The collected rows
    are appended, as one DataFrame, to the module-level ``data`` list.
    """
    columns = ['dataset', 'bins', 'f1', 'zero-one']
    exp = ((multiplierInt + 1) / 2)
    bins = math.ceil(2**exp)

    # Rows are gathered in a list and turned into a frame once:
    # DataFrame.append no longer exists in current pandas.
    rows = []
    for _ in range(trials):
        dp = DataProcessor.DataProcessor(bin_count=bins)
        binnedDataset = dp.StartProcess(datasets[datasetIndex])
        N, Q, F, testData = train(binnedDataset)
        model = Classifier.Classifier(N, Q, F)
        classifiedData = model.classify(testData)

        stats = Results.Results()
        zeroOne = stats.ZeroOneLoss(classifiedData)
        macroF1Average = stats.statsSummary(classifiedData)
        datapoint = {
            'dataset': dataset_names[datasetIndex],
            'bins': bins,
            'f1': macroF1Average,
            'zero-one': zeroOne / 100
        }
        print(datapoint)
        rows.append(datapoint)

    data.append(pd.DataFrame(rows, columns=columns))
def getNbestTreeFeaturesPos(data, n, Klassifizierer="Forest"):
    """Return the column positions of the ``n`` most important features.

    The classifier is trained on columns 2 and up of ``data``; the returned
    positions are offsets into ``data`` (importance index + 2), most
    important first.
    """
    column_count = len(data[1, :])
    xlf, X_train, X_test, y_train, y_test = Classifier.classify(
        data, range(2, column_count), classifier=Klassifizierer)
    importances = np.array(xlf.feature_importances_)
    # Ascending argsort: take the last n and flip them.
    best_first = importances.argsort()[-n:][::-1]
    return [position + 2 for position in best_first]
def test(testText, dictionary):
    """Classify ``testText`` with naive Bayes using the stored class counts.

    The counts are read from ``corpus\\classCount.txt``: one
    ``<class>\\t<count>`` pair per line; lines without a tab are skipped.
    Counts are passed on as the strings read from the file.
    """
    # Backslash escaped explicitly: '\c' is an invalid escape sequence that
    # newer Python versions warn about.  The path value is unchanged.
    classCntTxt = FileIO.readFile('corpus\\classCount.txt')
    classCnt = {}
    for line in classCntTxt.split('\n'):
        statistics = line.split('\t')
        if len(statistics) > 1:
            classCnt[statistics[0]] = statistics[1]
    return Classifier.naiveBayes(dictionary, classCnt, testText)
def evalDT(data, classifyTrained, classifySurface, selectFeatures):
    """Cross-validate a decision tree built on ``data`` and export it as PDF.

    The decision surface is plotted when the module-level ``plotSurfaces``
    flag is set.
    """
    tree = cl.classifyDataDT(data, classifyTrained, classifySurface,
                             selectFeatures)
    tree.crossValidation()
    # tree.showFeatureImportances()
    tree.createTreePdf()
    if plotSurfaces:
        tree.plotDecisionSurface()
def evalSVM(data, classifyTrained, classifySurface, selectFeatures):
    """Cross-validate an SVM classifier built on ``data``.

    The decision surface is plotted when the module-level ``plotSurfaces``
    flag is set.
    """
    machine = cl.classifyDataSVM(data, classifyTrained, classifySurface,
                                 selectFeatures)
    machine.crossValidation()
    # machine.showProperties()
    # machine.showSupportVectors()
    # machine.showSelectedFeatures()
    if plotSurfaces:
        machine.plotDecisionSurface()
def predict(self):
    """Classify every file in ``DIR_TO_PREDICT`` and return the reports.

    Each file is classified with k=1 using the configured prototype, model
    and function files; the elapsed time is passed to ``report``.
    """
    reports = []
    for fname in os.listdir(DIR_TO_PREDICT):
        started = clock()
        label = Classifier._predict(DIR_TO_PREDICT + fname, 1, PROTOTYPE,
                                    MODEL, FUNCTIONS)
        elapsed = clock() - started
        reports.append(report(fname, label, elapsed))
    return reports
def train(self):
    """Train (or reuse) the model for this data set and wrap the trainer.

    The module-level ``Models`` dict caches, per evaluation-data name, the
    value returned by ``self.trainer.train``; a cached value replaces
    ``self.trainer`` on later calls.

    NOTE(review): the cache stores the return value of ``train()``, not the
    trainer itself - confirm that ``train()`` returns the trainer.

    Returns:
        ``self``, so calls can be chained.
    """
    key = self.eval_data.name
    if key not in Models:
        Models[key] = self.trainer.train(self.train_data, persist=False)
    else:
        self.trainer = Models[key]
    self.classifier = Classifier(self.trainer)
    return self
def predict(self):
    """Classify every file in ``DIR_TO_PREDICT`` and return the reports.

    Each file is classified with k=1 using the fixed prototype, word2vec
    model and function files; the elapsed time is passed to ``report``.
    """
    reports = []
    for fname in os.listdir(DIR_TO_PREDICT):
        started = clock()
        label = Classifier._predict(DIR_TO_PREDICT + fname, 1,
                                    "prototypes.trained", "model_w2v.mm",
                                    "ffunctions.mm")
        elapsed = clock() - started
        reports.append(report(fname, label, elapsed))
    return reports
def trainCascadeClassifier(maxFPR=Util.DEFAULT_MAX_FPR, minDR=Util.DEFAULT_MIN_DR, targetFPR=Util.DEFAULT_TARGET_FPR, posSampleNum=200, posSamplePath=Util.DEFAULT_FACE_IMAGE_PATH_PREFIX, negSampleNum=400, negSamplePath=Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX, partitionPercentage=Util.DEFAULT_PARTITION_PERCENTAGE, jsonFile=Util.DEFAULT_JSON_FILE): trainPosSetSize = int(posSampleNum * partitionPercentage) trainNegSetSize = int(negSampleNum * partitionPercentage) validPosSetSize = posSampleNum - trainPosSetSize validNegSetSize = negSampleNum - trainNegSetSize if not (posSamplePath == Util.DEFAULT_FACE_IMAGE_PATH_PREFIX): Util.DEFAULT_FACE_IMAGE_PATH_PREFIX = posSamplePath if not (negSamplePath == Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX): Util.DEFAULT_NON_FACE_IMAGE_PATH_PREFIX = negSamplePath sampleImageBundle = Util.getSampleImageSet(posSampleNum, posSamplePath, negSampleNum, negSamplePath, partitionPercentage) # sampleImageBundle = tuple([tuple([posTrainSampleSet, negTrainSampleSet]), # tuple([posValidSampleSet, negValidSampleSet])]) if not sampleImageBundle: print 'Reduce the number of sample images, you don\'t so many data.' return None sampleSizeBundle = tuple([tuple([trainPosSetSize, trainNegSetSize]), tuple([validPosSetSize, validNegSetSize])]) print '===== start constructing the cascade classifier and json file =====' cascadeClassifier = Classifier.getCascadeClassifier(maxFPR, minDR, targetFPR, sampleImageBundle, sampleSizeBundle) jsonCascadeClassifier = cascadeClassifier.jsonEncode() with open(jsonFile, 'w') as outputJsonFile: json.dump(jsonCascadeClassifier, outputJsonFile) print '===== complete the training of cascade classifier =====' print '===== please check the output file json file ', jsonFile, ' =====' return jsonCascadeClassifier
def train(rawdata1, rawdata2, y, info):
    """Cut both raw recordings into windows and fit the model on them.

    For every ``(label, recording1, recording2)`` triple, windows of
    ``Classifier.WINDOW_SIZE`` samples are taken at offsets
    ``range(500, 1500, Classifier.WINDOW_SHIFT_TRAIN)``; each window pair is
    turned into a feature vector by ``extract_feature`` and tagged with the
    label of the recording it came from.

    Args:
        rawdata1: sequence of sliceable recordings (first input).
        rawdata2: sequence of sliceable recordings (second input).
        y: one label per recording.
        info: callable used for progress messages.

    Returns:
        tuple: ``(scalers, classifiers, scores)`` as produced by
        ``Classifier.gen_model``.
    """
    info("start training")

    # Build (feature, label) pairs in the same order the windows are visited.
    samples = [
        (extract_feature(first[start: start + Classifier.WINDOW_SIZE],
                         second[start: start + Classifier.WINDOW_SIZE]),
         label)
        for label, first, second in zip(y, rawdata1, rawdata2)
        for start in range(500, 1500, Classifier.WINDOW_SHIFT_TRAIN)
    ]
    features = [feature for feature, _ in samples]
    labels = [label for _, label in samples]

    scalers, classifiers, scores = Classifier.gen_model(features, labels,
                                                        verbose=False)
    info("finish training")
    return scalers, classifiers, scores
def __init__(self,X,y,ratio):
    """Split the data and build the pool of weak classifiers.

    Args:
        X: samples; must expose ``.shape[0]`` and support slicing
            (presumably a NumPy array -- confirm with callers).
        y: labels aligned with ``X``.
        ratio: fraction of the leading rows kept for training; the remaining
            rows are held out.
    """
    # Named ``split`` rather than ``len`` so the builtin is not shadowed.
    split = int(ratio * X.shape[0])

    self.X = X[:split]
    self.y = y[:split]
    self.X_testT = X[split:]
    self.y_testT = y[split:]
    self.X_test = X[split:]
    self.y_test = y[split:]

    self.sums = np.zeros(self.y_test.shape)

    # One uniform weight per held-out row.  Built directly as a 1-D array:
    # the former ``np.ones((n, 1)).flatten(1)`` passed an integer ``order``,
    # which current NumPy rejects, and yields the same values.
    num_test = self.X_test.shape[0]
    self.W = np.ones(num_test) / num_test

    # Number of weak classifiers stored in self.G (keys 0 .. M-1).
    self.M = 20
    self.G = {}
    for i in range(self.M):
        # NOTE(review): 0.75 is presumably the sampling fraction -- confirm
        # against randomSampleRandomAlgorithmForWeakClf.
        self.G[i] = Classifier.randomSampleRandomAlgorithmForWeakClf(
            self.X, self.y, 0.75)
def train(self):
    """Run ``Classifier._fit`` with the built-in paths and return its result.

    The two training directories and the three output/model file names are
    hard-coded below and forwarded positionally, in this order.
    """
    fit_arguments = (
        "./toTrain/Malw",
        "./toTrain/NoMalw",
        "prototypes.trained",
        "model_w2v.mm",
        "ffunctions.mm",
    )
    return Classifier._fit(*fit_arguments)
def runTest(self):
    """Classify ``self.edits`` and check the first prediction.

    ``Classifier.classify_edits`` is run on the edits, the
    ``lexical_entailment`` of every edit is collected, and the first value is
    printed alongside the first edit and the first target before being
    compared with ``self.target[0]``.
    """
    Classifier.classify_edits(self.edits)

    predictions = []
    for edit in self.edits:
        predictions.append(edit.lexical_entailment)

    first_edit = self.edits[0]
    expected = self.target[0]
    predicted = predictions[0]
    print('Edit:\n%s\nTarget: %s\nPrediction: %s' % (
        first_edit, expected, predicted))

    self.assertEqual(predicted, self.target[0])
def train(self):
    """Run ``Classifier._fit`` on the configured paths and return its result.

    The arguments are the module-level constants ``MALW_PATH``,
    ``NOMALW_PATH``, ``PROTOTYPE``, ``MODEL`` and ``FUNCTIONS``, forwarded
    positionally in that order.
    """
    fitted = Classifier._fit(
        MALW_PATH,
        NOMALW_PATH,
        PROTOTYPE,
        MODEL,
        FUNCTIONS,
    )
    return fitted
Time: %d """ % (fname,cl,t) if __name__ == "__main__": logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) system('cls' if name == 'nt' else 'clear') header() if len(argv)<2: usage(); exit() if argv[1]=="--train": if len(argv)!=7: usage(); exit() train_malware_path = argv[2] train_non_malware_path = argv[3] fprototypes = argv[4] f2wvmodel = argv[5] ffmodel = argv[6] Classifier._fit(train_malware_path,train_non_malware_path,True,fprototypes,f2wvmodel,ffmodel) elif argv[1]=="--predict": if len(argv)!=7: usage(); exit() exe_file,k,path_prototypes,path_w2v_model,path_f_model = argv[2],int(argv[3]),argv[4],argv[5],argv[6] it = clock() c_class = Classifier._predict(exe_file,k,path_prototypes,path_w2v_model,path_f_model) report(exe_file,c_class,clock()-it) elif argv[1]=="--statistics": if len(argv)!=6: usage(); exit() type_statistics = argv[2] k = int(argv[3]) train_malware_path = argv[4] train_non_malware_path = argv[5] if type_statistics=="-lou": Statistics._leaving_one_out(train_malware_path,train_non_malware_path,k)
i = i + 1 list_models = [] print "Loading models" #Load the models for j in range(len(name_models)): #For the moment don't put True is there are more that 2 models in Ubuntu gm = loadModel(name_models[j],th[j],False) list_models.append(gm) print "Calculating weigths" #Used to calculate the weights v0 = Classifier() for j in range(len(name_models)): print "\nFor model " + name_models[j] + ":" w_g, w_b = v0.calculateW(list_files[j],list_models[j]) list_models[j].addWeight("gravity",w_g) list_models[j].addWeight("body",w_b) print "\n Init classifers" l_class = [] for j in range(len(name_models)): l_class.append(Classifier())
import re drname="handshape" from Classifier import * import sys sys.path.insert(0,'/home/lzz/project/project/lstm/') import lstm.RNN_with_gating #import whole_network,whole_level_network if __name__ == '__main__': #caffedl=caffeDL('/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/proto/lenet_test.prototxt','/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/model/lenet_iter_5000.caffemodel') caffedl=caffeDL('/home/lzz/caffe/caffe-master/examples/imagenet/train_val_16_py.prototxt','/home/lzz/caffe/caffe-master/examples/imagenet/model/4096_iter_10000.caffemodel') #caffedlInter=caffeDL('/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/proto_inter/lenet_test.prototxt','/media/lzz/65c50da0-a3a2-4117-8a72-7b37fd81b574/sign/model/lenet__iter_400.caffemodel') caffedlInter=caffeDL('/home/lzz/caffe/caffe-master/examples/imagenet/intermodel/train_val_inter.prototxt','/home/lzz/caffe/caffe-master/examples/imagenet/intermodel/24inter_iter_300.caffemodel') classifier = Classifier() #pathTotal='/media/lzz/HD1/1Michael/split/301-610new/' #pathTotal='/media/lzz/HD1/1Michael/split/791-1000/' #pathTotal='/media/lzz/HD1/1Michael/split/1-23/' #pathTotal='/media/lzz/HD1/1Michael/split/new/' #pathTotal='/media/lzz/HD1/1Michael/split/new/301-610new/' #pathTotal='/media/lzz/HD1/1Michael/split/new/1-250/' #pathTotal='/media/lzz/HD1/1Michael/split/new/1-250/Aaron 1-180/' #pathTotal='/media/lzz/Data1/michael/301-400/' #pathTotal="/home/lzz/hand/" #pathTotal='/media/lzz/HD1/real/' #pathTotal='/home/lzz/sign/data/' #pathTotal='/media/lzz/Data1/kinect/' #pathTotal='/home/lzz//sign/data/'
dist2+=math.pow(handdicboth[p][i]-self.dic[path].handhog[i],2) if dist1+dist2*lamda<dist: dist=dist1+dist2*lamda prediction=self.dic[p].wordName if self.dic[path].wordName==prediction: correct+=1 else: wrong+=1 accuracy=float(correct)/(float(correct+wrong)) print accuracy def dtwfeature(self): for path in self.filelist: print path self.dic[path].dtwfeature() if __name__ == '__main__': classifier = Classifier() dataset='our' trainname={} testname={} if dataset=='devisign': pathTotal='/media/lzz/Data1/devisign/' #pathTotal='/media/lzz/Data1/own/' #pathTotal='/home/lzz/sign/data1/' trainname['P08']=0 trainname['P02']=0 trainname['P01']=0 trainname['P07']=0 #trainname['P01']=0 testname['P03']=0
def runClassifier(params, settings, fold):
    """Run one training or testing pass of the model and return its score.

    ``settings['LEARNING']`` selects the mode.  The function sets up the
    model, runs it for the whole exposure period, saves two raster plots
    (combined and per-layer) tagged with ``fold``, scores the AN layer
    against the class labels and finally writes a run-complete marker file.

    Args:
        params: model parameters (``params['NUM_VR']`` is used in the plot
            title).
        settings: classifier settings, indexed by the string keys used below.
        fold: fold identifier, embedded in the plot file names.

    Returns:
        The value of ``classifier.calculateScore`` for this run.
    """
    classifier.printParameters('Model Parameters', params)
    classifier.printParameters('Classifier Settings', settings)

    populationsInput = []
    populationsNoiseSource = []
    populationsRN = []
    populationsPN = []
    populationsAN = []
    # keep handle to these for saving learnt weights
    projectionsPNAN = []

    # Each population list paired with the settings key controlling its
    # recording/plotting, in the order they are recorded and stacked.
    layers = [
        (populationsInput, 'RECORD_POP_INPUT'),
        (populationsNoiseSource, 'RECORD_POP_NOISE_SOURCE'),
        (populationsRN, 'RECORD_POP_RN'),
        (populationsPN, 'RECORD_POP_PN'),
        (populationsAN, 'RECORD_POP_AN'),
    ]

    if settings['LEARNING']:
        totalSimulationTime = float(settings['OBSERVATION_EXPOSURE_TIME_MS'] *
                                    settings['NUM_OBSERVATIONS'])
    else:
        totalSimulationTime = float(settings['OBSERVATION_EXPOSURE_TIME_MS'] *
                                    settings['NUM_OBSERVATIONS_TEST'])
    print('Total Simulation Time will be' + ' ' + str(totalSimulationTime))

    # ms Integration timestep for simulation
    DT = 1.0
    classifier.setupModel(params, settings, DT, totalSimulationTime,
                          populationsInput, populationsNoiseSource,
                          populationsRN, populationsPN, populationsAN,
                          projectionsPNAN)

    for populations, recordKey in layers:
        utils.recordPopulations(populations, settings[recordKey])

    # run the model for the whole learning or the whole testing period
    classifier.run(totalSimulationTime)

    # ---- combined raster plot: every layer stacked on one axis ----
    fig1 = plt.figure(figsize=(20, 20))
    plt.xlabel('Time[ms]', fontsize=16)
    plt.ylabel('Neurons', fontsize=16)
    title = 'Training' if settings['LEARNING'] else 'Testing'
    title = title + ' - Odour Classification - ' + str(params['NUM_VR']) + \
        ' Virtual Receptors'
    fig1.suptitle(title, fontsize=18)

    indexOffset = 0
    for populations, recordKey in layers:
        # Each layer starts one row above where the previous one ended.
        indexOffset = 1 + utils.plotAllSpikes(populations,
                                              totalSimulationTime,
                                              indexOffset,
                                              settings[recordKey])

    if settings['LEARNING']:
        filename = 'RasterPlot-Training-fold' + str(fold) + '.pdf'
    else:
        filename = 'RasterPlot-Testing-fold' + str(fold) + '.pdf'
    plt.savefig(filename)
    plt.close()

    # ---- separated raster plot: RN, PN and AN on their own axes ----
    (fig2, (ax1, ax2, ax3)) = plt.subplots(3, 1, figsize=(20, 20),
                                           sharex=True)
    for axis, (populations, recordKey) in zip((ax1, ax2, ax3), layers[2:]):
        plt.axes(axis)
        utils.plotAllSpikes(populations, totalSimulationTime, 0,
                            settings[recordKey])

    ax1.set_title('RN layer spikes', fontsize=30)
    ax2.set_title('PN layer spikes', fontsize=30)
    ax3.set_title('AN layer spikes', fontsize=30)
    ax3.set_xlabel('Simulation time[ms]', fontsize=30)
    ax3.set_ylabel('Neuron indices', fontsize=30)
    for axis in (ax3, ax2, ax1):
        axis.tick_params(labelsize=20)

    if settings['LEARNING']:
        filename = 'Separated_RasterPlot-Training-fold' + str(fold) + '.pdf'
    else:
        filename = 'Separated_RasterPlot-Testing-fold' + str(fold) + '.pdf'
    plt.savefig(filename)
    plt.close()

    # ---- scoring ----
    learning = settings['LEARNING']
    if learning:
        classLabels = utils.loadListFromCsvFile(settings['CLASS_LABELS_TRAIN'],
                                                True)
        # store the weight values learnt via plasticity, these will be
        # reloaded as static weights for test stage
        classifier.saveLearntWeightsPNAN(settings, params, projectionsPNAN,
                                         len(populationsPN),
                                         len(populationsAN))
    else:
        classLabels = utils.loadListFromCsvFile(settings['CLASS_LABELS_TEST'],
                                                True)

    # The winning class is determined per observation from the AN layer.
    winningClassesByObservation, winningSpikeCounts = \
        classifier.calculateWinnersAN(settings, populationsAN, classLabels)
    scorePercent = classifier.calculateScore(winningClassesByObservation,
                                             classLabels)

    if not learning:
        # Only the testing run persists its classification results.
        utils.saveListAsCsvFile(winningClassesByObservation,
                                settings['CLASSIFICATION_RESULTS_PATH'])
        utils.saveListAsCsvFile(winningSpikeCounts,
                                settings['SPIKE_COUNT_RESULTS_PATH'])

    classifier.end()

    # write a marker file to allow invoking programs to know that the
    # Python/Pynn run completed
    utils.saveListToFile(['Pynn Run complete'], settings['RUN_COMPLETE_FILE'])
    print('PyNN run completed.')
    return scorePercent
else: wrong+=1 print correct,wrong accuracy=float(correct)/float(correct+wrong) print accuracy def constructTrajectoryMicrosoft(self): for path in self.filelist: frames=self.dic[path].framelist self.dic[path].consTrajectoryYin(frames,'microsoft') def combineFeatureMicrosoft(self): for path in self.filelist: self.dic[path].combineFeatureMicrosoft() #Microsoft if __name__ == '__main__': classifier = Classifier() dataset='our' trainname={} testname={} if dataset=='devisign': #pathTotal='/media/lzz/Data1/devisign/' #pathTotal='/media/lzz/Data1/own/' pathTotal='/home/lzz/sign/data0/' trainname['P08']=0 trainname['P02']=0 trainname['P01']=0 trainname['P07']=0 #trainname['P01']=0 testname['P03']=0
def evalLR(data, classifyTrained, classifySurface, selectFeatures):
    """Cross-validate an LR classifier and optionally plot its surface.

    All four arguments are forwarded unchanged to ``cl.classifyDataLR``.  The
    decision surface is plotted only when the module-level flag
    ``plotSurfaces`` is truthy.
    """
    model = cl.classifyDataLR(data, classifyTrained, classifySurface,
                              selectFeatures)
    model.crossValidation()
    if not plotSurfaces:
        return
    model.plotDecisionSurface()