def add(self, A, B):
    '''Add the production rule A-->B to the grammar.

    Returns 1 when the rule was stored and -1 when A already owns a rule
    whose ``name`` equals B.  When ``self.classifierNum`` has reached
    ``self.parametry.np`` the crowding step ``self.scisk_new(B, A)`` runs
    before the new rule is stored.
    '''
    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    is_new_symbol = A not in self.G

    # Check whether such a classifier already exists.
    if not is_new_symbol and any(x.name == B for x in self.G[A]):
        return -1

    # Crowding happens here, before the rule is added.
    if self.classifierNum >= self.parametry.np:
        self.scisk_new(B, A)

    if is_new_symbol:
        self.G[A] = [classifier(B, A)]
    else:
        self.G[A].append(classifier(B, A))
    # Single-argument print() prints the same on Python 2 and Python 3.
    print("dodalem:" + A + "-->" + B)

    # NOTE(review): a brand-new left-hand symbol always bumps the counter,
    # while an existing one only counts right-hand sides longer than one
    # symbol.  Kept exactly as in the original -- confirm this is intended.
    if is_new_symbol or len(B) > 1:
        self.classifierNum += 1
    return 1
def __init__(self):
    # Run-time parameters.
    self.parametry = properties()
    self.usedTerminals = []

    # The grammar as a dictionary: left-hand symbol -> list of rules.
    # Alternative start grammar kept for reference:
    # self.G = {"S":[classifier("AB","S"),classifier("AC","S")],"A":[classifier("BA","A"),classifier("a","A")],"B":[classifier("CC","B"),classifier("b","B")],"C":[classifier("AB","C"),classifier("a","C")]}
    self.G = {
        "S": [classifier("AB", "S"), classifier("AC", "S")],
        "C": [classifier("SB", "C"), classifier("a", "C")],
        "B": [classifier("BB", "B"), classifier("b", "B")],
        "A": [classifier("a", "A")],
    }

    # Only rules whose name is longer than one symbol are counted.
    self.classifierNum = sum(
        1
        for rules in self.G.values()
        for rule in rules
        if len(rule.name) > 1
    )
def rand_grid_search(self, trainsize, testsize, sets, tries, Clf, Feature_finder, Convert_grey, Rootsift, Pca_before_kmeans, Kclusters, Pca_ratio, Tfidf, Incremental_threshold):
    """Evaluate ``tries`` randomly chosen parameter combinations.

    Each of the nine trailing arguments is an iterable of candidate values.
    ``tries`` combinations are sampled without replacement from their
    Cartesian product (``random.sample`` raises ValueError when ``tries``
    exceeds the number of combinations).  For every sampled combination a
    ``cf.classifier`` is trained and tested on each (train, test) pair
    returned by ``self.test_sets(trainsize, testsize, sets)`` and the F1
    score, training time and testing time are averaged.

    Returns a pandas DataFrame with one row per combination, sorted by
    "F1" in descending order.
    """
    columns = ['F1', 'Avg Train Time', 'Avg Test Time', 'CLF',
               'Feature Finder', 'Convert Grey', 'RootSIFT', 'PCA First',
               'K Clusters', 'PCA Ratio', 'TF-IDF', 'Incremental Threshold']
    allparms = [Clf, Feature_finder, Convert_grey, Rootsift,
                Pca_before_kmeans, Kclusters, Pca_ratio, Tfidf,
                Incremental_threshold]
    combos = list(itertools.product(*allparms))
    mysplit = self.test_sets(trainsize, testsize, sets)

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    rows = []
    for p in random.sample(combos, tries):
        traintime = []
        testtime = []
        f1 = []
        for (train, test) in mysplit:
            cf1 = cf.classifier(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8])
            cf1.incremental_train(train)
            f1.append(cf1.test(test))
            traintime.append(cf1.train_time)
            testtime.append(cf1.test_time)
        # The classifier is reported by its type name, the rest verbatim.
        rows.append([np.average(f1), np.average(traintime),
                     np.average(testtime), type(p[0]).__name__]
                    + list(p[1:9]))

    results = pd.DataFrame(rows, columns=columns)
    return results.sort_values(by="F1", ascending=False)
def class_results():
    """Train and test the food and movie SVMs and print their results.

    Returns the tuple ``(c1.food_results, c1.movie_results)``.
    """
    def show(results):
        # One formatted string per item reproduces what the Python 2
        # statement ``print title, "\n", content, "\n"`` wrote, and prints
        # the same on Python 3.
        for item in results:
            print("%s \n%s \n" % (item['Title'], item['Content']))

    print("bbb")
    c1 = classifier.classifier()
    print("aaa")
    c1.build_tf_idf_feature_selection()
    c1.vectorization()

    svc1 = c1.svm_train_food()
    c1.svm_test_food(svc1)
    print("============the classification results for food are:=================== ")
    show(c1.food_results)

    svc2 = c1.svm_train_movie()
    c1.svm_test_movie(svc2)
    print("=============the classification results for movie are: ===============")
    show(c1.movie_results)

    return c1.food_results, c1.movie_results
def train_store(path, path_mode, axis, model_name):
    """Train a discrete HMM on the sequences under ``path`` and store it.

    The sequences returned by ``get_all(path)`` are brought to a common
    length, clustered around ``N`` centroids and used to train the model.
    The recognition threshold is twice the mean of ``pr_hmm`` over the
    training sequences.  Emission matrix, transition matrix, initial
    distribution, centroids and threshold are handed to ``store_model``.
    """
    trs, maxLen = get_all(path)
    training = unified_len(trs, maxLen, D, axis)
    TRTS = classifier.classifier()
    centroids = TRTS.get_point_centroids(training, N, D)
    ATrainBinned = TRTS.get_point_clusters(training, centroids, D)

    # ---- Training ----
    # Set priors.
    pP = TRTS.prior_transition_matrix(M, LR)
    # Train the model.
    b = list(range(N))
    cyc = 50
    E, P, Pi, _ = TRTS.dhmm_numeric(ATrainBinned, pP, b, M, cyc, .00001)

    # The original also tracked a minimum seeded with numpy.Infinity (an
    # alias removed in NumPy 2.0) but never read it, so only the sum stays.
    sumLik = sum(TRTS.pr_hmm(seq, P, E.transpose(), Pi)
                 for seq in ATrainBinned)
    gestureRecThreshold = 2.0 * sumLik / len(ATrainBinned)
    print("The threshold is ", gestureRecThreshold)
    store_model(path_mode, model_name, E, P, Pi, centroids, gestureRecThreshold)
def main():
    """Train the generic, naive Bayes and Fisher classifiers and print probes."""
    # The context manager closes the word file; the original leaked it.
    with open("fingerprintGender.txt", 'r') as doc:
        wordsDict = getwords(doc.read())

    genericClassifier = classifier.classifier(wordsDict)
    genericClassifier.setdb("generic.db")
    sampletrain(genericClassifier)
    # Single-argument print() prints the same on Python 2 and Python 3.
    print("---genericClassifier---")
    print(genericClassifier.weightedprob('quick rabbit', 'good', genericClassifier.fprob))

    print("---Naive Bayes---")
    bayesClassifier = naivebayes.naivebayes(wordsDict)
    bayesClassifier.setdb("bayes.db")
    sampletrain(bayesClassifier)
    print(bayesClassifier.prob('quick rabbit', 'good'))
    # Result intentionally unused, as in the original.
    bayesClassifier.classify('quick money', default='unknown')
    for _ in range(10):
        sampletrain(bayesClassifier)
    print(bayesClassifier.classify('quick money', default='unknown'))

    print("---FISHER CLASSIFIER---")
    fisher = fisherclassifier.fisherclassifier(wordsDict)
    fisher.setdb("fisher.db")
    sampletrain(fisher)
    print(fisher.fisherprob('quick rabbit', 'good'))
    print(fisher.weightedprob('money', 'bad', fisher.cprob))
def inwersja(self, A, classA):
    '''Inversion genetic operator (originally marked as not verified).

    Swaps the first two symbols of ``classA`` and, unless ``self.G[A]``
    already holds a rule with that name, appends the swapped rule.

    The original referenced the undefined names ``G``, ``classB`` and
    ``slef`` and read ``tmp`` before assigning it, so it could not run.
    '''
    inverted = str(classA[1]) + str(classA[0])
    if any(x.name == inverted for x in self.G[A]):
        return
    # NOTE(review): the original built the rule with one argument; A is
    # passed as the second so the rule keeps its left-hand symbol, as the
    # grammar's other classifier(...) calls do -- confirm the signature.
    self.G[A].append(classifier(inverted, A))
def train_all(self, clf, feature_finder, convert_grey, rootsift, pca_before_kmeans, kclusters, pca_ratio, tfidf, incremental_threshold):
    """Train a single classifier and return it.

    ``n`` is the size of the smallest category in ``self.imgs`` (capped at
    9999999999999) and is passed to ``self.rand_train_test(n, 0)``; only
    the training part of its result is used.
    """
    category_sizes = [len(self.imgs[cat]) for cat in self.categories]
    n = min([9999999999999] + category_sizes)
    train, _test = self.rand_train_test(n, 0)

    c1 = cf.classifier(clf, feature_finder, convert_grey, rootsift,
                       pca_before_kmeans, kclusters, pca_ratio, tfidf,
                       incremental_threshold)
    c1.incremental_train(train)
    return c1
def __init__( self ):
    # Collaborators are created in the same order as before.
    self.driver = webdriver.Firefox()
    self.classifier = cf.classifier()
    # Collected URLs and contexts both start out empty.
    self.URLs, self.contexts = [], []
    self.bag = utils.load_dictionary()
    self.tagger = Mecab()
def main():
    """Score every loaded test sequence against every stored gesture model.

    For each test and each model the sequence is binned with the model's
    centroids and its ``pr_hmm`` value is compared with the model's stored
    threshold; the verdict is printed.

    NOTE(review): the collapsed source had a bare ``return`` after the
    loops followed by an older single-model test routine.  That code was
    unreachable and has been removed, along with the unused ``names`` list.
    """
    path = "data\\model\\"
    modelLoader = Loader()
    models = modelLoader.getAllModels(path)
    files = modelLoader.getAllFileNames("data\\tests")
    tests = modelLoader.loadTest("data\\tests\\", files)
    TS = classifier.classifier()

    for testName in tests:
        # Each test is stored as one 60x3 sequence in a (60, 1, 3) buffer.
        oneTest = numpy.empty(shape=(60, 1, 3))
        oneTest[:, 0, :] = tests[testName]
        for gesture in models:
            model = models[gesture]
            E, P, Pi, cent, gestureRecThreshold = (
                model["E"], model["P"], model["Pi"],
                model["centroids"], model["threshold"])
            ATestBinned = TS.get_point_clusters(oneTest, cent, D)
            tLL = TS.pr_hmm(ATestBinned[0], P, E.transpose(), Pi)
            if tLL > gestureRecThreshold:
                print("Log Likelihood: %.3f > %.3f (threshold) -- FOUND %s Gesture" % (tLL, gestureRecThreshold, gesture))
            else:
                print("Log Likelihood: %.3f < %.3f (threshold) -- NO %s Gesture" % (tLL, gestureRecThreshold, gesture))
def __init__(self):
    # Selected method index plus per-method result slots (five methods).
    self.method = 0
    self.data = [None] * 5
    self.values = [None] * 5
    self.maximum = None
    self.names = [' Eucl Dist', ' Maha Dist', ' Eucl Vote', ' Maha Vote', ' Custom']

    # Both input files are closed when the block exits; the original
    # leaked the handles.  The vector holder is built inside the block in
    # case the readers hand back data that still depends on the open files.
    with open("hw5db1.txt", "r") as file1, open("hw5db2.txt", "r") as file2:
        dataIn = readfile.readFile(file1)
        stats = readfile.readStats(file2)
        self.vectors = vector_handler.vector_holder(dataIn, stats)

    self.classifier = classifier.classifier()
def benchmark(self, trainsize, testsize, sets, clf, feature_finder, convert_grey, rootsift, pca_before_kmeans, kclusters, pca_ratio, tfidf, incremental_threshold):
    """Train and test one configuration on every split and print averages.

    A fresh ``cf.classifier`` is built for each (train, test) pair from
    ``self.test_sets(trainsize, testsize, sets)``.  The average training
    time, testing time and F1 score are printed; nothing is returned.
    """
    mysplit = self.test_sets(trainsize, testsize, sets)
    traintime = []
    testtime = []
    f1 = []
    for (train, test) in mysplit:
        cf1 = cf.classifier(clf, feature_finder, convert_grey, rootsift,
                            pca_before_kmeans, kclusters, pca_ratio, tfidf,
                            incremental_threshold)
        cf1.incremental_train(train)
        f1.append(cf1.test(test))
        traintime.append(cf1.train_time)
        testtime.append(cf1.test_time)

    # Single-argument print() prints the same on Python 2 and Python 3;
    # the original print statements were a SyntaxError on Python 3.
    print("Average Training Time: {} seconds".format(np.average(traintime)))
    print("Average Testing Time: {} seconds".format(np.average(testtime)))
    print("Average F1 Score: {}".format(np.average(f1)))
def ruletSelection(self):
    """Pick two different rules by fitness-proportional (roulette) selection.

    Every rule with a two-symbol right-hand side gets the slice
    ``[r1, r2)`` of ``[0, suma)``, with width equal to its fitness.  A
    random point selects the first parent A.  Further points are drawn
    until one lands on a rule for which ``compare(A) != 3``; that rule
    becomes B.  Returns ``(A, B)`` as fresh ``classifier`` copies.

    Raises ValueError when selection cannot succeed (no two-symbol rule
    with positive fitness, or none that differs from A).  The original
    looped forever or failed on an unbound ``A`` in those cases.
    """
    candidates = []
    suma = 0.0
    for x in self.grammar.G:
        for y in self.grammar.G[x]:
            if len(y.right) == 2:
                y.r1 = suma
                suma += y.fitness
                y.r2 = suma
                candidates.append(y)
    print("suma:")
    print(suma)
    if suma <= 0:
        raise ValueError(
            "roulette selection needs a two-symbol rule with positive fitness")

    def spin(point):
        # Half-open slices: a point on a shared boundary belongs to exactly
        # one rule.  Only rules that received a slice are inspected, so
        # other rules are never read for r1/r2.
        for y in candidates:
            if y.r1 <= point < y.r2:
                return y
        return None

    print("losowa")
    losowa = random.uniform(0, suma)
    print(losowa)
    picked = spin(losowa)
    while picked is None:
        # Only reachable when the draw equals ``suma`` itself.
        picked = spin(random.uniform(0, suma))
    A = classifier(picked.right, picked.left)

    # Without a selectable rule that differs from A the loop below would
    # never terminate.
    if not any(y.r1 < y.r2 and y.compare(A) != 3 for y in candidates):
        raise ValueError("roulette selection found no second rule different from the first")

    losowa = random.uniform(0, suma)
    print(losowa)
    while True:
        picked = spin(losowa)
        if picked is not None and picked.compare(A) != 3:
            B = classifier(picked.right, picked.left)
            print("znalazlem")
            return (A, B)
        losowa = random.uniform(0, suma)
def add(self, A, B):
    '''Add the production rule A-->B to the grammar.

    Returns 1 when the rule was stored and -1 when A already owns a rule
    whose ``right`` equals B.  When ``self.classifierNum`` has reached
    ``self.parametry.np`` the crowding step ``self.scisk(A, B)`` runs
    before the new rule is stored.
    '''
    # Single-argument print() prints the same on Python 2 and Python 3.
    print("add")
    print(self.classifierNum)

    # ``in`` replaces dict.has_key(), which does not exist on Python 3.
    is_new_symbol = A not in self.G

    # Check whether such a classifier already exists.
    if not is_new_symbol and any(x.right == B for x in self.G[A]):
        return -1

    # Crowding happens here, before the rule is added.
    if self.classifierNum >= self.parametry.np:
        self.scisk(A, B)

    if is_new_symbol:
        self.G[A] = [classifier(B, A)]
    else:
        self.G[A].append(classifier(B, A))

    # NOTE(review): a brand-new left-hand symbol always bumps the counter,
    # while an existing one only counts right-hand sides longer than one
    # symbol.  Kept exactly as in the original -- confirm this is intended.
    if is_new_symbol or len(B) > 1:
        self.classifierNum += 1

    print("dodalem")
    print(str(A) + '-->' + str(B))
    return 1
def main():
    """Train a discrete HMM on the training gestures and score the test set.

    The recognition threshold is twice the mean of ``pr_hmm`` over the
    training sequences.  Each test sequence scoring above it counts as a
    recognised gesture, and the success rate is printed at the end.
    """
    path = "data\\Coords\\training"
    training = get_all_training(path)
    testing = get_all_training("data\\Coords\\testing")
    TRTS = classifier.classifier()
    centroids = TRTS.get_point_centroids(training, N, D)
    testing = transformation(testing, training)
    ATrainBinned = TRTS.get_point_clusters(training, centroids, D)
    ATestBinned = TRTS.get_point_clusters(testing, centroids, D)
    print(ATrainBinned, ATestBinned)

    pP = TRTS.prior_transition_matrix(M, LR)
    # Train the model.
    b = list(range(N))
    cyc = 50
    E, P, Pi, _ = TRTS.dhmm_numeric(ATrainBinned, pP, b, M, cyc, .00001)

    # The original also tracked a minimum seeded with numpy.Infinity (an
    # alias removed in NumPy 2.0) but never read it, so only the sum stays.
    sumLik = sum(TRTS.pr_hmm(seq, P, E.transpose(), Pi)
                 for seq in ATrainBinned)
    gestureRecThreshold = 2.0 * sumLik / len(ATrainBinned)

    print('\n********************************************************************')
    print('Testing %d sequences for a log likelihood greater than %.4f' % (len(ATestBinned), gestureRecThreshold))
    print('********************************************************************\n')

    gesture = "Gesture"
    recs = 0
    tLL = numpy.zeros(shape=(len(ATestBinned)))
    for j, seq in enumerate(ATestBinned):
        tLL[j] = TRTS.pr_hmm(seq, P, E.transpose(), Pi)
        if tLL[j] > gestureRecThreshold:
            recs = recs + 1
            print("Log Likelihood: %.3f > %.3f (threshold) -- FOUND %s Gesture" % (tLL[j], gestureRecThreshold, gesture))
        else:
            print("Log Likelihood: %.3f < %.3f (threshold) -- NO %s Gesture" % (tLL[j], gestureRecThreshold, gesture))
    print('Recognition success rate: %.2f percent\n' % (100 * recs / len(ATestBinned)))
def __init__(self):
    def initial_grammar():
        # The grammar as a dictionary: left-hand symbol -> list of rules.
        # Every call builds fresh classifier objects.
        # Alternative start grammar kept for reference:
        # {"S":[classifier("AB","S"),classifier("AC","S")],"A":[classifier("BA","A"),classifier("a","A")],"B":[classifier("CC","B"),classifier("b","B")],"C":[classifier("AB","C"),classifier("a","C")]}
        return {
            "S": [classifier("AB", "S"), classifier("AC", "S")],
            "C": [classifier("SB", "C"), classifier("a", "C")],
            "B": [classifier("BB", "B"), classifier("b", "B")],
            "A": [classifier("a", "A")],
        }

    # Create the parameters instance.
    self.parametry = properties()
    self.allowFulCover = 1
    # Working grammar and an independent backup with the same start rules.
    self.G = initial_grammar()
    self.G_backUp = initial_grammar()
    self.allowCover = 1
    # Only rules whose right-hand side has more than one symbol are counted.
    self.classifierNum = sum(
        1
        for rules in self.G.values()
        for rule in rules
        if len(rule.right) > 1
    )
def classify_images(images_dir, results_dic, model):
    """
    Classify every image named in results_dic and record the outcome.

    For each filename the classifier() function is called on
    images_dir + filename. Its label is lower-cased and stripped of
    surrounding whitespace, e.g. 'Maltese dog, Maltese terrier, Maltese'
    becomes 'maltese dog, maltese terrier, maltese'. The pet label counts
    as matched when it occurs anywhere inside that classifier label.

    Parameters:
      images_dir - Path prefix of the image folder; it is joined to the
                   filename without adding a separator (string)
      results_dic - Dictionary with the image filename as key and a List
                    as value. On entry index 0 = pet image label (string).
                    This function appends:
                      index 1 = classifier label (string)
                      index 2 = 1/0 (int), 1 = labels match, 0 = no match
      model - CNN model architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      None - results_dic is updated in place.
    """
    for filename in results_dic:
        entry = results_dic[filename]
        image_path = "{}{}".format(images_dir, filename)
        classifier_label = classifier(image_path, model).lower().strip()
        is_match = int(entry[0] in classifier_label)
        entry.extend([classifier_label, is_match])
        results_dic[filename] = entry
def compare_precisions_by_nb_of_components():
    """Print the test score of the GMM classifier for several component counts.

    MNIST images are flattened to 784 values and reduced to 40 dimensions
    with PCA fitted on the training set. The stored k-means centres are
    projected the same way and passed as ``means``. For every k the
    fraction of correctly predicted test labels is printed, and the list
    of (k, precision) pairs is printed at the end.
    """
    def load_flat(dataset):
        images, labels = load_mnist(dataset=dataset, path=args.path)
        return np.reshape(images, (images.shape[0], 784)), labels

    kmeans = kmeans_.load_kmeans('kmeans-20.dat')
    train_data, train_labels = load_flat('training')
    test_data, test_labels = load_flat('testing')

    d = 40
    reducer = sklearn.decomposition.PCA(n_components=d)
    reducer.fit(train_data)
    train_data_reduced = reducer.transform(train_data)
    test_data_reduced = reducer.transform(test_data)
    kmeans_reduced = reducer.transform(kmeans)

    label_set = set(train_labels)
    precisions = []
    for k in list(range(1, 11)) + [15, 20, 30]:
        print('learning {} components'.format(k))
        model = classifier.classifier(k, covariance_type='full',
                                      model_type='gmm',
                                      means_init_heuristic='kmeans',
                                      means=kmeans_reduced, verbose=False)
        model.fit(train_data_reduced, train_labels)
        predicted_labels = model.predict(test_data_reduced, label_set)
        precision = np.mean(predicted_labels == test_labels)
        precisions.append((k, precision))
        print('precision: {}'.format(precision))
    print(precisions)
def scisk(self, left, name):
    '''Crowding: make room for the rule left-->name.

    ``self.parametry.cf`` times a sub-population of ``self.parametry.cs``
    randomly drawn non-terminal rules is sampled and its least fit member
    enters the crowding set.  The member of that set most similar to the
    new rule (highest ``compare`` value) is removed through ``self.rem``,
    whose result is returned.

    Returns None without removing anything when there is no non-terminal
    rule to sample or when ``cf``/``cs`` leave the crowding set empty.

    Fixes over the original: the similarity maximum is now tracked (it was
    never updated, so the last set member always won); the worst sample of
    each round enters the set, not the last one drawn; the key list is
    materialised for Python 3; a grammar without non-terminal rules no
    longer loops forever.
    '''
    print("scisk")

    # Only symbols owning at least one non-terminal rule can yield a
    # sample.  Dropping the others does not change which rules can be drawn.
    keys = [key for key in self.G
            if any(len(rule.getRight()) > 1 for rule in self.G[key])]
    if not keys or self.parametry.cs <= 0:
        return None

    cf_table = []
    for _ in range(self.parametry.cf):
        worst = None
        for _ in range(self.parametry.cs):
            # Draw a symbol, then one of its rules, until it is non-terminal.
            while True:
                K = random.choice(self.G[random.choice(keys)])
                if len(K.getRight()) > 1:
                    break
            if worst is None or K.fitness <= worst.fitness:
                worst = K
        cf_table.append(worst)
    if not cf_table:
        return None

    # Look for the most similar rule.
    tmp_cla = classifier(name, left)
    print(tmp_cla)
    choose = cf_table[0]
    podobienstwo_max = -1
    for x in cf_table:
        podobienstwo = x.compare(tmp_cla)
        if podobienstwo_max < podobienstwo:
            podobienstwo_max = podobienstwo
            choose = x

    # Remove the most similar classifier; the caller adds the new one.
    return self.rem(choose)
def classify_images(images_dir, results_dic, model):
    """
    Classify every image named in results_dic and record the outcome.

    For each filename the classifier() function is called on
    "<images_dir>/<filename>". Its label is stored as returned, and the
    comparison with the pet label is delegated to compare_labels().

    Parameters:
      images_dir - Path to the image folder; a "/" is inserted between it
                   and the filename (string)
      results_dic - Dictionary with the image filename as key and a List
                    as value. On entry index 0 = pet image label (string).
                    Each value is replaced by a new list:
                      index 0 = pet image label (string)
                      index 1 = classifier label (string)
                      index 2 = result of compare_labels(pet label,
                                classifier label)
      model - CNN model architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      None - results_dic is updated in place.
    """
    for filename, entry in results_dic.items():
        label = entry[0]
        image_path = "{}/{}".format(images_dir, filename)
        classifier_label = classifier(image_path, model)
        outcome = compare_labels(label, classifier_label)
        results_dic[filename] = [label, classifier_label, outcome]
def classify_images(images_dir, petlabel_dic, model):
    """
    Classify every image in petlabel_dic and compare with its pet label.

    The classifier label is stripped and lower-cased. The pet label counts
    as matched when its first occurrence in the classifier label starts at
    the beginning or after a space, and ends at the end of the label or
    before a space or comma.

    Parameters:
      images_dir - Path prefix of the image folder; it is joined to the
                   filename without adding a separator (string)
      petlabel_dic - Dictionary with the pet image filename as key and the
                     pet image label as value
      model - pretrained CNN architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      results_dic - Dictionary with the image filename as key and a List
                    as value: idx 0 = pet image label (string),
                    idx 1 = classifier label (string),
                    idx 2 = 1/0 (int), 1 = labels match, 0 = no match
    """
    results_dic = {}
    for filename in petlabel_dic:
        label = petlabel_dic[filename]
        result = classifier(images_dir + filename, model).strip().lower()

        start = result.find(label)
        end = start + len(label)
        is_match = 0
        if start >= 0:
            starts_word = start == 0 or result[start - 1] == " "
            ends_word = end == len(result) or result[end:end + 1] in (" ", ",")
            if starts_word and ends_word:
                is_match = 1

        results_dic[filename] = [label, result, is_match]
    return results_dic
def classify_images(images_dir, petlabel_dic, model):
    """
    Classify every existing image in petlabel_dic and compare the labels.

    Files listed in petlabel_dic that no longer exist (deleted or renamed
    before this function runs) are reported with a message and skipped.
    The classifier label is lower-cased and stripped, split on commas
    followed by whitespace, and handed to classification_match() together
    with the pet label.

    Parameters:
      images_dir - Path to the image folder (string)
      petlabel_dic - Dictionary with the pet image filename as key and the
                     pet image label as value
      model - pretrained CNN architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      results_dic - Dictionary keyed by the full image path (string) with
                    a List as value: idx 0 = pet image label (string),
                    idx 1 = classifier label (string),
                    idx 2 = result of classification_match()
    """
    results_dic = dict()
    for fname, pet_label in petlabel_dic.items():
        full_path = Path(join(images_dir, fname))
        if not full_path.exists():
            print("file does not exist: {0}".format(full_path))
            continue

        image_classification = classifier(str(full_path), model).lower().strip()
        terms = re.split(r",\s+", image_classification)
        found = classification_match(pet_label, terms)
        results_dic[str(full_path)] = [pet_label, image_classification, found]
    return results_dic
def classify_images(images_dir, petlabel_dic, model):
    """
    Classify every image in petlabel_dic and compare with its pet label.

    The classifier label is lower-cased and stripped. The pet label counts
    as matched when it equals one whitespace-separated word of the
    classifier label or one of its comma-separated terms.

    The original guarded every insert with a "duplicate file" check.
    Dictionary keys are unique, so that branch could never run and has
    been removed.

    Parameters:
      images_dir - Path prefix of the image folder; it is joined to the
                   filename without adding a separator (string)
      petlabel_dic - Dictionary with the pet image filename as key and the
                     pet image label as value
      model - pretrained CNN architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      results_dic - Dictionary with the image filename as key and a List
                    as value: idx 0 = pet image label (string),
                    idx 1 = classifier label (string),
                    idx 2 = 1/0 (int), 1 = labels match, 0 = no match
    """
    results_dic = dict()
    for filename in petlabel_dic:
        model_label = classifier(images_dir + filename, model).lower().strip()
        file_label = petlabel_dic[filename]
        terms = [term.strip() for term in model_label.split(",")]
        is_match = file_label in model_label.split() or file_label in terms
        results_dic[filename] = [file_label, model_label, int(is_match)]
    return results_dic
def trapezoidalExperimentSparse(input_dataset, dataset_name):
    """Run the trapezoidal-stream experiment and return the averaged errors.

    For ``parameters.rounds`` rounds the dataset is shuffled in place
    (after re-seeding with ``parameters.seed``), passed through
    ``preprocess2.removeDataTrapezoidal`` and used to train a fresh
    classifier. The per-round stream errors are summed and divided by the
    number of rounds. Note that ``input_dataset`` itself is shuffled.
    """
    print("Trapezoidal experiment with OLVF: " + str(dataset_name))
    error_vector = np.zeros(len(input_dataset))

    # Only needed by the feature plot, which is currently disabled below.
    trapezoidal_view = preprocess2.removeDataTrapezoidal(
        copy.deepcopy(input_dataset))
    feature_summary = [len(row) for row in trapezoidal_view]

    for round_no in range(parameters.rounds):
        print("Round: " + str(round_no))
        random.seed(parameters.seed)
        random.shuffle(input_dataset)
        current_dataset = preprocess2.removeDataTrapezoidal(
            copy.deepcopy(input_dataset))
        current_classifier = c.classifier(current_dataset, [], 1)
        classifier_summary, stream_error = current_classifier.train()
        error_vector = np.add(error_vector, stream_error)

    average_error_vector = np.divide(error_vector, parameters.rounds)
    #misc.plotError(average_error_vector, dataset_name)
    #misc.plotFeatures(feature_summary, dataset_name)
    #misc.plotClassifierDimension(classifier_summary, dataset_name)
    print(current_classifier.mean_dict)
    return average_error_vector
def classify_images(images_dir, results_dic, model):
    """
    Run the pre-trained model on every image named in results_dic and
    extend each entry with the classifier result.

    Parameters:
      images_dir - Path to the image folder; a trailing '/' is added when
                   missing (string)
      results_dic - Dictionary with the image filename as key and a List
                    as value, where index 0 = pet image label (string)
      model - CNN model architecture passed on to classifier():
              resnet alexnet vgg (string)
    Returns:
      None - results_dic is mutated in place. Each value is extended with
             the lower-cased, stripped classifier label (index 1) and 1/0
             for whether the pet label occurs inside it (index 2).
    """
    folder = images_dir if images_dir.endswith('/') else images_dir + '/'

    for pet_image_filename, entry in list(results_dic.items()):
        # Run the prediction and normalise it.
        prediction = classifier(folder + pet_image_filename, model).lower().strip()
        # A match means the pet label occurs inside the prediction.
        match = int(entry[0] in prediction)
        entry.extend([prediction, match])
    return None
def nextPage():
    # Ask for confirmation for the first ticked algorithm and act on it.
    # Only algorithm 1 is implemented; with nothing ticked, warn the user.
    def confirmed(question):
        return messagebox.askyesno("Run Program", question) == True

    if chk1_state.get() == 1:
        if confirmed("Run program with Algorithm 1?"):
            controller.show_frame(ProgBarPage)
            predictions = classifier.classifier(files)
        return

    if chk2_state.get() == 1:
        if confirmed("Run program with Algorithm 2?"):
            print("Two has been chosen but is not available yet")
        return

    if chk3_state.get() == 1:
        if confirmed("Run program with Algorithm 3?"):
            print("Three has been chosen but is not available yet")
        return

    messagebox.showwarning(
        "Must select an algorithm",
        "Please select an algorithm to continue")
def getPrunedDecisionTree(self, l, k, data, attributes, currentAccuracy):
    """Return the best tree found by ``l`` rounds of random pruning.

    Every round copies the best tree so far, draws ``m = randint(1, k)``
    and makes ``m`` pruning attempts on the copy, each on a randomly
    chosen node. An attempt is skipped when the tree has at most one node.
    The copy replaces the best tree when its accuracy on ``data`` is
    strictly higher.

    The original looped over ``range(1, m)``, which made one attempt fewer
    than drawn, so ``m == 1`` pruned nothing and wasted the round.

    Parameters:
      l - number of pruning rounds
      k - upper bound passed to ``randint(1, k)``
      data, attributes - passed on to the classifier's ``classify``
      currentAccuracy - accuracy of the unpruned tree, the bar to beat
    Returns:
      A deep copy of ``self`` or the best pruned copy found.
    """
    classifer = classifier.classifier()
    d_best = copy.deepcopy(self)
    bestAccuracy = currentAccuracy
    for _ in range(l):
        d_prime = copy.deepcopy(d_best)
        m = randint(1, k)
        for _ in range(m):
            allNodesList = self.getAllNodes(d_prime.root)
            n = len(allNodesList)
            if n <= 1:
                continue
            p = randint(0, n - 1)
            self.pruneSubTree(allNodesList[p])
        accuracy = classifer.classify(data, d_prime.root, attributes)
        if accuracy > bestAccuracy:
            d_best = d_prime
            bestAccuracy = accuracy
    return d_best
def classify_images(images_dir, petlabel_dic, model):
    """Classify every image in petlabel_dic and compare with its pet label.

    The classifier label is lower-cased and stripped. The pet label counts
    as matched when its first occurrence in the classifier label starts at
    the beginning or after a space, and ends at the end of the label or
    before a comma or space.

    Returns a dictionary: filename -> [pet label, classifier label, 1/0].
    """
    results_dic = dict()
    for index in petlabel_dic:
        model_label = classifier(images_dir + index, model).lower().strip()
        truth = petlabel_dic[index]

        found = model_label.find(truth)
        stop = found + len(truth)
        matched = 0
        if found >= 0:
            whole_label = found == 0 and len(truth) == len(model_label)
            starts_word = found == 0 or model_label[found - 1] == " "
            ends_word = (stop == len(model_label)
                         or model_label[stop:stop + 1] in (",", " "))
            if whole_label or (starts_word and ends_word):
                matched = 1

        results_dic[index] = [truth, model_label, matched]
    return(results_dic)
def classify_images(images_dir, results_dic, model = "vgg"):
    """Classify every image named in results_dic and extend its entry.

    Each value (whose index 0 is the pet label) is extended with the
    lower-cased, stripped classifier label and 1/0 for whether the pet
    label occurs inside it. The same dictionary is returned.
    """
    for key in results_dic:
        # Concatenating with "" keeps the original's requirement that the
        # classifier returns a string.
        model_label = ("" + classifier(images_dir + key, model)).lower().strip()

        # find() gives the lowest index of the pet label, or -1 if absent.
        truth = results_dic[key][0]
        present = 1 if model_label.find(truth) >= 0 else 0
        results_dic[key].extend((model_label, present))
    return results_dic
def my_form_post():
    """Handle the query form: rank the news hits and render the first six."""
    text = request.form['query']

    # Turn every fetched article into a feature vector.
    tag2d = [classifier.classifier(article)
             for article in newsbot.getnews(text)]

    # Train the relevance model on the stored data set.
    full = getall.getall("Navya")
    X, y = full[0], full[1]
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(100, 40, 10, 2), random_state=1)
    clf.fit(X, y)

    relevance_array = clf.predict(tag2d)
    artprint = newsresults.show_news(text)
    finalarr = chooser.chooser(relevance_array, artprint)

    # disp1 .. disp6 carry the first six entries of the chooser result.
    context = {'disp%d' % (slot + 1): finalarr[slot] for slot in range(6)}
    return render_template('WebPage2.html', **context), 200
def __init__(self, config):
    """Build the embedding, encoder, classifier head and attention layer.

    The attention layer needs a config with a wider ``d_embed`` and its
    own ``n_layers``. The original wrote those values into ``config``
    itself, because ``att_config = config`` is only an alias. That changed
    the caller's object and ``self.config``, so building a second model
    from the same config started from an already enlarged ``d_embed``.
    A shallow copy is modified instead.
    """
    import copy  # local import: this block cannot add a file-level import

    super(model, self).__init__()
    self.config = config
    self.hidden_size = config.d_hidden
    self.embed = ELMoEmbedding(config) if config.elmo else nn.Embedding(
        config.n_embed, config.d_embed, padding_idx=0)
    self.en = TPRULayer(config)
    self.clser = classifier(config)

    directions = 2 if config.bidirectional else 1

    # Fixed (non-trainable) initial hidden state of the encoder.
    self.en_h0 = nn.Parameter(
        torch.zeros(config.n_layers * directions, 1, config.d_hidden))
    self.en_h0.requires_grad = False

    # NOTE(review): confirm nothing downstream relied on ``config`` being
    # modified as a side effect of constructing the model.
    att_config = copy.copy(config)
    att_config.d_embed = config.d_embed + config.d_hidden * 2 * directions
    att_config.n_layers = config.att_layers
    self.att = TPRULayer(att_config)

    # Fixed (non-trainable) initial hidden state of the attention layer.
    self.att_h0 = nn.Parameter(
        torch.zeros(config.att_layers * directions, 1, config.d_hidden))
    self.att_h0.requires_grad = False
def split():
    """Train the model chosen in the form on the user's data set and log it.

    The data set is split with the submitted ``test_size`` and
    ``random_state``. The training method named by the first
    ``model_select`` entry is called on the training part, the fitted
    model is pickled under the user's folder, and the run is recorded in
    ``HelpUTrainlog``.

    The model name comes from the request, so it is validated before being
    used with ``getattr``. A missing selection, a private/dunder name, the
    ``modelsave`` helper or a non-callable attribute is rejected with a
    flash message instead of a server error or an arbitrary method call.
    """
    test_size = float(request.form['test_size'])
    random_state = int(request.form['random_state'])
    model_name = request.form['model_name']
    model_select = request.form.getlist('model_select')
    col_choosed = ','.join(train_cols)
    datset_path = 'User_dataset/' + session['username'] + '/' + f_name + '.csv'

    # Read the data and split it.
    train_set = pd.read_csv(datset_path)
    y = train_set[label_cols]
    X = train_set[train_cols]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    clf = classifier()
    chosen = model_select[0] if model_select else ''
    trainer = None
    if chosen and not chosen.startswith('_') and chosen != 'modelsave':
        trainer = getattr(clf, chosen, None)
    if not callable(trainer):
        flash('Unknown model selection', 'danger')
        return render_template('dataload.html')

    start_time = time.time()
    model = trainer(X_train, y_train)
    training_time = time.time() - start_time

    # NOTE(review): model_name is user input and goes into the file path
    # unchecked; consider restricting it to a safe character set.
    model_path = 'User_dataset/' + session['username'] + '/' + model_name + '.pickle'
    clf.modelsave(model_path, model)

    acc_log = round(model.score(X_test, y_test) * 100, 2)
    DoSQL().IUD_db("INSERT INTO HelpUTrainlog(username,training_time,dataset_name,model_name,model_choosed,col_choosed,test_size,random_state,val_acc) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)", (session['username'], training_time, f_name, model_name, chosen, col_choosed, test_size, random_state, acc_log))
    flash('Training success', 'success')
    return render_template('dataload.html')
def run(self):
    """Classify one dataset, tabulate classes per zone and store the result.

    Loads the pickled dataset list, picks entry ``self.idx``, runs
    ``classifier`` on its ARG25 and PQ25 datasets, computes the zonal class
    distribution against band 1 of the rasterised zones image, stamps the
    result with the dataset's start time and appends it under the key
    ``'data'`` in the HDF5 file at ``self.output().path``.
    """
    out_dir = dirname(self.out_fname)
    rasterised_fname = pjoin(out_dir,
                             CONFIG.get('outputs', 'rasterise_filename'))
    ds_list_fname = pjoin(out_dir, CONFIG.get('outputs', 'query_filename'))

    # Pickle streams are bytes: binary mode is required on Python 3 and is
    # harmless on Python 2.
    # NOTE(review): pickle.load must only be used on files produced by this
    # workflow, never on untrusted input.
    with open(ds_list_fname, 'rb') as infile:
        ds_list = pickle.load(infile)

    dataset = ds_list[self.idx]
    nbar_ds = dataset.datasets[DatasetType.ARG25]
    pq_ds = dataset.datasets[DatasetType.PQ25]

    classified_img = classifier(nbar_ds, pq_ds)

    # hard code; as this will be short lived due to agdc-v2 development
    class_ids = [0, 1, 2, 3, 4, 5]

    with rasterio.open(rasterised_fname, 'r') as src:
        zones_img = src.read(1)

    result = zonal_class_distribution(classified_img, zones_img,
                                      class_ids=class_ids)

    # Set the timestamp
    result['Timestamp'] = dataset.start_datetime

    # Open the output hdf5 file and make sure it is closed even when the
    # append fails, so the file handle is not leaked.
    store = pandas.HDFStore(self.output().path)
    try:
        # Write the dataframe
        store.append('data', result)
    finally:
        # Save and close the file
        store.close()
def classify_images(images_dir, results_dic, model):
    """Add the classifier label and a match flag to every results entry.

    Each image is classified with ``classifier(images_dir + filename,
    model)``; the returned label is lower-cased and stripped. The label and
    a 1/0 flag are appended to the entry's list, where the flag is 1 when
    the pet label (index 0) occurs as a substring of the classifier label.

    Parameters:
      images_dir - path prefix that is concatenated with each filename
                   (string)
      results_dic - dictionary keyed by image filename whose value is a
                    list; on entry index 0 holds the pet label, on exit
                    index 1 = classifier label (string) and
                    index 2 = 1/0 match flag (int)
      model - CNN architecture name forwarded to ``classifier`` (string)
    Returns:
      None - results_dic is modified in place.
    """
    for image_filename in results_dic:
        entry = results_dic[image_filename]
        raw_label = classifier(images_dir + image_filename, model)
        classifier_label = raw_label.lower().strip()
        # Substring test: a breed label may list several comma-separated names.
        is_match = 1 if entry[0] in classifier_label else 0
        entry.append(classifier_label)
        entry.append(is_match)
def variableFeatureExperiment(input_dataset, dataset_name, mode="OLVF_random_sparse"):
    """Average the streaming error of the classifier over several rounds.

    A randomly thinned copy of ``input_dataset`` is shuffled and used to
    train a fresh ``c.classifier`` in each of ``parameters.rounds`` rounds;
    the per-round error vectors are summed and divided by the round count.

    Args:
        input_dataset: dataset passed (as a deep copy) to
            ``preprocess2.removeRandomData``.
        dataset_name: label used only in the progress message.
        mode: accepted for interface compatibility; not read in this body.

    Returns:
        The element-wise mean of the per-round error vectors.
    """
    print("Variable feature experiment with OLVF: " + str(dataset_name))
    error_vector = np.zeros(len(input_dataset))

    # Row lengths of one thinned copy; only consumed by the disabled plots.
    thinned_for_summary = preprocess2.removeRandomData(
        copy.deepcopy(input_dataset))
    feature_summary = [len(row) for row in thinned_for_summary]

    # A second, independently thinned copy is the one actually trained on.
    current_dataset = preprocess2.removeRandomData(
        copy.deepcopy(input_dataset))

    for round_index in range(parameters.rounds):
        print("Round: " + str(round_index))
        # The seed is reset every round; the list keeps the previous
        # round's order, so successive shuffles still differ.
        random.seed(parameters.seed)
        random.shuffle(current_dataset)
        current_classifier = c.classifier(current_dataset, [])
        classifier_summary, stream_error = current_classifier.train()
        error_vector = np.add(error_vector, stream_error)

    average_error_vector = np.divide(error_vector, parameters.rounds)
    #misc.plotError(average_error_vector, dataset_name)
    #misc.plotFeatures(feature_summary, dataset_name)
    #misc.plotClassifierDimension(classifier_summary, dataset_name)
    print(current_classifier.mean_dict)
    return average_error_vector
def classify_images(images_dir, results_dic, model='vgg'):
    """Extend each results entry with the classifier label and a match flag.

    Uses the ``classifier()`` function (defined in classifier.py) on
    ``images_dir + filename``; the label is stripped and lower-cased before
    it is compared with the pet label.

    Parameters:
      images_dir - path prefix concatenated with each image filename
                   (string)
      results_dic - dictionary keyed by image filename; each value is a
                    list with the pet label at index 0. This function adds
                    index 1 = classifier label (string) and
                    index 2 = 1 when the pet label is a substring of the
                    classifier label, otherwise 0 (int)
      model - CNN architecture name forwarded to ``classifier``; defaults
              to 'vgg' (string)
    Returns:
      None - results_dic is modified in place.
    """
    for filename, labels in results_dic.items():
        # Classify the image and normalise the returned label.
        raw_label = classifier(images_dir + filename, model)
        classifier_label = raw_label.strip().lower()
        labels.append(classifier_label)

        # Compare against the true pet label stored at index 0.
        labels.append(1 if labels[0] in classifier_label else 0)
def classify_images(img_dir, petlabel_dic, model):
    """Classify every pet image and pair the result with its true label.

    Uses the ``classifier()`` function (defined in classifier.py) on
    ``img_dir + filename``; the label is lower-cased and stripped, then
    searched for the pet label with ``str.find``. The match decision itself
    is delegated to ``is_true_match``.

    Parameters:
      img_dir - path prefix concatenated with each image filename (string)
      petlabel_dic - dictionary mapping image filename to its pet (true)
                     label
      model - CNN architecture name forwarded to ``classifier`` (string)
    Returns:
      results_dic - dictionary keyed by image filename whose value is the
                    list [pet label, classifier label, result of
                    is_true_match(found index, classifier label, pet label)]
    """
    results_dic = {}
    for filename, pet_label in petlabel_dic.items():
        classifier_label = classifier(img_dir + filename, model).lower().strip()
        found_idx = classifier_label.find(pet_label)
        verdict = is_true_match(found_idx, classifier_label, pet_label)
        results_dic[filename] = [pet_label, classifier_label, verdict]
    return results_dic
def main():
    """Evaluate the classifier on PhishingData.arff for k = 2..32.

    The first 1082 rows are used for training and the remaining rows for
    testing; columns 0-8 are the features and column 9 is the label. One
    accuracy line is printed per value of k.
    """
    # Load the ARFF records and decode every byte-string column to text.
    phishingdata = pd.DataFrame(arff.loadarff('PhishingData.arff')[0])
    for col in phishingdata.columns:
        phishingdata[col] = phishingdata[col].str.decode('utf-8')

    # Fixed-position split (described as 80% / 20% by the original author).
    train_part = phishingdata.iloc[:1082, :]
    test_part = phishingdata.iloc[1082:, :]

    # Convert the string cells to plain ints, row by row.
    trainxvalues = [[int(cell) for cell in row]
                    for row in train_part.iloc[:, :9].values]
    trainyvalues = [int(label) for label in train_part.iloc[:, 9].values]
    testxvalues = [[int(cell) for cell in row]
                   for row in test_part.iloc[:, :9].values]
    testyvalues = [int(label) for label in test_part.iloc[:, 9].values]

    # Fit and score one classifier per k.
    for k in range(2, 33):
        model = classifier(k)
        model.fit(trainxvalues, trainyvalues)
        predictions = model.predict(testxvalues)
        print('k = %d -> Accuracy: %0.4f' %(k, accuracy_score(testyvalues, predictions)))
def main(data_filename, output_directory, *features):
    """Plot the chirp-mass distribution and run the requested analyses.

    Args:
        data_filename: text file read with ``np.loadtxt``; four columns are
            unpacked as ``m_1``, ``m_2``, ``s`` and ``rho``.
        output_directory: directory (created if missing) that receives the
            two PDF figures and is passed on to the optional analyses.
        *features: names of optional steps to run: ``"power_law"``,
            ``"mcmc"``, ``"classifier"``, or ``"all"`` for every one.
    """
    # Set random seed.
    np.random.seed(1)

    # Create output directory if it does not exist.
    make_sure_path_exists(output_directory)

    # Load data from file.
    m_1, m_2, s, rho = np.loadtxt(data_filename, unpack=True)
    s = s.astype(bool)

    # Compute standard quantities.
    # NOTE(review): eta, x_err, q and q_err are not used again in this
    # function.
    eta = gw.symmetric_mass_ratio(m_1, m_2)
    M_c = gw.chirp_mass(m_1, m_2)
    x_err = gw.chirp_mass_log_error(M_c, rho)
    q = gw.mass_ratio(m_1, m_2)
    q_err = gw.mass_ratio_error(M_c, rho)
    D = gw.detectable_distance(M_c)
    # NOTE(review): under Python 2 without `from __future__ import division`
    # the factor (4/3) evaluates to 1 -- confirm the interpreter version.
    V = (4/3) * np.pi * D**3
    T = 0.6

    # Transform M_c into log-space.
    x = np.log10(M_c)

    # Compute the weights for each M_c.
    w = 1 / (V*T)

    # Generate 1000 evenly-spaced values of log(M_c) for visualization
    # purposes.
    x_smooth = np.linspace(np.min(x), np.max(x), num=1000)
    M_c_smooth = 10**x_smooth

    # Create Figure.
    fig_density = plt.figure()
    # Set layout of Figure such that there are 2 vertically stacked subplots,
    # with the bottom one being 1/5 the height of the top one.
    gs = mpl.gridspec.GridSpec(2, 1, height_ratios=[5,1])
    # Create subplot axes, with shared x-axes.
    ax_pdf = fig_density.add_subplot(gs[0])
    ax_data = fig_density.add_subplot(gs[1], sharex=ax_pdf)
    # Set axis labels.
    ax_data.set_xlabel(r"$\mathcal{M}_c\ [M_\odot]$")
    ax_pdf.set_ylabel(r"$r(\mathcal{M}_c)$")
    # Hide unwanted axis ticks and tick labels.
    plt.setp(ax_pdf.xaxis.get_ticklabels(), visible=False)
    ax_data.yaxis.set_ticks([])
    # Logarithmic x-axis on both panels (y stays linear in this figure).
    ax_pdf.semilogx()
    ax_data.semilogx()

    # Create log-scale Figure.
    fig_log_density = plt.figure()
    # Set layout of Figure such that there are 2 vertically stacked subplots,
    # with the bottom one being 1/5 the height of the top one.
    gs = mpl.gridspec.GridSpec(2, 1, height_ratios=[5,1])
    # Create subplot axes, with shared x-axes.
    ax_log_pdf = fig_log_density.add_subplot(gs[0])
    ax_log_data = fig_log_density.add_subplot(gs[1], sharex=ax_log_pdf)
    # Set axis labels.
    ax_log_data.set_xlabel(r"$\mathcal{M}_c\ [M_\odot]$")
    ax_log_pdf.set_ylabel(r"$r(\mathcal{M}_c)$")
    # Hide unwanted axis ticks and tick labels.
    plt.setp(ax_log_pdf.xaxis.get_ticklabels(), visible=False)
    ax_log_data.yaxis.set_ticks([])
    # Top panel is log-log; the data strip only needs a logarithmic x-axis.
    ax_log_pdf.loglog()
    ax_log_data.semilogx()

    # The distribution step draws on all four axes and returns two callables
    # that are evaluated on log10(M_c) below.
    r_fn, r_err_fn = chirp_mass_distribution(M_c, M_c_smooth, x, x_smooth, w, s, ax_pdf, ax_data, ax_log_pdf, ax_log_data)

    # Optional analyses, each enabled by its own name or by "all".
    if ("power_law" in features) or ("all" in features):
        power_law(r_fn, r_err_fn, M_c, M_c_smooth, x, x_smooth, ax_pdf, ax_data, ax_log_pdf, ax_log_data)

    if ("mcmc" in features) or ("all" in features):
        # NOTE(review): lam_mcmc is not used after this assignment.
        lam_mcmc = chis_code(np.log10(M_c),r_fn(np.log10(M_c)),r_err_fn(np.log10(M_c)),output_directory) # (x,y,yerr)

    if ("classifier" in features) or ("all" in features):
        classifier(m_1, m_2, M_c, s, ax_pdf, ax_data, ax_log_pdf, ax_log_data, output_directory)

    # Only the linear-scale figure gets a legend.
    ax_pdf.legend()

    fig_density.savefig(path.join(output_directory, "chirp-mass-distribution.pdf"))
    fig_log_density.savefig(path.join(output_directory, "chirp-mass-log-distribution.pdf"))
# NOTE(review): the `return` below is the tail of a function whose header
# lies outside this excerpt (presumably `get_conv`, called at the bottom).
    return o,r


# Load the single example stored under key 'a' and add a leading axis.
mat = sio.loadmat("./single_ex.mat")
data = mat['a']
data_ = np.expand_dims(data,0)

# Layer sizes and activation names handed to the classifier constructor.
units = [24,24,960,96]
#################
act = ['leaky_relu6','tanh','linear']

# Build the network, initialise it and restore the converted weights.
cl = classifier(units,act)
cl.generate_classifier()
cl.init_network()
cl.load_model(session=cl.session,name='./converted.mdl')

# Run the example through get_conv.
o,r = get_conv(data_)
def ex(path,k):
    """Create a ``classifier`` from *path* and *k* and call its ``rec()``."""
    classifier(path, k).rec()
"""
# NOTE(review): the quotes above close a docstring that starts outside this
# excerpt. This fragment uses Python 2 print statements.

# NOTE(review): experiment_time is not read anywhere in this fragment.
experiment_time = 100
# Fixed seed for numpy's global random generator.
np.random.seed(0)

# Three pairs of class means, with the two means moving closer together.
means = (
    ((-5, 0), (5, 0)),
    ((-2, 0), (2, 0)),
    ((-1, 0), (1, 0))
)
# One 2x2 identity matrix per class.
C = (
    [[1, 0], [0, 1]],
    [[1, 0], [0, 1]]
)

# For each pair of means: generate data, fit three classifiers, print their
# scores and plot each one's coefficients.
for i, m in zip(range(0, 3), means):
    X, target = data_generate(m, C, 200, 1)
    print "For problem 3.%i:" % (i+1)

    perceptron = classifier.classifier("perceptron", X, target)
    print "\tScore for Perceptron: \t\t%f" % perceptron.score()
    # Only the first element of the perceptron's coef() is plotted.
    showPlot(X, target, perceptron.coef()[0])

    linear_model = classifier.classifier("linear_model", X, target)
    print "\tScore for Linear Regression: \t%f" % linear_model.score()
    showPlot(X, target, linear_model.coef())

    lms = classifier.classifier("lms", X, target)
    print "\tScore for LMS algorithm: \t%f" % lms.score()
    showPlot(X, target, lms.coef())
def main(arguments=None):
    """
    Entry point used when ``cl_utils.py`` is run as a script or through the
    installed command-line tool.

    Parses the command-line arguments, turns each one into a local variable,
    optionally runs the transient classifier, the database clean-up and the
    wiki update (controlled by the ``match``, ``clean`` and ``wiki``
    variables), then commits and closes the database connection.

    NOTE(review): this function is Python 2 code (``iteritems``,
    ``unicode``) and relies on ``exec`` creating function locals.
    """
    # setup the command-line util settings
    su = setup_main_clutil(
        arguments=arguments,
        docString=__doc__,
        logLevel="DEBUG",
        options_first=False,
        projectName="sherlock"
    )
    arguments, settings, log, dbConn = su.setup()

    # tab completion for raw_input
    readline.set_completer_delims(' \t\n;')
    readline.parse_and_bind("tab: complete")
    readline.set_completer(tab_complete)

    # unpack remaining cl arguments using `exec` to setup the variable names
    # automatically: "--name" options become `nameFlag`, "<name>" positionals
    # become `name`.
    # NOTE(review): string values are spliced into the exec'd source inside
    # single quotes, so a value containing a quote would break (or alter)
    # the generated statement.
    for arg, val in arguments.iteritems():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        if isinstance(val, str) or isinstance(val, unicode):
            exec(varname + " = '%s'" % (val,))
        else:
            exec(varname + " = %s" % (val,))
        # a connection passed on the command line replaces the one from setup
        if arg == "--dbConn":
            dbConn = val
        log.debug('%s = %s' % (varname, val,))

    ## START LOGGING ##
    startTime = dcu.get_now_sql_datetime()
    log.debug(
        '--- STARTING TO RUN THE cl_utils.py AT %s' % (startTime,))

    # set options interactively if user requests
    if "interactiveFlag" in locals() and interactiveFlag:

        # load previous settings
        moduleDirectory = os.path.dirname(__file__) + "/resources"
        pathToPickleFile = "%(moduleDirectory)s/previousSettings.p" % locals()
        # probe whether the settings file can be opened
        # NOTE(review): the bare `except` treats every failure as "no
        # previous settings".
        try:
            with open(pathToPickleFile):
                pass
            previousSettingsExist = True
        except:
            previousSettingsExist = False
        previousSettings = {}
        if previousSettingsExist:
            # NOTE(review): the file object opened here is never closed
            # explicitly.
            previousSettings = pickle.load(open(pathToPickleFile, "rb"))

        # x-raw-input
        # x-boolean-raw-input
        # x-raw-input-with-default-value-from-previous-settings

        # save the most recently used requests
        # (pickleMeObjects is empty, so an empty dict is written)
        pickleMeObjects = []
        pickleMe = {}
        theseLocals = locals()
        for k in pickleMeObjects:
            pickleMe[k] = theseLocals[k]
        pickle.dump(pickleMe, open(pathToPickleFile, "wb"))

    # call the worker function
    # x-if-settings-or-database-credientials
    # NOTE(review): `match`, `clean`, `wiki` and `updateFlag` are not
    # assigned in this function; presumably they are created by the exec
    # loop above -- confirm against the usage string.
    if match:
        sherlock = classifier(
            log=log,
            settings=settings,
            update=updateFlag,
            transientIdList=[]
        )
        sherlock.get()
    if clean:
        cleaner = cleanup_database_tables(
            log=log,
            settings=settings
        )
        cleaner.get()
    if wiki:
        updateWiki = update_wiki_pages(
            log=log,
            settings=settings
        )
        updateWiki.get()

    # persist any pending database work before exiting
    if "dbConn" in locals() and dbConn:
        dbConn.commit()
        dbConn.close()

    ## FINISH LOGGING ##
    endTime = dcu.get_now_sql_datetime()
    runningTime = dcu.calculate_time_difference(startTime, endTime)
    log.debug('-- FINISHED ATTEMPT TO RUN THE cl_utils.py AT %s (RUNTIME: %s) --' %
              (endTime, runningTime, ))

    return
def main():
    """Run the coarse detector, the fine detector and the word-graph stage.

    Every stage loads a previously saved classifier from the experiment's
    ``clf/`` directory. The coarse stage reads images from ``raw/`` and
    writes to ``pre/``, the fine stage writes regions of interest to
    ``roitest/``, and the word-graph stage writes bounding-box text files
    to ``pretxt/``.
    """
    # ---- experiment layout ------------------------------------------------
    experiment_dir = os.path.join('../data/', 'myexpe/')

    def stage_dir(name):
        """Return the sub-directory *name* of the experiment directory."""
        return os.path.join(experiment_dir, name)

    clf_dir = stage_dir('clf/')          # saved classifiers
    raw_dir = stage_dir('raw/')          # original images
    roi_test_dir = stage_dir('roitest/')  # regions of interest (fine stage)
    pre_dir = stage_dir('pre/')          # preprocessing output
    pred_txt_dir = stage_dir('pretxt/')  # bounding-box text files

    # ---- shared detector settings ----------------------------------------
    edge_mode = True
    label_mode = 'foreground'
    patch_size = 32
    step_size = 16
    nob = 3
    ratio = 0.9
    rrange = 30
    para0 = (float(patch_size - step_size) / patch_size) ** 2
    para1 = 1 - ratio

    # ---- stage 1: coarse detector (contextual features) --------------------
    rpower = ratio ** numpy.asarray(range(rrange))
    detect_data = pickle_load('detect', experiment_dir)
    patch_codebook = codebook_load('../codebooks/Patch/codeBook.npy')
    coarse_detector = detector(patch_codebook, detect_data, patch_size,
                               step_size, nob, rpower, para0, para1,
                               label_mode, 'context', edge_mode)
    coarse_clf = classifier()
    coarse_clf.clf_load('coarse', clf_dir)
    coarse_detector.image_test(raw_dir, pre_dir, coarse_clf.classifier)

    # ---- stage 2: fine detector and region growing (local features) --------
    rpower = ratio ** numpy.asarray(range(rrange))
    verify_codebook = codebook_load('../codebooks/Verify/codeBook.npy')
    region_data = pickle_load('region', experiment_dir)
    fine_detector = detector(verify_codebook, region_data, patch_size,
                             step_size, nob, rpower, para0, para1,
                             label_mode, 'local', edge_mode)
    fine_clf = classifier('adaboost')
    fine_clf.clf_load('fine', clf_dir)
    fine_detector.roi_test(pre_dir, raw_dir, roi_test_dir,
                           fine_clf.classifier)

    # ---- stage 3: word graph -----------------------------------------------
    graph_clf = classifier()
    graph_clf.clf_load('wordgraph', clf_dir)
    wordbb = wordGraph_test(roi_test_dir, graph_clf.classifier)
    wordbb2pred(wordbb, pred_txt_dir)
def __init__(self, user=True):
    """Store the ``user`` flag; when it is falsy, build and train a classifier.

    Args:
        user: when truthy no classifier is created; otherwise ``self.clf``
            is set to a new ``classifier()`` and its ``train()`` is called.
    """
    self.user = user
    if user:
        return
    self.clf = classifier()
    self.clf.train()
def epco():
    """Build the destination tree, then run ``classifier`` once per category.

    The categories are processed in this order: documents, audio, video,
    images, archives, others. Each call receives the category's extension
    list, its destination path and the common source ``SRC``.
    """
    pathBuilder(DST)
    routes = (
        (DOC_EXT_LST, DOC_DST_PATH),
        (AUD_EXT_LST, AUD_DST_PATH),
        (VID_EXT_LST, VID_DST_PATH),
        (IMG_EXT_LST, IMG_DST_PATH),
        (ARCH_EXT_LST, ARCH_DST_PATH),
        (OTR_EXT_LST, OTR_DST_PATH),
    )
    for extensions, destination in routes:
        classifier(extensions, destination, SRC)
#Iterating through a dictionary printing all keys & their associated values print("\nPrinting all key-value pairs in dictionary results_dic:") for key in results_dic: print("Filename=", key, " Pet Label=", results_dic[key][0]) ###################################################### from classifier import classifier images_dir = "pet_images" model = "vgg" # Process all files in the results_dic - use images_dir to give fullpath # that indicates the folder and the filename (key) to be used in the # classifier function for key in results_dic: model_label = classifier(images_dir + "/" + key, model) model_label = model_label.lower().strip() truth = results_dic[key][0] if truth in model_label: results_dic[key].extend([model_label, 1]) else: results_dic[key].extend([model_label, 0]) print(results_dic) ########################################################## dogfile = "dognames.txt" # Creates dognames dictionary for quick matching to results_dic labels from # real answer & classifier's answer dognames_dic = dict()
def classify_images(image_dir, pet_dic, model):
    """Classify every pet image and compare the result with its true label.

    Uses the ``classifier()`` function defined in classifier.py on
    ``image_dir + filename``. The classifier label is lower-cased and
    stripped, then searched for the pet label as a stand-alone term, so that
    a label embedded in a longer word (e.g. "cat" in "bobcat") is not
    counted as a match.

    Parameters:
      image_dir - path prefix concatenated with each image filename (string)
      pet_dic - dictionary mapping image filename to its pet (true) label,
                lowercase with single spaces between words
      model - CNN architecture name forwarded to ``classifier``:
              resnet alexnet vgg (string)
    Returns:
      results_dic - dictionary keyed by image filename whose value is a list
                    idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int) where 1 = the pet label appears as a
                            stand-alone term in the classifier label and
                            0 = it does not
    """
    results_dic = dict()
    for key in pet_dic:
        # Normalise the classifier output so it is comparable to pet labels.
        model_label = classifier(image_dir + key, model).lower().strip()
        truth = pet_dic[key]
        is_match = 1 if _is_standalone_term(truth, model_label) else 0
        results_dic[key] = [truth, model_label, is_match]
    return results_dic


def _is_standalone_term(term, label):
    """Return True when *term* occurs in *label* as a term of its own.

    An occurrence counts when it starts at the beginning of *label* or right
    after a space, and ends at the end of *label* or right before a comma or
    a space. Every occurrence is examined, not only the first one, so "cat"
    is found in "bobcat, cat".
    """
    start = label.find(term)
    while start >= 0:
        end = start + len(term)
        starts_clean = start == 0 or label[start - 1] == " "
        ends_clean = end == len(label) or label[end] in (",", " ")
        if starts_clean and ends_clean:
            return True
        # An empty term is only ever tested at position 0.
        start = label.find(term, start + 1) if term else -1
    return False
def classify_images(images_dir, petlabel_dic, model):
    """Classify every pet image and compare the result with its true label.

    Uses the ``classifier()`` function defined in classifier.py on
    ``images_dir + filename``. The classifier label is lower-cased and
    stripped, then searched for the pet label as a stand-alone term, so that
    a label embedded in a longer word (e.g. "cat" in "bobcat") is not
    counted as a match.

    Parameters:
      images_dir - path prefix concatenated with each image filename
                   (string)
      petlabel_dic - dictionary mapping image filename to its pet (true)
                     label, lowercase with single spaces between words
      model - CNN architecture name forwarded to ``classifier``:
              resnet alexnet vgg (string)
    Returns:
      results_dic - dictionary keyed by image filename whose value is a list
                    idx 0 = pet image label (string)
                    idx 1 = classifier label (string)
                    idx 2 = 1/0 (int) where 1 = the pet label appears as a
                            stand-alone term in the classifier label and
                            0 = it does not
    """
    results_dic = dict()
    for key in petlabel_dic:
        # Normalise the classifier output so it is comparable to pet labels.
        model_label = classifier(images_dir + key, model).lower().strip()
        truth = petlabel_dic[key]
        is_match = 1 if _is_standalone_term(truth, model_label) else 0
        results_dic[key] = [truth, model_label, is_match]
    return results_dic


def _is_standalone_term(term, label):
    """Return True when *term* occurs in *label* as a term of its own.

    An occurrence counts when it starts at the beginning of *label* or right
    after a space, and ends at the end of *label* or right before a comma or
    a space. Every occurrence is examined, not only the first one, so "cat"
    is found in "bobcat, cat".
    """
    start = label.find(term)
    while start >= 0:
        end = start + len(term)
        starts_clean = start == 0 or label[start - 1] == " "
        ends_clean = end == len(label) or label[end] in (",", " ")
        if starts_clean and ends_clean:
            return True
        # An empty term is only ever tested at position 0.
        start = label.find(term, start + 1) if term else -1
    return False
# NOTE(review): this fragment continues code that starts outside the excerpt;
# `startMin`, `startSec` and `path` are defined there.
endMin = 0
endSec = 40

# Convert the start and end positions to milliseconds
startTime = startMin * 60 * 1000 + startSec * 1000
endTime = endMin * 60 * 1000 + endSec * 1000

# Opening file and extracting segment
song = AudioSegment.from_mp3(path)
extract = song[startTime:endTime]

# Saving the extract as WAV in the current working directory
extract.export(Path.cwd() / 'extraction.wav', format="wav")

# getting paths of mp3(s) in the current working directory
new_paths = []
iterable = Path.cwd().iterdir()
for j in iterable:
    if j.suffix == '.mp3':
        new_paths.append(j)

# deleting them
# NOTE(review): this removes EVERY .mp3 file found in the working directory,
# not only the file that was just processed.
for i in new_paths:
    os.remove(i)

# create spectrogram
create_mel(Path.cwd() / 'extraction.wav', 'spectrogram.jpg')

# run classifier on the spectrogram and print what it returns
print(classifier(Path.cwd() / '30s_model', Path.cwd() / 'spectrogram.jpg'))
# REVISED DATE:             <=(Date Revised - if any)
# PURPOSE: Shows how to call the classifier() function defined in
#          classifier.py. The function classifies an image with a CNN
#          architecture pretrained on ImageNet; the only architectures it
#          accepts are 'resnet', 'alexnet' and 'vgg'.
#
# Usage: python test_classifier.py    -- will run program from commandline

# Imports classifier function for using pretrained CNN to classify images
from classifier import classifier

# A dog test image from the pet_images folder
test_image = "pet_images/Collie_03797.jpg"

# Model architecture to use for classification
# NOTE: only 'vgg', 'alexnet' and 'resnet' are supported
model = "vgg"

# classifier() returns a text string in mixed case; it can contain several
# comma-separated names when a label has more than one name. Normalise it to
# lowercase without surrounding whitespace.
image_classification = classifier(test_image, model)
image_classification = image_classification.lower().strip()

# Report the result of running classifier()
print("\nResults from test_classifier.py\nImage:", test_image,
      "using model:", model,
      "was classified as a:", image_classification)
def main():
    """Prepare the data, extract coarse features and train the coarse classifier.

    Steps, in order: append a description line to the experiment's
    ``log.txt``; parse the selected dataset and prepare its images in
    ``raw/``; pickle the data list; extract training features into ``npy/``
    with the coarse codebook; train, save, reload and test an AdaBoost
    classifier stored under ``clf/``.

    Raises:
        Exception: when the hard-coded dataset name is not one of the
            supported ones.
    """
    # ---- experiment configuration ------------------------------------------
    data = 'icdar2013word'  # data for training/testing
    experiment_dir = os.path.join('../data/', 'myexpe/')
    clf_dir = os.path.join(experiment_dir, 'clf/')   # saved classifiers
    raw_dir = os.path.join(experiment_dir, 'raw/')   # original images
    npy_dir = os.path.join(experiment_dir, 'npy/')   # feature and label npy
    datalist_name = 'datalist'
    coarse_clf_name = 'coarseDet'

    # ---- detector settings --------------------------------------------------
    edge_mode = True          # edge detection
    label_mode = 'foreground'  # foreground/whitespace
    feature_mode = 'context'   # local or contextual
    patch_size = 32
    step_size = 16
    nob = 3
    ratio = 0.9
    rrange = 30
    para0 = (float(patch_size - step_size) / patch_size) ** 2
    para1 = 1 - ratio
    rpower = ratio ** numpy.asarray(range(rrange))

    # write log file, a simple description of the experiment
    with open(os.path.join(experiment_dir, 'log.txt'), 'a') as log_file:
        log_file.write('generate ground truth bounding box for fine detector test \r\n')

    # ---- parse data ----------------------------------------------------------
    # Each dataset has its own parser, image directory and annotation source.
    if data == 'icdar2003word':
        imdir = '../icdar2003/icdar2003/SceneTrialTest'
        annotations = '../icdar2003/icdar2003/SceneTrialTest/locations.xml'
        parser = parseWord2003()
    elif data == 'icdar2013word':
        imdir = '../icdar2013/task21_22/test/image'
        annotations = '../icdar2013/task21_22/test/word_label'
        parser = parseWord2013()
    elif data == 'icdar2013char':
        imdir = '../icdar2013/task21_22/train/image'
        annotations = '../icdar2013/task21_22/train/char_label'
        parser = parseChar2013()
    else:
        raise Exception('No data named:'+data+' found!')

    parsed = parser.parseData(imdir, annotations)
    parsed = parser.prepareImg(parsed, imdir, raw_dir)
    pickle_save(parsed, datalist_name, experiment_dir)

    # ---- extract features ------------------------------------------------------
    # The list is read back from the pickle that was just written.
    parsed = pickle_load(datalist_name, experiment_dir)
    codebook = codebook_load('../codebooks/coarseDet/codeBook.npy')
    coarse_detector = detector(codebook, parsed, patch_size, step_size, nob,
                               rpower, para0, para1, label_mode,
                               feature_mode, edge_mode)
    coarse_detector.image_train(raw_dir, npy_dir)

    # ---- train classifier ------------------------------------------------------
    coarse_clf = classifier('adaboost')
    coarse_clf.data_load(npy_dir)                    # load training data
    coarse_clf.clf_train()                           # train classifier
    coarse_clf.clf_save(coarse_clf_name, clf_dir)    # save classifier
    coarse_clf.clf_load(coarse_clf_name, clf_dir)    # reload the saved one
    coarse_clf.clf_test()                            # test classifier
# import the necessary packages import sliding_window import argparse import time import cv2 import pdb import numpy as np import classifier import skimage ### pdb.set_trace() net = classifier.classifier() # construct the argument parser and parse the arguments ap = argparse.ArgumentParser() ap.add_argument("-i", "--image", required=True, help="Path to the image") args = vars(ap.parse_args()) # load the image and define the window width and height image = cv2.imread(args["image"]) (winW, winH) = (64,64) for (x,y,window) in sliding_window.sliding_window(image,stepSize = 8, windowSize=(winW, winH)): if window.shape[0] != winH or window.shape[1] != winW: continue # window = window[:, :, np.newaxis] w = skimage.img_as_float(window).astype(np.float32) prediction = net.predict([w],oversample=False) print x,y,prediction[0][1] cv2.imshow("Window",window) clone = image.copy() if prediction[0][1] >.9: cv2.circle(image,(x+32,y+32),4,(0,0,255),-1) cv2.rectangle(clone,(x,y),(x+winW,y+winH),(255,255,0),2)
def classify_images(image_dir, results_dic, model):
    """
    Classify every image listed in results_dic and record the outcome in place.

    For each filename key the classifier() function (defined in classifier.py;
    see test_classifier.py for its usage) is run on the image. The returned
    label is lowercased and stripped of leading/trailing whitespace, e.g.
    'Maltese dog, Maltese terrier, Maltese' becomes
    'maltese dog, maltese terrier, maltese'. The entry's list is then extended
    with that classifier label and a 1/0 match flag.

    A classifier label may hold several comma-separated names for one breed
    ('dalmatian, coach dog, carriage dog'). The pet label counts as a match
    only when it appears in the classifier label as a stand-alone term, i.e.
    delimited by the string ends, whitespace or commas. 'cat' therefore
    matches 'tabby, tabby cat' but not 'polecat, fitch'.

    Parameters:
      image_dir - Path to the folder of images that are to be classified
                  (string). A trailing path separator is optional.
      results_dic - Results dictionary with the image filename as key and a
                    list as value:
                      index 0 = pet image label (string)
                    --- index 1 & index 2 are added by this function ---
                      index 1 = classifier label (string)
                      index 2 = 1/0 (int) where 1 = match between pet image
                                and classifier labels and 0 = no match
      model - CNN model architecture passed to classifier(); values must be
              one of: resnet alexnet vgg (string)
    Returns:
      None - results_dic is a mutable data type and is updated in place.
    """
    # Local imports keep this function self-contained regardless of the
    # enclosing module's import block.
    import os
    import re

    for key in results_dic:
        # os.path.join works whether or not image_dir ends with a separator;
        # plain concatenation produced a wrong path when it did not.
        model_label = classifier(os.path.join(image_dir, key), model)

        # Normalise so the label can be compared with the pet image label.
        model_label = model_label.lower().strip()

        # Pet image label; assumed to be lowercase/stripped already by the
        # step that built results_dic - TODO confirm against caller.
        truth = results_dic[key][0]

        # Whole-term match: the pet label must not be glued to other
        # characters on either side (a plain `in` test matched substrings
        # such as 'cat' inside 'polecat').
        standalone = re.compile(
            r"(?<![^\s,])" + re.escape(truth) + r"(?![^\s,])")
        is_match = 1 if truth and standalone.search(model_label) else 0

        results_dic[key].extend((model_label, is_match))
# Usage: python test_classifier.py  -- runs the program from the command line
# Demonstrates how to call classifier(), which uses a pretrained CNN to
# classify an image.
from classifier import classifier

# Model architecture used for the classification.
# NOTE: classifier() only works for the architectures 'vgg', 'alexnet'
# and 'resnet'.
model = "vgg"

# Dog test image taken from the pet_images folder.
test_image = "pet_images/Collie_03797.jpg"

# NOTE: image_classification is a text string. It contains mixed-case image
# labels, separated by commas when more than one name describes the label.
image_classification = classifier(test_image, model)
print(type(image_classification))

# Report the result of the classifier() call; print() joins the fields with
# single spaces.
report_fields = (
    "\nResults from test_classifier.py\nImage:",
    test_image,
    "using model:",
    model,
    "was classified as a:",
    image_classification,
)
print(*report_fields)
import io

# Download a web page, run it through the project's parser/classifier, and
# write the text of the blocks flagged as content to result.txt.

# Page to process: first command-line argument, otherwise a default article.
if len(sys.argv) > 1:
    url = sys.argv[1]
else:
    url = 'https://lenta.ru/news/2016/08/04/peskov_medved/'

user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
headers = {'User-Agent': user_agent}
req = urllib.request.Request(url, None, headers)

# Only the network round-trip lives in the try body, so the handlers below
# cannot accidentally mask errors raised by parsing or file output.
try:
    with urllib.request.urlopen(req) as response:
        # NOTE(review): the page is assumed to be UTF-8 encoded - confirm.
        data = response.read().decode('utf-8')
except HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be handled first.
    print('The server couldn\'t fulfill the request.')
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    parser = parser2.Parser2()
    parser.feed(data)
    # presumably sets is_content on the parsed blocks in place - verify
    # against classifier.classifier
    classifier.classifier(parser.contents)

    # One line per content block, followed by its is_content flag in braces.
    text = ''.join(
        content.text + '{' + str(content.is_content) + '}' + '\n'
        for content in parser.contents
        if content.is_content and content.text
    )
    text = formatter.format(text)

    # The with-statement closes the file; no explicit close() is needed.
    with io.open('result.txt', 'w', encoding='utf8') as f:
        f.write(text)
def classify_images(images_dir, results_dic, model):
    """
    Classify every image listed in results_dic and record the outcome in place.

    For each filename key the classifier() function (defined in classifier.py;
    see test_classifier.py for its usage) is run on the image. The returned
    label is lowercased and stripped of leading/trailing whitespace, e.g.
    'Maltese dog, Maltese terrier, Maltese' becomes
    'maltese dog, maltese terrier, maltese'. The entry's list is then extended
    with that classifier label and a 1/0 match flag.

    A classifier label may hold several comma-separated names for one breed
    ('dalmatian, coach dog, carriage dog'). The pet label counts as a match
    only when it appears in the classifier label as a stand-alone term, i.e.
    delimited by the string ends, whitespace or commas. 'cat' therefore
    matches 'tabby, tabby cat' but not 'polecat, fitch'.

    Parameters:
      images_dir - Path to the folder of images that are to be classified
                   (string). A trailing path separator is optional.
      results_dic - Results dictionary with the image filename as key and a
                    list as value:
                      index 0 = pet image label (string)
                    --- index 1 & index 2 are added by this function ---
                      index 1 = classifier label (string)
                      index 2 = 1/0 (int) where 1 = match between pet image
                                and classifier labels and 0 = no match
      model - CNN model architecture passed to classifier(); values must be
              one of: resnet alexnet vgg (string)
    Returns:
      The same results_dic object that was passed in, updated in place
      (returned for the convenience of callers that use the return value).
    """
    # Local imports keep this function self-contained regardless of the
    # enclosing module's import block.
    import os
    import re

    # The caller's dictionary is processed as given. Rebinding it to a new
    # empty dict here would discard every entry and skip the loop entirely.
    for key in results_dic:
        # os.path.join works whether or not images_dir ends with a separator.
        model_label = classifier(os.path.join(images_dir, key), model)

        # Normalise so the label can be compared with the pet image label.
        model_label = model_label.lower().strip()

        # Index 0 of the entry is the pet image label; the entry itself is a
        # list and cannot be searched for inside a string.
        # Assumed to be lowercase/stripped already - TODO confirm with caller.
        truth = results_dic[key][0]

        # Whole-term match anywhere in the label: the pet label must not be
        # glued to other characters on either side. Searching with a pattern
        # also finds a stand-alone occurrence that follows an embedded one.
        standalone = re.compile(
            r"(?<![^\s,])" + re.escape(truth) + r"(?![^\s,])")
        is_match = 1 if truth and standalone.search(model_label) else 0

        results_dic[key].extend((model_label, is_match))

    return results_dic