def pruneTree(self, L, K, validation_set):
    bestTree = self.root
    accuracy = Accuracy.Accuracy(validation_set)
    # print accuracy
    for i in range(1, L + 1):
        currentTree = copy.deepcopy(bestTree)
        M = random.randint(1, K)
        for j in range(1, M + 1):
            nonLeafNodes = self.arrange(currentTree)
            # print nonLeafNodes
            N = len(nonLeafNodes) - 1
            if N <= 0:
                return bestTree
            P = random.randint(1, N)
            replaceNode = nonLeafNodes[P]
            # Turn the selected non-leaf node into a leaf.
            replaceNode.val = -1
            replaceNode.left = None
            replaceNode.right = None
        oldAccuracy = accuracy.calculateAccuracy(bestTree)
        newAccuracy = accuracy.calculateAccuracy(currentTree)
        # Keep the pruned tree only if it does at least as well on the
        # validation set.
        if newAccuracy >= oldAccuracy:
            bestTree = currentTree
    self.root = bestTree
    return bestTree
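# Minimal usage sketch for the L,K random post-pruning above (hypothetical:
# it assumes the enclosing DTree class is constructed as in main() below and
# that Accuracy.Accuracy accepts a validation-file path):
#
#     tree = ID3.DTree(train_file, heu)
#     prunedRoot = tree.pruneTree(L=10, K=5, validation_set=validation_file)
#     # prunedRoot is the best tree found over L random pruning rounds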
def main():
    train_file = str(sys.argv[1])
    attribute_names_adder(train_file)
    validation_file = sys.argv[2]
    attribute_names_adder(validation_file)
    test_file = sys.argv[3]
    attribute_names_adder(test_file)
    yesno = str(sys.argv[4])
    heu = str(sys.argv[5])
    pru = str(sys.argv[6])
    L = int(sys.argv[7])
    K = int(sys.argv[8])
    decisionTree = ID3.DTree(train_file, heu)
    if yesno == "yes":
        print('Decision Tree before Pruning:')
        print(decisionTree)
    accuracy = Accuracy.Accuracy(test_file)
    accuracy.calculateAccuracy(decisionTree.root)
    print('Accuracy before Pruning:')
    accuracy.displayAccuracy()
    if pru == 'r':
        decisionTree.pruneTree(L, K, validation_file)
    elif pru == 'n':
        print('No pruning')
        return
    if yesno == "yes":
        print('Decision Tree after Pruning:')
        print(decisionTree)
    accuracy.calculateAccuracy(decisionTree.root)
    print('Accuracy after Pruning:')
    accuracy.displayAccuracy()
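# Example invocation (hypothetical file names; the accepted heuristic strings
# depend on what ID3.DTree implements, and per the branches above 'r' runs
# pruneTree while 'n' skips pruning):
#
#     python main.py train.csv validation.csv test.csv yes entropy r 10 5
#
# argv order: <train> <validation> <test> <print tree: yes/no> <heuristic>
#             <pruning: r/n> <L> <K>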
def forwardSelection(data):
    accuracy_at_level = []
    answer_accuracy = 0
    answer_set = []
    N = len(data)
    M = len(data[0])
    current_features = []
    for i in range(1, M):
        print("On search-tree level number", i)
        feature_to_add = -1
        best_accuracy = 0
        best_num_wrong = float('inf')
        for j in range(1, M):
            if j not in current_features:
                print("Considering feature number", j)
                accuracy, num_wrong = Accuracy.Accuracy(
                    data, N, current_features, j, best_num_wrong)
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    feature_to_add = j
                    best_num_wrong = num_wrong
        if feature_to_add == -1:
            print("ERROR: feature to add is -1")
            exit()
        else:
            current_features.append(feature_to_add)
            print("On level", i, "I added feature", feature_to_add,
                  "which gave an accuracy of", best_accuracy)
        if best_accuracy > answer_accuracy:
            answer_accuracy = best_accuracy
            answer_set = current_features[:]
        accuracy_at_level.append(best_accuracy)
    return answer_set, answer_accuracy, accuracy_at_level
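# Minimal usage sketch (hypothetical: assumes each row of `data` holds the
# class label in column 0 and features in columns 1..M-1, which is what the
# indexing above implies):
#
#     best_set, best_acc, per_level = forwardSelection(data)
#     print("Best feature subset:", best_set, "with accuracy", best_acc)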
def main (k, m="means", init_type="random"): # Starting clustering timer start_cluster = timeit.default_timer() # Initialize clusters if init_type == "random": initial_clusters = Initialize.random_centers(k) else: init_type = "kplusplus" initial_clusters = Initialize.kmeans_plusplus(k, train_images_flat,\ dist_fn=Distance.sumsq) # Run clustering algorithm final_responsibilities, final_clusters = Kmeans.kmeans(k,train_images_flat, initial_clusters, distfn = Distance.sumsq, method=m) # Find and print clustering time end_cluster = timeit.default_timer() clustering_time = end_cluster - start_cluster print "Time spent clustering : ", clustering_time # Save representative images to file. title = m + "_" + init_type + "_cluster" + str(k) File.save_images(k, train_images, final_responsibilities, final_clusters, title) ########################################################################### # Calculate Accuracy # ########################################################################### # Calculate final accuracy for clusters final, cluster_set = Accuracy.final_accuracy(final_responsibilities, train_labels, train_images_flat, final_clusters) # Now see how well we can classify the dataset start_cluster_test = timeit.default_timer() predictions = ClassifyClusters.classify(cluster_set, test_images_flat, test_labels, distfn = Distance.sumsq) finish_cluster_test = timeit.default_timer() # find time it took to test testing_time = finish_cluster_test - start_cluster_test print "Time spent testing : ", testing_time ########################################################################### # Outputs # ########################################################################### # k, prediction level, cluster_set, results = {"k" : k, "prediction_accuracy" : predictions[1], "cluster_means" : cluster_set, "cluster_stats" : final, "clustering_time" : clustering_time, "testing_time" : testing_time} with open('./results/' + title + '/' + title + '_results.json', 'w') as outfile: json.dump(results, outfile, cls=File.NumpyEncoder)
def search_SIFT_BF(returnCount=100, mydataSIFT=mydataSIFT,
                   SIFT_FEATURES_LIMIT=SIFT_FEATURES_LIMIT):
    imagepredictions, searchtimesift = ImageSearch_Algo_SIFT.SIFT_SEARCH_BF(
        mydataSIFT, q_path, sift_features_limit=SIFT_FEATURES_LIMIT,
        lowe_ratio=LOWE_RATIO, predictions_count=returnCount)
    a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagepredictions, 20)
    # print ('Accuracy =', a, '%', '| Quality:', d )
    # print ('Count', cnt, ' | position', ind)
    row_dict['acc_sift_BF'] = a
    row_dict['index_sift_BF'] = ind
    row_dict['Count_sift_BF'] = cnt
    row_dict['quality_sift_BF'] = d
    row_dict['time_sift_BF'] = searchtimesift
    return imagepredictions, searchtimesift
def search_ORB_BF2(returnCount=100, mydataORB=mydataORB, write=False):
    imagematches, searchtime = ImageSearch_Algo_ORB.ORB_SEARCH_MODBF(
        mydataORB, q_path, ORB_FEATURES_LIMIT,
        lowe_ratio=LOWE_RATIO, predictions_count=returnCount)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
        # print ('Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
        row_dict['acc_orb_BF2'] = a
        row_dict['index_orb_BF2'] = ind
        row_dict['Count_orb_BF2'] = cnt
        row_dict['quality_orb_BF2'] = d
        row_dict['time_orb_BF2'] = searchtime
    return imagematches, searchtime
def search_ORB_BOVW(returnCount=100, write=False):
    imagematches, searchtime = ImageSearch_Algo_ORB.ORB_SEARCH_TREE(
        q_path, myORBmodel, myORBtree, mydataORB,
        returnCount=returnCount, kp=ORB_FEATURES_LIMIT)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
        # print ('Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
        row_dict['acc_orb_tree'] = a
        row_dict['index_orb_tree'] = ind
        row_dict['Count_orb_tree'] = cnt
        row_dict['quality_orb_tree'] = d
        row_dict['time_orb_tree'] = searchtime
    return imagematches, searchtime
def search_SIFT_BOVW(returnCount=100, write=False):
    imagematches, searchtime = ImageSearch_Algo_SIFT.SIFT_SEARCH_TREE(
        q_path, mySIFTmodel, mySIFTtree, mydataSIFT,
        returnCount=returnCount, kp=100)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
        # print ('Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
        row_dict['acc_sift_tree'] = a
        row_dict['index_sift_tree'] = ind
        row_dict['Count_sift_tree'] = cnt
        row_dict['quality_sift_tree'] = d
        row_dict['time_sift_tree'] = searchtime
    return imagematches, searchtime
def search_HASH_All(returnCount=100, write=False):
    # AlgoGenList = ['whash', 'phash', 'dhash', 'ahash']
    for algo in AlgoGenList:
        imagematches, searchtime = ImageSearch_Algo_Hash.HASH_SEARCH_TREE(
            myHASH_Trees[algo], mydataHASH, q_path, hashAlgo=algo,
            hashsize=16, returnCount=returnCount)
        if write:
            a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
            # print ('Accuracy =', a, '%', '| Quality:', d )
            # print ('Count', cnt, ' | position', ind)
            row_dict['acc_HASH_' + str(algo)] = a
            row_dict['index_HASH_' + str(algo)] = ind
            row_dict['Count_HASH_' + str(algo)] = cnt
            row_dict['quality_HASH_' + str(algo)] = d
            row_dict['time_HASH_' + str(algo)] = searchtime
def search_HASH_HYBRID(returnCount=100, write=False):
    # HybridAlgoList = ['whash', 'ahash']
    imagematches, searchtime = ImageSearch_Algo_Hash.HASH_SEARCH_HYBRIDTREE(
        myHybridtree, mydataHASH, q_path, hashAlgoList=HybridAlgoList,
        hashsize=16, returnCount=returnCount)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
        # print ('Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
        row_dict['acc_HASH_Hybrid'] = a
        row_dict['index_HASH_Hybrid'] = ind
        row_dict['Count_HASH_Hybrid'] = cnt
        row_dict['quality_HASH_Hybrid'] = d
        row_dict['time_HASH_Hybrid'] = searchtime
    return imagematches, searchtime
def search_RGB(returnCount=100, mydataRGB=mydataRGB, write=False):
    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, returnCount=returnCount)
    # y = autothreshold (imagematchesrgb)
    # toplist = toplist + y
    # print (y)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
        row_dict['acc_rgb'] = a
        row_dict['index_rgb'] = ind
        row_dict['Count_rgb'] = cnt
        row_dict['quality_rgb'] = d
        row_dict['time_rgb'] = searchtimergb
        # print ('RGB Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
    return imagematchesrgb, searchtimergb
def search_HSV(returnCount=100, write=False):
    imagematcheshsv, searchtimehsv = ImageSearch_Algo_HSV.HSV_SEARCH_TREE(
        myHSVtree, mydataHSV, q_path, returnCount=returnCount)
    if write:
        a, d, i_hsv, cnt = accuracy.accuracy_matches(q_path, imagematcheshsv, 20)
        row_dict['acc_hsv'] = a
        row_dict['index_hsv'] = i_hsv
        row_dict['Count_hsv'] = cnt
        row_dict['quality_hsv'] = d
        row_dict['time_hsv'] = searchtimehsv
        # print ('HSV Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', i_hsv)
    # x = autothreshold (imagematcheshsv)
    # toplist = toplist + x
    # # print (x)
    return imagematcheshsv, searchtimehsv
def search_RGB_Corr(returnCount=100, mydataRGB=mydataRGB, write=False):
    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH(
        mydataRGB, q_path, correl_threshold=RGB_PARAMETERCORRELATIONTHRESHOLD)
    # y = autothreshold (imagematchesrgb)
    # toplist = toplist + y
    # print (y)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
        row_dict['acc_rgb_corr'] = a
        row_dict['index_rgb_corr'] = ind
        row_dict['Count_rgb_corr'] = cnt
        row_dict['quality_rgb_corr'] = d
        row_dict['time_rgb_corr'] = searchtimergb
        # print ('RGB Accuracy =', a, '%', '| Quality:', d )
        # print ('Count', cnt, ' | position', ind)
    return imagematchesrgb, searchtimergb
def reduced_error_pruning(self, validation_set):
    bestTree = self.root
    accuracy = Accuracy.Accuracy(validation_set)
    currentTree = copy.deepcopy(bestTree)
    nonleafNodes = self.arrange(currentTree)
    if len(nonleafNodes) == 0:
        return bestTree
    for i in range(len(nonleafNodes) - 1):
        replaceNode = nonleafNodes[i]
        # Turn the selected non-leaf node into a leaf.
        replaceNode.val = -1
        replaceNode.left = None
        replaceNode.right = None
        oldAccuracy = accuracy.calculateAccuracy(bestTree)
        newAccuracy = accuracy.calculateAccuracy(currentTree)
        # Keep the pruned tree only if validation accuracy does not drop.
        if newAccuracy >= oldAccuracy:
            bestTree = currentTree
    self.root = bestTree
    return bestTree
def algomixerAppend(algos, return_count, algoname='NewAlgo'):
    # myAlgos = [ search_RGB, search_SIFT_BF ]
    # algomixerFunnel (myAlgos)
    start = time.time()

    algoResults = []
    algoTimes = []
    for algo in algos:
        thisResult, thisTime = algo_selector(algo, return_count=return_count)
        algoResults.append(thisResult)
        algoTimes.append(thisTime)

    # generate algo uniques: apply threshold for each Result (imagematches)
    unique_final_list = []
    for result in algoResults:
        unique_final_list.append(Thresholding.autothreshold_knee(result))

    # MERGE OPERATION FROM ALL RESULTS
    # algo uniques + final algo detection results
    toplist = unique_final_list.copy()
    # retaining all the individual results as well
    # (P.S.: note the difference from algomixerFunnel)
    for result in algoResults:
        toplist.append(Thresholding.imagepredictions_to_list(result))
    # merge lists of algo results and remove duplicates;
    # order: [[commons], [algo1], [algo2], ...]
    toplist = Thresholding.merge_results(toplist, False)

    t = time.time() - start

    # find accuracy and append to dict
    a, d, ind, cnt = accuracy.accuracy_from_list(q_path, toplist, 20)
    print('index F_' + algoname + ': ', ind)
    row_dict['acc_' + algoname] = a
    row_dict['index_' + algoname] = ind
    row_dict['Count_' + algoname] = cnt
    row_dict['quality_' + algoname] = d
    row_dict['time_' + algoname] = t
def backwardDeletion(data):
    accuracy_at_level = []
    answer_set = []
    answer_accuracy = 0
    N = len(data)
    M = len(data[0])
    # start with all of the features
    features = list(range(1, M))
    for count in range(1, M):
        best_num_wrong = float('inf')
        best_accuracy = 0
        feature_to_omit = -1
        for i in range(len(features)):
            # take out i'th feature
            temp = []
            if i < len(features) - 1:
                temp = features[:i] + features[i + 1:]
            else:
                temp = features[:-1]
            temp_accuracy, num_wrong = Accuracy.Accuracy(
                data, N, temp, None, best_num_wrong)
            if temp_accuracy > best_accuracy:
                best_accuracy = temp_accuracy
                feature_to_omit = i
                best_num_wrong = num_wrong
        print("Omitting feature", features[feature_to_omit])
        if feature_to_omit < len(features) - 1:
            features = features[:feature_to_omit] + features[feature_to_omit + 1:]
        else:
            features = features[:-1]
        print("This leaves us with", features, "and an accuracy of", best_accuracy)
        if best_accuracy > answer_accuracy:
            answer_accuracy = best_accuracy
            answer_set = features
        accuracy_at_level.append(best_accuracy)
    return answer_set, answer_accuracy, accuracy_at_level
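# Companion usage sketch (hypothetical; same data layout as forwardSelection:
# label in column 0, features in columns 1..M-1):
#
#     best_set, best_acc, per_level = backwardDeletion(data)
#     print("Best surviving feature subset:", best_set, "accuracy", best_acc)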
# ------------------ SEARCH TEST
# q_path = random.sample(fileterd_file, 1)[0]
# q_path = './imagesbooks/ukbench00481.jpg'
# imagepredictions , searchtime = SIFT_SEARCH(mydata1, q_path, 50 ,0.75, 20)
imagepredictions, searchtime = ImageSearch_Algo_SIFT.SIFT_SEARCH(
    mydatasift, q_path, 100, 0.75, 20)

# to reload module: uncomment and use the following
# %load_ext autoreload
# %autoreload 2

import Accuracy as accuracy
a, b, c, d = accuracy.accuracy_matches(q_path, imagepredictions, 50)
print('Accuracy =', a, '%', d, searchtime)

# import ImageSearch_Plots as myplots
# myplots.plot_predictions(imagepredictions[:20], q_path)

# ---------------- Compile data and plot results
# q_paths = random.sample(imagepaths, 50)  # random sample 50 items from the list
# q_paths = ['./imagesbooks/ukbench05960.jpg', './imagesbooks/ukbench00459.jpg', './imagesbooks/ukbench06010.jpg', './imagesbooks/ukbench06104.jpg', './imagesbooks/ukbench00458.jpg']
q_paths = [
    './imagesbooks/ukbench05960.jpg', './imagesbooks/ukbench00459.jpg',
    './imagesbooks/ukbench06010.jpg', './imagesbooks/ukbench06104.jpg',
    './imagesbooks/ukbench00458.jpg', './imagesbooks/ukbench00248.jpg',
    './imagesbooks/ukbench06408.jpg', './imagesbooks/ukbench00303.jpg',
    './imagesbooks/ukbench03124.jpg', './imagesbooks/ukbench05776.jpg',
# while lines:
#     # print lines
#     line = re.sub('\s{2,}', '', tesa)
#     save_result.write(line)
#     lines = tesa.readline()
#     if not lines:
#         break
# # file.close()
# files.close()

# print ("Length of tesa: ", len(tesa))
# print ("Length of tesa[0]: ", len(tesa[1]))
# print (tesa[0][0])

# accu = Accuracy.accuracy(text)
text = Accuracy.load_preprocess()
(prec, rec, fm) = Accuracy.accuracy2(text, tesa)
# print("Precision : ", precision)
# rec = Accuracy.accuracy(text)
# print("Recall : ", rec)
# Fmeas = Accuracy.accuracy(text)
# print("F1 measure : ", Fmeas)
print("Precision:", prec)
print("Recall:", rec)
print("F-measure:", fm)

# ytdist = np.array([662., 877., 255., 412., 996., 295., 468., 268.,
#                    400., 754., 564., 138., 219., 869., 669.])
def evalModel(self):
    self.one = Variable(torch.FloatTensor([1.0]))
    self.one = self.one.to(self.device)
    self.accObj = Accuracy.Accuracy()
    self.computeEmbeddingQuality()
mydataHSV, mytime = ImageSearch_Algo_HSV.HSV_GEN(imagepaths)
print('HSV Feature Generation time', mytime)

# ------------ HSV SEARCH TEST ------------------------------#
q_path = random.sample(imagepaths, 1)[0]
# imagematches , searchtime = ImageSearch_Algo_HSV.HSV_SEARCH(mydataHSV, q_path)
imagematches, searchtime = ImageSearch_Algo_HSV.HSV_SEARCH(mydataHSV, q_path)
print('HSV Search time', searchtime)

# to reload module: uncomment and use the following
# %load_ext autoreload
# %autoreload 2

a, m, pos, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
print('Accuracy =', a, '%', ' | Quality: ', m)

# ----- Alternative tree search code [optimized search time] -----
# test TREE SEARCH code

# to create a new tree from dataframe features 'mydataHSV'
mytree = ImageSearch_Algo_HSV.HSV_Create_Tree(mydataHSV, savefile='HSV_Tree')
# to load an existing tree
thistree = ImageSearch_Algo_HSV.HSV_Load_Tree('HSV_Tree')

# sample 1 image
q_path = random.sample(imagepaths, 1)[0]
start = time.time()
# get the feature for this image
q_kp, q_des = ImageSearch_Algo_SIFT.FEATURE(q_path)
# get bow cluster
q_clustered_words = cluster_model.predict(q_des)
# get FV histogram (see the small bincount illustration after this block)
q_bow_hist = np.array([np.bincount(q_clustered_words, minlength=n_clusters)])
# search the KDTree for nearest match
dist, result = SIFTtree2.query(q_bow_hist, k=100)
t = time.time() - start
print(result)
print('SIFT Search Tree: ', t, ' secs')

flist = list(mydataSIFT.iloc[result[0].tolist()]['file'])
slist = list(dist[0])
matches = tuple(zip(slist, flist))  # create a list of tuples from 2 lists
a, q, pos, cnt = accuracy.accuracy_matches(q_path, matches, 20)
print('Accuracy =', a, '%', '| Quality:', q)
print('Count', cnt, ' | position', pos)


################# Unsupervised Clustering ###################
# ----------------------------------------------------------
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

# --------- find unsupervised cluster ID with KMEANS
X = img_bow_hist
km = KMeans(n_clusters=200)
km.fit(X)
km.predict(X)
labels = km.labels_
print(labels)
# # update labels to original dataframe
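# A tiny self-contained illustration of the bag-of-visual-words histogram
# step above: np.bincount counts how many of the query's descriptors fell
# into each of the n_clusters visual words (toy numbers only).
import numpy as np

n_clusters = 5
q_clustered_words = np.array([0, 2, 2, 4, 0, 2])  # cluster ID per descriptor
q_bow_hist = np.array([np.bincount(q_clustered_words, minlength=n_clusters)])
print(q_bow_hist)  # [[2 0 3 0 1]] -- shape (1, 5), one row ready for tree.query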
matcheshsv = []
matchesrgb = []

for q_path in q_paths:
    toplist = []
    start = time.time()
    row_dict = {'file': q_path}

    imagematcheshsv, searchtimehsv = ImageSearch_Algo_HSV.HSV_SEARCH_TREE(
        mytreeHSV, mydataHSV, q_path, 100)
    x = autothreshold(imagematcheshsv)
    toplist = toplist + x
    # print (x)
    a, d, i_hsv, cnt = accuracy.accuracy_matches(q_path, imagematcheshsv, 20)
    row_dict['hsv_acc'] = a
    row_dict['hsv_matchindex'] = i_hsv
    row_dict['hsv_Count'] = cnt
    row_dict['hsv_time'] = searchtimehsv
    print('HSV Accuracy =', a, '%', '| Quality:', d)
    print('Count', cnt, ' | position', i_hsv)

    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        mytreeRGB, mydataRGB, q_path, 100)
    y = autothreshold(imagematchesrgb)
    toplist = toplist + y
    # print (y)
    a, d, i_rgb, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
    row_dict['rgb_acc'] = a
    row_dict['rgb_matchindex'] = i_rgb
scores, ind = HSVtree.query(F, k=100)
t = time.time() - start
print(ind)
print(scores)
# get the index of the search image
print(mydataHSV.index[mydataHSV['file'] == q_path])
print(q_path)
print("Search took ", t, ' secs')

# Zip results into a list of tuples (score, file) & calculate score
flist = list(mydataHSV.iloc[ind[0].tolist()]['file'])
slist = list(scores[0])
result = tuple(zip(slist, flist))  # create a list of tuples from 2 lists
a, q, pos, cnt = accuracy.accuracy_matches(q_path, result, 100)
print('Accuracy =', a, '%', '| Quality:', q)
print('Count', cnt, ' | position', pos)


# ------------- KD Tree HASH (phash, ahash, dhash, whash) -------------
# Avg. time per search: 0.033 s

from sklearn.neighbors import KDTree
import imagehash
import numpy as np
import time

# YD = np.array(mydataHASH['phash'].apply(imagehash.ImageHash.__hash__))
YD = list(mydataHASH['ahash'])
import matplotlib.pyplot as PLT

import PrepareData as PD
import Accuracy as ACC

accuracy = ACC.count_accuracy()
PD.original_graph_data()

# PLOTTING ORIGINAL FEATURES
PLT.scatter(PD.p1x, PD.p1y, label="setosa", color="red", marker="o", s=50)
PLT.scatter(PD.p2x, PD.p2y, label="versicolor", color="green", marker="o", s=50)
PLT.scatter(PD.p3x, PD.p3y, label="virginica", color="blue", marker="o", s=50)

# PLOTTING PREDICTED FEATURES
PLT.scatter(ACC.pred_x, ACC.pred_y, label="prediction", color="black",
            marker="|", s=100)

PLT.xlabel('sepal length')
PLT.ylabel('sepal width')
PLT.title(accuracy)
PLT.legend()
PLT.show()
q_paths = random.sample(imagepaths, 20)  # random sample of 20 items from the list
# q_paths = ["./imagesbooks/ukbench00196.jpg", "./imagesbooks/ukbench00199.jpg", "./imagesbooks/ukbench00296.jpg", "./imagesbooks/ukbench00298.jpg", "./imagesbooks/ukbench00299.jpg", "./imagesbooks/ukbench00300.jpg", "./imagesbooks/ukbench00302.jpg", "./imagesbooks/ukbench00303.jpg", "./imagesbooks/ukbench02730.jpg", "./imagesbooks/ukbench02740.jpg", "./imagesbooks/ukbench02743.jpg", "./imagesbooks/ukbench05608.jpg", "./imagesbooks/ukbench05932.jpg", "./imagesbooks/ukbench05933.jpg", "./imagesbooks/ukbench05934.jpg", "./imagesbooks/ukbench05935.jpg", "./imagesbooks/ukbench05952.jpg", "./imagesbooks/ukbench05953.jpg", "./imagesbooks/ukbench05954.jpg", "./imagesbooks/ukbench05955.jpg", "./imagesbooks/ukbench05956.jpg", "./imagesbooks/ukbench05957.jpg", "./imagesbooks/ukbench05958.jpg", "./imagesbooks/ukbench05959.jpg", "./imagesbooks/ukbench06148.jpg", "./imagesbooks/ukbench06149.jpg", "./imagesbooks/ukbench06150.jpg", "./imagesbooks/ukbench06151.jpg", "./imagesbooks/ukbench06558.jpg", "./imagesbooks/ukbench06559.jpg", "./imagesbooks/ukbench07285.jpg", "./imagesbooks/ukbench07588.jpg", "./imagesbooks/ukbench07589.jpg", "./imagesbooks/ukbench07590.jpg", "./imagesbooks/ukbench08540.jpg", "./imagesbooks/ukbench08542.jpg", "./imagesbooks/ukbench08592.jpg"]

counter = 1
for q_path in q_paths:
    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, 100)
    # imagematchesrgb , searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH(mydataRGB, q_path, 0.7)

    # to reload module: uncomment and use the following
    # %load_ext autoreload
    # %autoreload 2

    import Accuracy as accuracy
    a, d, i_rgb, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
    # print ('Accuracy =', a, '%', '| Quality:', d )
    # print ('Count', cnt, ' | position', i_rgb)

    # import ImageSearch_Plots as myplots
    # myplots.plot_predictions(imagematchesrgb, q_path)

    score = []
    matchesposition = [0] * len(imagematchesrgb)
    for i in i_rgb:
        matchesposition[i] = 1

    # score, file = item
    for item in imagematchesrgb:
        x, y = item
        score.append(x)
def algomixerFunnel(algos, return_count, finalalgo, finalalgoDataframe,
                    algoname='NewAlgo', write=False):
    '''
    algos (list): list of candidate algos
    return_count (int): number of candidates to be generated
    finalalgo (str): the final (detection) algo
    finalalgoDataframe (pd.DataFrame): feature dataframe of the final algo,
        to be filtered and used
    algoname (str): column-name stem in the dataframe for the final reported
        accuracy, time, etc.
    write (bool): whether to report the funnel accuracy and time before the
        merge with the thresholded candidates
    '''
    # myAlgos = [ search_RGB, search_SIFT_BF ]
    # algomixerFunnel (myAlgos)
    start = time.time()

    algoResults = []
    algoTimes = []
    for algo in algos:
        algoResult, algoTime = algo_selector(algo, return_count=return_count)
        algoResults.append(algoResult)
        algoTimes.append(algoTime)

    # generate candidates (shortlisting)
    filteredFeatureData = Thresholding.filter_candidates(
        algoResults, finalalgoDataframe)
    # run final algo (detection)
    imagepredictions, searchtimesift = algo_selector_final(
        finalalgo, filteredFeatureData, return_count=return_count)

    if write:
        # find accuracy and append to dict
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagepredictions, 20)
        t = time.time() - start
        row_dict['acc_' + algoname + '_BM'] = a
        row_dict['index_' + algoname + '_BM'] = ind
        row_dict['Count_' + algoname + '_BM'] = cnt
        row_dict['quality_' + algoname + '_BM'] = d
        row_dict['time_' + algoname + '_BM'] = t

    # generate algo uniques: apply threshold for each Result (imagematches)
    unique_final_list = []
    for Result in algoResults:
        unique_final_list.append(Thresholding.autothreshold_knee(Result))
    # print (unique_final_list)

    # MERGE OPERATION FROM ALL RESULTS
    # algo uniques + final algo detection results
    final_algo_List = Thresholding.imagepredictions_to_list(imagepredictions)
    toplist = unique_final_list.copy()
    # move all commons from the candidates' threshold lists to the front
    toplist = [Thresholding.merge_results(toplist, False)]
    # add final algo derivatives to toplist
    toplist.append(final_algo_List)
    # merge lists of algo results: HSV Thresh, RGB Thresh, SIFT
    toplist = Thresholding.merge_results(toplist, False)

    t = time.time() - start

    # find accuracy and append to dict
    a, d, ind, cnt = accuracy.accuracy_from_list(q_path, toplist, 20)
    print('index F_' + algoname + ': ', ind)
    row_dict['acc_' + algoname] = a
    row_dict['index_' + algoname] = ind
    row_dict['Count_' + algoname] = cnt
    row_dict['quality_' + algoname] = d
    row_dict['time_' + algoname] = t
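# Minimal usage sketch (hypothetical algo-name strings; algo_selector and
# algo_selector_final must recognize them, and mydataSIFT is the feature
# dataframe of the final algo):
#
#     algomixerFunnel(['rgb', 'hsv'], return_count=100, finalalgo='sift',
#                     finalalgoDataframe=mydataSIFT,
#                     algoname='funnel_RGB_HSV_SIFT', write=True)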
PBEest = ConditionalProb.ConstructConditionalProbBE(Vocab, Totwords, classes)

# Classify the training set: using Bayesian estimate first
import NBAnalysis

# step one: construct the Posterior estimates
print('Calculating Posterior estimates')
Pxw, Docs = NBAnalysis.CalcPosteriors(docid, Trainlabels, wordid, wordcount,
                                      classes, PBEest, Prior, Vocab)
# Docs is the total number of documents

print('Select best class estimate for prediction')
# step two: choose best estimate
TrainClassEst = NBAnalysis.FindBestPxw(Pxw, Docs, classes)

import Accuracy

print('Calculating class Accuracy for Training set')
ClassAccTr = Accuracy.ClassAcc(TrainClassEst, Trainlabels, Docs, classes)
print('Class', 'Accuracy')
for j in range(classes):
    print(j + 1, ClassAccTr[j])

print('Calculating Overall Accuracy for Training set')
# step three: calculate Accuracy
AccuTrain = Accuracy.Accuracy(TrainClassEst, Trainlabels, Docs)
print('Overall Accuracy of the BE on the Training Data =', AccuTrain)

# step four: confusion matrix
print('Now calculating the confusion matrix for the training data')
ConfuseTrain = Accuracy.Confusion(TrainClassEst, Trainlabels, Docs, classes)
print('Confusion matrix entries through (4,4)')
print(ConfuseTrain[1][1], ConfuseTrain[1][2], ConfuseTrain[1][3], ConfuseTrain[1][4])
print(ConfuseTrain[2][1], ConfuseTrain[2][2], ConfuseTrain[2][3],
def search_AlgoA(candidates=100, verbose=False):
    toplist = []
    start = time.time()

    # run RGB
    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, returnCount=candidates)
    # run HSV
    imagematcheshsv, searchtimehsv = ImageSearch_Algo_HSV.HSV_SEARCH_TREE(
        myHSVtree, mydataHSV, q_path, returnCount=candidates)

    # create shortlist for SIFT
    filteredSIFTData = Thresholding.filter_sift_candidates(
        [imagematcheshsv, imagematchesrgb], mydataSIFT)
    # run SIFT
    imagepredictions, searchtimesift = ImageSearch_Algo_SIFT.SIFT_SEARCH_BF(
        filteredSIFTData, q_path, sift_features_limit=100,
        lowe_ratio=LOWE_RATIO, predictions_count=SIFT_PREDICTIONS_COUNT)

    # threshold RGB
    final_RGB_List = Thresholding.autothreshold_knee(imagematchesrgb)
    # threshold HSV
    final_HSV_List = Thresholding.autothreshold_knee(imagematcheshsv)

    # MERGE OPERATION FROM ALL RESULTS
    # SIFT list
    final_SIFT_List = Thresholding.imagepredictions_to_list(imagepredictions)
    # merge lists of algo results: HSV Thresh, RGB Thresh, SIFT
    toplist = Thresholding.merge_results(
        [final_HSV_List, final_RGB_List, final_SIFT_List], False)

    # find accuracy and append to dict
    a, d, ind, cnt = accuracy.accuracy_from_list(q_path, toplist, 20)
    t = time.time() - start
    row_dict['acc_algo_A'] = a
    row_dict['index_algo_A'] = ind
    row_dict['Count_algo_A'] = cnt
    row_dict['quality_algo_A'] = d
    row_dict['time_algo_A'] = t

    # run if verbose enabled; DEBUGGING
    if verbose:
        print('index FINAL AlgoA: ', ind)

        # append SIFT results
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagepredictions, 20)
        print('SIFT-A Accuracy =', a, '%', '| Quality:', d)
        print('SIFT-A Count', cnt, ' | position', ind)
        row_dict['acc_Algo_A_SIFT'] = a
        row_dict['index_Algo_A_SIFT'] = ind
        row_dict['Count_Algo_A_SIFT'] = cnt
        row_dict['quality_Algo_A_SIFT'] = d
        # row_dict['time_Algo_A_SIFT'] = searchtime

        # get current accuracy of RGB
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
        print('index RGB : ', ind)
        # get thresholded accuracy of RGB
        a, d, ind, cnt = accuracy.accuracy_from_list(q_path, final_RGB_List, 20)
        print('index RGB Th:', ind)
        # update candidates RGB
        row_dict['index_Algo_A_cRGB'] = ind

        # get current accuracy for HSV
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematcheshsv, 20)
        print('index HSV : ', ind)
        # get thresholded accuracy for HSV
        a, d, ind, cnt = accuracy.accuracy_from_list(q_path, final_HSV_List, 20)
        print('index HSV Th: ', ind)
        # update candidates
        row_dict['index_Algo_A_cHSV'] = ind

    return imagepredictions, t
test_images, test_labels = File.load_mnist("testing", path=os.getcwd())

# flatten training images into 60,000 x 784 array
train_images_flat = np.array([np.ravel(img) for img in train_images])
test_images_flat = np.array([np.ravel(img) for img in test_images])

###############################################################################
#                              Run scikit-learn                               #
###############################################################################

k = int(sys.argv[1])  # number of clusters (system argument)

# Train k-means model
kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)
kmeans_fit = kmeans.fit(train_images_flat)

# Get the cluster assignment of each training image
kmeans_labels = kmeans_fit.labels_
kmeans_centers = kmeans_fit.cluster_centers_

# Initialize a vector of responsibilities in a one-hot-coded format.
final_responsibilities = np.zeros((len(train_images_flat), k))

# For each cluster assignment, set the corresponding entry of the
# one-hot-coded vector to 1.
for imgnum in range(len(train_images_flat)):
    final_responsibilities[imgnum][kmeans_labels[imgnum]] = 1

# Obtain predictions for each point.
Z = kmeans.predict(test_images_flat)

# Determine accuracies.
Accuracy.final_accuracy(final_responsibilities, train_labels,
                        train_images_flat, kmeans_centers)
def main_run():
    # h = [.01, .01, .01,.01, .01,.01, .01,.01]
    # # h = pd.Series([0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025],dtype=float)
    # # x = [3., 2., 1.5,2., 1.5,3., 4.5,4.9]
    # # y = [4.2,1.9,1.9,2.1,2.0 ,2.5,3.9,4.5]
    # x = [1.5, 2., 1.5,2., 1.5,3., 4.5,4.9]
    # y = [4.2,1.9,1.9,2.1,2.0 ,2.5,3.9,4.5]
    # z = [0.012,0.032,0.035,0.902,0.052,0.302,-0.909,0.902]
    # # df.corr()
    # b = [1., -1., -1.,1., 1.,-1., 1,-1]
    # p = [0.12,0.55,0.45,0.3,0.53,0.57,0.19,0.58]

    start_value = 1000000
    # symbols = ['AC','ALI','BDO','BPI','DMC','GLO',
    #            'GTCAP','HCP','JFC','MBT','MPI','MEG',
    #            'RLC','SECB','SM','SMPH','TEL','URC']
    # symbols = ['AAPL','IBM','XOM','GLD']
    # symbols = ['goog','aapl','msft','amzn']
    symbols = ['AAPL', 'MSFT', 'ORCL', 'QCOM', 'BBY', 'MU', 'GILD', 'YUM',
               'NFLX', 'VZ', 'APA', 'RRC', 'MDLZ', 'CSCO', 'V', 'MET',
               'SBUX', 'GGP', 'UA', 'GM']
    h = np.ones(len(symbols))
    addressable_dates = pd.date_range('2012-01-01', '2015-12-31')
    df = utils.get_data_online(symbols, addressable_dates)
    df = pd.DataFrame(df, index=df.index, dtype=np.float64)
    df = df.dropna()
    x = np.asarray(utils.daily_returns(df[symbols[1:]]).std())
    # print x
    y = np.asarray(utils.daily_returns(df[symbols[1:]]).mean())
    # print y
    # b = [1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.,1.]
    z = np.asarray(utils.get_correlation(df)[1:])
    # print z
    # p = [.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5,.5]
    b = np.ones(len(symbols) - 1)
    # print b
    p = Accuracy.get_accuracy(symbols, addressable_dates)
    # print p
    sharpe_ratios = utils.get_sharpe_ratio(utils.daily_returns(df[symbols[1:]]))
    # print sharpe_ratios
    data = (x, y, z, b, p)
    x_sorted = np.sort(x)
    y_sorted = np.sort(y)
    # print "h", np.transpose(h) * y
    # print "cost", error(h)

    # cons = ({'type':'eq', 'fun': lambda x: 1 - sum(x)})
    cons = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})
    # bounds = tuple((0,1) for it in h)
    bnds = tuple((0, 1) for it in h)
    bounds = bnds
    # bounds = (0,1)
    min_result = spo.minimize(error, h, args=(data,), method='SLSQP',
                              bounds=bounds, constraints=cons,
                              options={'disp': True})
    print "Parameters = {}, Y = {}".format(np.round(min_result.x, 2),
                                           np.abs(min_result.fun))

    # for tracing
    # print "xdata", xdata
    model_bounds = max(x.max(), y.max())
    xdata = np.linspace(0, 5, 8)
    y_main = func(xdata, model_bounds, 1, -model_bounds)
    print "bias", b * np.round(min_result.x, 3)
    print "risk: ", x
    print "rewards: ", y
    print "correlation: ", z
    print "accuracy: ", p
    print "y main", y_main

    plt.plot(xdata, y_main)
    y = func(xdata, 2.5, 1.3, 0.5)
    ydata = y + 0.2 * np.random.normal(size=len(xdata))
    coeffs, pcov = curve_fit(func, xdata, ydata)
    yaj = func(xdata, coeffs[0], coeffs[1], coeffs[2])
    print pcov
    # plt.scatter(xdata, ydata)
    # plt.plot(xdata, yaj)
    # c, p = curve_fit(sigmoid, x_sorted, y_sorted)
    plt.scatter(x_sorted, y_sorted)
    plt.show()
    return None