Esempio n. 1
0
	def pruneTree(self, L, K, validation_set):
		"""Randomized post-pruning that keeps the best tree seen so far.

		For each of ``L`` rounds a deep copy of the current best tree is
		pruned ``M`` times, where ``M`` is drawn uniformly from ``[1, K]``.
		A single pruning step picks a random node from
		``self.arrange(currentTree)`` and turns it into a leaf by setting
		``val = -1`` and clearing both children.  The pruned copy replaces the
		best tree when its accuracy on ``validation_set`` is at least as good.

		Args:
			L: number of pruning rounds.
			K: upper bound (inclusive) for the number of prunings per round.
			validation_set: handed unchanged to ``Accuracy.Accuracy``.

		Returns:
			The best tree found.  ``self.root`` is updated to that tree on
			every exit path.
		"""
		bestTree = self.root
		accuracy = Accuracy.Accuracy(validation_set)

		for _ in range(L):
			currentTree = copy.deepcopy(bestTree)
			M = random.randint(1, K)

			for _ in range(M):
				nonLeafNodes = self.arrange(currentTree)
				N = len(nonLeafNodes) - 1
				if N <= 0:
					# Nothing prunable is left.  Publish what was found in
					# earlier rounds before leaving; previously this path
					# returned without updating self.root, so callers that
					# read self.root afterwards lost those improvements.
					self.root = bestTree
					return bestTree

				# Index 0 is never chosen (randint is inclusive on both
				# ends) -- presumably the root; TODO confirm with arrange().
				P = random.randint(1, N)
				replaceNode = nonLeafNodes[P]
				replaceNode.val = -1
				replaceNode.left = None
				replaceNode.right = None

			oldAccuracy = accuracy.calculateAccuracy(bestTree)
			newAccuracy = accuracy.calculateAccuracy(currentTree)

			# Ties favour the pruned candidate.
			if newAccuracy >= oldAccuracy:
				bestTree = currentTree

		self.root = bestTree
		return bestTree
Esempio n. 2
0
def main():
    """Command-line driver: build an ID3 tree, report accuracy, optionally prune.

    Positional arguments read from ``sys.argv``:
        1-3  training, validation and test files; each one is passed to
             ``attribute_names_adder`` as soon as it is read
        4    "yes" to print the tree before and after pruning
        5    heuristic selector handed to ``ID3.DTree``
        6    pruning mode: 'r' calls ``pruneTree``; 'n' stops after the
             pre-pruning report
        7-8  integers ``L`` and ``K`` forwarded to ``pruneTree``
    """
    data_files = []
    for position in (1, 2, 3):
        path = sys.argv[position]
        attribute_names_adder(path)
        data_files.append(path)
    train_file, validation_file, test_file = data_files

    show_tree = sys.argv[4] == "yes"
    heu = sys.argv[5]
    pru = sys.argv[6]
    L = int(sys.argv[7])
    K = int(sys.argv[8])

    decisionTree = ID3.DTree(train_file, heu)
    if show_tree:
        print('Decision Tree before Pruning:')
        print(decisionTree)

    # The same Accuracy object (built from the test file) is reused for the
    # before and after measurements.
    accuracy = Accuracy.Accuracy(test_file)
    accuracy.calculateAccuracy(decisionTree.root)
    print('Accuracy before Pruning:')
    accuracy.displayAccuracy()

    if pru == 'n':
        print('No pruning')
        return
    if pru == 'r':
        decisionTree.pruneTree(L, K, validation_file)

    # Any other mode falls through and re-reports the unpruned tree.
    if show_tree:
        print('Decision Tree after Pruning :')
        print(decisionTree)
    accuracy.calculateAccuracy(decisionTree.root)
    print('Accuracy after Pruning:')
    accuracy.displayAccuracy()
def forwardSelection(data):
    """Greedy forward feature selection over column indices ``1 .. M-1``.

    At every level each not-yet-selected feature is scored with
    ``Accuracy.Accuracy(data, N, current_features, j, best_num_wrong)`` and
    the one with the strictly highest accuracy is added.  The best subset
    over all levels is remembered.

    Args:
        data: indexable rows; ``len(data)`` rows and ``len(data[0])`` columns.

    Returns:
        ``(answer_set, answer_accuracy, accuracy_at_level)``: the best feature
        subset, its accuracy, and the winning accuracy at each level.

    Exits the interpreter (``exit()``) if no candidate at some level scores
    above 0.
    """
    row_count = len(data)
    column_count = len(data[0])

    chosen = []
    best_subset = []
    best_subset_accuracy = 0
    per_level_accuracy = []

    for level in range(1, column_count):
        print("On search-tree level number", level)
        level_pick = -1
        level_accuracy = 0
        fewest_wrong = float('inf')

        for candidate in range(1, column_count):
            if candidate in chosen:
                continue
            print("Considering feature number ", candidate)
            score, wrong = Accuracy.Accuracy(
                data, row_count, chosen, candidate, fewest_wrong)
            if score > level_accuracy:
                # The running error count is passed back into the next
                # Accuracy call within this level.
                level_accuracy, level_pick, fewest_wrong = score, candidate, wrong

        if level_pick != -1:
            chosen.append(level_pick)
            print("On level", level, "I added feature", level_pick,
                  "which gave an accuracy of", level_accuracy)
        else:
            print("ERROR: feature to add is -1")
            exit()

        if level_accuracy > best_subset_accuracy:
            best_subset_accuracy = level_accuracy
            best_subset = list(chosen)
        per_level_accuracy.append(level_accuracy)

    return best_subset, best_subset_accuracy, per_level_accuracy
Esempio n. 4
0
def main (k, m="means", init_type="random"):
    """Cluster the training images, score the clusters and save the results.

    Relies on module-level data (``train_images``, ``train_images_flat``,
    ``train_labels``, ``test_images_flat``, ``test_labels``) and on the
    project modules ``Initialize``, ``Kmeans``, ``Distance``, ``File``,
    ``Accuracy`` and ``ClassifyClusters``.

    Args:
        k: number of clusters.
        m: clustering method name, forwarded to ``Kmeans.kmeans`` as
            ``method`` and used in the output title.
        init_type: "random" selects ``Initialize.random_centers``; any other
            value selects ``Initialize.kmeans_plusplus`` and is reported as
            "kplusplus".

    Side effects:
        Prints the clustering and testing times, saves representative images
        via ``File.save_images`` and writes
        ``./results/<title>/<title>_results.json``.
    """
    # Start the clustering timer (initialization is included in the timing).
    start_cluster = timeit.default_timer()

    # Initialize clusters
    if init_type == "random":
        initial_clusters = Initialize.random_centers(k)
    else:
        init_type = "kplusplus"
        initial_clusters = Initialize.kmeans_plusplus(
            k, train_images_flat, dist_fn=Distance.sumsq)

    # Run clustering algorithm
    final_responsibilities, final_clusters = Kmeans.kmeans(
        k, train_images_flat, initial_clusters,
        distfn=Distance.sumsq, method=m)

    clustering_time = timeit.default_timer() - start_cluster
    # A single pre-formatted string prints identically under Python 2 and 3;
    # the former Python 2 print statement was a SyntaxError on Python 3.
    print("Time spent clustering :  %s" % clustering_time)

    # Save representative images to file.
    title = m + "_" + init_type + "_cluster" + str(k)
    File.save_images(k, train_images, final_responsibilities,
                     final_clusters, title)

    # Calculate final accuracy for clusters
    final, cluster_set = Accuracy.final_accuracy(
        final_responsibilities, train_labels, train_images_flat,
        final_clusters)

    # Now see how well we can classify the test set, timing only this step.
    start_cluster_test = timeit.default_timer()
    predictions = ClassifyClusters.classify(
        cluster_set, test_images_flat, test_labels, distfn=Distance.sumsq)
    testing_time = timeit.default_timer() - start_cluster_test
    print("Time spent testing :  %s" % testing_time)

    # Only element 1 of the classify() result is stored as the prediction
    # accuracy.
    results = {"k" : k, "prediction_accuracy" : predictions[1],
               "cluster_means" : cluster_set, "cluster_stats" : final,
               "clustering_time" : clustering_time,
               "testing_time" : testing_time}

    # NOTE(review): assumes ./results/<title>/ already exists (presumably
    # created by File.save_images) -- confirm.
    with open('./results/' + title + '/' + title + '_results.json', 'w') as outfile:
        json.dump(results, outfile, cls=File.NumpyEncoder)
Esempio n. 5
0
def search_SIFT_BF(returnCount=100, mydataSIFT=mydataSIFT, SIFT_FEATURES_LIMIT=SIFT_FEATURES_LIMIT):
    """Brute-force SIFT search for the global ``q_path``; always records metrics.

    Unlike the sibling search helpers there is no ``write`` switch: the
    four values returned by ``accuracy.accuracy_matches`` and the search
    time are always stored in the global ``row_dict`` under ``*_sift_BF``.

    Args:
        returnCount: forwarded as ``predictions_count``.
        mydataSIFT: feature data to search (defaults to the module global
            captured at definition time).
        SIFT_FEATURES_LIMIT: forwarded as ``sift_features_limit``.

    Returns:
        ``(imagepredictions, searchtime)`` exactly as returned by
        ``SIFT_SEARCH_BF``.
    """
    predictions, elapsed = ImageSearch_Algo_SIFT.SIFT_SEARCH_BF(
        mydataSIFT,
        q_path,
        sift_features_limit=SIFT_FEATURES_LIMIT,
        lowe_ratio=LOWE_RATIO,
        predictions_count=returnCount,
    )
    acc, quality, positions, count = accuracy.accuracy_matches(q_path, predictions, 20)
    row_dict.update({
        'acc_sift_BF': acc,
        'index_sift_BF': positions,
        'Count_sift_BF': count,
        'quality_sift_BF': quality,
        'time_sift_BF': elapsed,
    })
    return predictions, elapsed
Esempio n. 6
0
def search_ORB_BF2(returnCount=100, mydataORB=mydataORB, write=False):
    """Run the modified brute-force ORB search for the global ``q_path``.

    Args:
        returnCount: forwarded as ``predictions_count``.
        mydataORB: feature data to search (defaults to the module global
            captured at definition time).
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_orb_BF2``.

    Returns:
        ``(imagematches, searchtime)`` as returned by ``ORB_SEARCH_MODBF``.
    """
    found, elapsed = ImageSearch_Algo_ORB.ORB_SEARCH_MODBF(
        mydataORB,
        q_path,
        ORB_FEATURES_LIMIT,
        lowe_ratio=LOWE_RATIO,
        predictions_count=returnCount,
    )
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict['acc_orb_BF2'] = acc
    row_dict['index_orb_BF2'] = positions
    row_dict['Count_orb_BF2'] = count
    row_dict['quality_orb_BF2'] = quality
    row_dict['time_orb_BF2'] = elapsed
    return found, elapsed
Esempio n. 7
0
def search_ORB_BOVW (returnCount=100, write=False) :
    """Run the ORB bag-of-visual-words tree search for the global ``q_path``.

    Args:
        returnCount: number of matches requested from ``ORB_SEARCH_TREE``.
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_orb_tree``.

    Returns:
        ``(imagematches, searchtime)`` as returned by ``ORB_SEARCH_TREE``.
    """
    # Forward the caller's returnCount; it used to be hard-coded to 100,
    # which silently ignored the parameter (search_SIFT_BOVW forwards it).
    imagematches, searchtime = ImageSearch_Algo_ORB.ORB_SEARCH_TREE(
        q_path, myORBmodel, myORBtree, mydataORB,
        returnCount=returnCount, kp=ORB_FEATURES_LIMIT)
    if write:
        a, d, ind, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
        row_dict['acc_orb_tree'] = a
        row_dict['index_orb_tree'] = ind
        row_dict['Count_orb_tree'] = cnt
        row_dict['quality_orb_tree'] = d
        row_dict['time_orb_tree'] = searchtime

    return imagematches, searchtime
Esempio n. 8
0
def search_SIFT_BOVW(returnCount=100, write=False):
    """Run the SIFT bag-of-visual-words tree search for the global ``q_path``.

    Args:
        returnCount: forwarded to ``SIFT_SEARCH_TREE`` as ``returnCount``.
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_sift_tree``.

    Returns:
        ``(imagematches, searchtime)`` as returned by ``SIFT_SEARCH_TREE``.
    """
    # NOTE(review): kp is fixed at 100 here, whereas the ORB counterpart
    # passes its *_FEATURES_LIMIT constant -- confirm this is intended.
    found, elapsed = ImageSearch_Algo_SIFT.SIFT_SEARCH_TREE(
        q_path, mySIFTmodel, mySIFTtree, mydataSIFT,
        returnCount=returnCount, kp=100)
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict.update({
        'acc_sift_tree': acc,
        'index_sift_tree': positions,
        'Count_sift_tree': count,
        'quality_sift_tree': quality,
        'time_sift_tree': elapsed,
    })
    return found, elapsed
Esempio n. 9
0
def search_HASH_All(returnCount=100, write=False):
    """Run the hash tree search once per algorithm in the global ``AlgoGenList``.

    For every algorithm the matching tree from ``myHASH_Trees`` is queried
    with a hash size of 16.  When ``write`` is true the matches are scored
    with ``accuracy.accuracy_matches`` and stored in the global ``row_dict``
    under ``*_HASH_<algo>``.

    Args:
        returnCount: forwarded to ``HASH_SEARCH_TREE`` as ``returnCount``.
        write: whether to record metrics in ``row_dict``.

    Returns:
        None; the search results themselves are not returned.
    """
    for algo in AlgoGenList:
        found, elapsed = ImageSearch_Algo_Hash.HASH_SEARCH_TREE(
            myHASH_Trees[algo], mydataHASH, q_path,
            hashAlgo=algo, hashsize=16, returnCount=returnCount)
        if not write:
            continue
        acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
        suffix = str(algo)
        row_dict['acc_HASH_' + suffix] = acc
        row_dict['index_HASH_' + suffix] = positions
        row_dict['Count_HASH_' + suffix] = count
        row_dict['quality_HASH_' + suffix] = quality
        row_dict['time_HASH_' + suffix] = elapsed
Esempio n. 10
0
def search_HASH_HYBRID(returnCount=100, write=False):
    """Query the hybrid hash tree (algorithms in ``HybridAlgoList``) for ``q_path``.

    Args:
        returnCount: forwarded to ``HASH_SEARCH_HYBRIDTREE`` as
            ``returnCount``.
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_HASH_Hybrid``.

    Returns:
        ``(imagematches, searchtime)`` as returned by
        ``HASH_SEARCH_HYBRIDTREE``.
    """
    found, elapsed = ImageSearch_Algo_Hash.HASH_SEARCH_HYBRIDTREE(
        myHybridtree, mydataHASH, q_path,
        hashAlgoList=HybridAlgoList, hashsize=16, returnCount=returnCount)
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict.update({
        'acc_HASH_Hybrid': acc,
        'index_HASH_Hybrid': positions,
        'Count_HASH_Hybrid': count,
        'quality_HASH_Hybrid': quality,
        'time_HASH_Hybrid': elapsed,
    })
    return found, elapsed
Esempio n. 11
0
def search_RGB(returnCount=100, mydataRGB=mydataRGB, write=False):
    """Query the RGB tree (global ``myRGBtree``) for the global ``q_path``.

    Args:
        returnCount: forwarded to ``RGB_SEARCH_TREE`` as ``returnCount``.
        mydataRGB: feature data (defaults to the module global captured at
            definition time).
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_rgb``.

    Returns:
        ``(imagematchesrgb, searchtimergb)`` as returned by
        ``RGB_SEARCH_TREE``.
    """
    found, elapsed = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, returnCount=returnCount)
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict['acc_rgb'] = acc
    row_dict['index_rgb'] = positions
    row_dict['Count_rgb'] = count
    row_dict['quality_rgb'] = quality
    row_dict['time_rgb'] = elapsed
    return found, elapsed
Esempio n. 12
0
def search_HSV(returnCount=100, write=False):
    """Query the HSV tree (global ``myHSVtree``) for the global ``q_path``.

    Args:
        returnCount: forwarded to ``HSV_SEARCH_TREE`` as ``returnCount``.
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_hsv``.

    Returns:
        ``(imagematcheshsv, searchtimehsv)`` as returned by
        ``HSV_SEARCH_TREE``.
    """
    found, elapsed = ImageSearch_Algo_HSV.HSV_SEARCH_TREE(
        myHSVtree, mydataHSV, q_path, returnCount=returnCount)
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict.update({
        'acc_hsv': acc,
        'index_hsv': positions,
        'Count_hsv': count,
        'quality_hsv': quality,
        'time_hsv': elapsed,
    })
    return found, elapsed
Esempio n. 13
0
def search_RGB_Corr(returnCount=100, mydataRGB=mydataRGB, write=False):
    """Correlation-based RGB search for the global ``q_path``.

    Args:
        returnCount: accepted for signature parity with the other search
            helpers but not used by this function.
        mydataRGB: feature data (defaults to the module global captured at
            definition time).
        write: when true, score the matches with
            ``accuracy.accuracy_matches`` and store the results and the
            search time in the global ``row_dict`` under ``*_rgb_corr``.

    Returns:
        ``(imagematchesrgb, searchtimergb)`` as returned by ``RGB_SEARCH``.
    """
    found, elapsed = ImageSearch_Algo_RGB.RGB_SEARCH(
        mydataRGB, q_path,
        correl_threshold=RGB_PARAMETERCORRELATIONTHRESHOLD)
    if not write:
        return found, elapsed

    acc, quality, positions, count = accuracy.accuracy_matches(q_path, found, 20)
    row_dict['acc_rgb_corr'] = acc
    row_dict['index_rgb_corr'] = positions
    row_dict['Count_rgb_corr'] = count
    row_dict['quality_rgb_corr'] = quality
    row_dict['time_rgb_corr'] = elapsed
    return found, elapsed
Esempio n. 14
0
	def reduced_error_pruning(self, validation_set):
		"""Prune nodes of a copy of the tree one by one, keeping it when not worse.

		A single deep copy of ``self.root`` is made and its nodes, as listed
		by ``self.arrange``, are turned into leaves in order (``val = -1``,
		children cleared); the last listed node is never visited.  After each
		step the copy's accuracy on ``validation_set`` is compared with the
		best tree's, and the copy becomes the best tree when it is at least
		as accurate.

		Returns:
			The best tree; ``self.root`` is set to it unless ``arrange``
			returned nothing, in which case the original root is returned
			untouched.
		"""
		bestTree = self.root
		scorer = Accuracy.Accuracy(validation_set)
		workingTree = copy.deepcopy(bestTree)
		candidates = self.arrange(workingTree)
		if len(candidates) == 0:
			return bestTree

		# NOTE(review): prunings accumulate in the one working copy and are
		# never reverted; once the copy has been accepted, bestTree and
		# workingTree are the same object, so every later pruning is
		# accepted too.  Confirm whether this is intended.
		for position in range(len(candidates) - 1):
			node = candidates[position]
			node.val = -1
			node.left = None
			node.right = None

			baseline = scorer.calculateAccuracy(bestTree)
			pruned = scorer.calculateAccuracy(workingTree)
			if baseline <= pruned:
				bestTree = workingTree

		self.root = bestTree
		return bestTree
Esempio n. 15
0
def algomixerAppend(algos, return_count, algoname='NewAlgo'):
    """Run several search algorithms and merge thresholded and full results.

    Each entry of ``algos`` is run through ``algo_selector``.  The merged
    list is built from the knee-thresholded result of every algorithm
    followed by the complete result list of every algorithm (this is the
    difference from ``algomixerFunnel``), then de-duplicated by
    ``Thresholding.merge_results``.

    Args:
        algos: iterable of algorithm selectors understood by
            ``algo_selector``.
        return_count: forwarded to ``algo_selector`` as ``return_count``.
        algoname: suffix for the ``row_dict`` keys written by this function.

    Side effects:
        Prints the match index and stores accuracy, index, count, quality
        and total elapsed time in the global ``row_dict``.  Returns None.
    """
    start = time.time()

    algoResults = []
    for algo in algos:
        thisResult, _ = algo_selector(algo, return_count=return_count)
        algoResults.append(thisResult)

    # First the thresholded "unique" lists, then every full result list.
    thresholded = [Thresholding.autothreshold_knee(result) for result in algoResults]
    complete = [Thresholding.imagepredictions_to_list(result) for result in algoResults]

    # Merge and remove duplicates; order is [[commons], [algo1], [algo2]...].
    toplist = Thresholding.merge_results(thresholded + complete, False)

    elapsed = time.time() - start

    # The elapsed time excludes the accuracy computation below.
    acc, quality, positions, count = accuracy.accuracy_from_list(q_path, toplist, 20)
    print('index F_' + algoname + ': ', positions)

    row_dict['acc_' + algoname] = acc
    row_dict['index_' + algoname] = positions
    row_dict['Count_' + algoname] = count
    row_dict['quality_' + algoname] = quality
    row_dict['time_' + algoname] = elapsed
def backwardDeletion(data):
    """Greedy backward feature elimination over column indices ``1 .. M-1``.

    Starting from all features, each level scores every "leave one out"
    subset with ``Accuracy.Accuracy(data, N, subset, None, best_num_wrong)``
    and permanently drops the feature whose removal gives the strictly
    highest accuracy.  The best subset over all levels is remembered.

    Args:
        data: indexable rows; ``len(data)`` rows and ``len(data[0])`` columns.

    Returns:
        ``(answer_set, answer_accuracy, accuracy_at_level)``: the best feature
        subset, its accuracy, and the winning accuracy at each completed
        level.
    """
    accuracy_at_level = []
    answer_set = []
    answer_accuracy = 0
    N = len(data)
    M = len(data[0])
    # start with all of the features
    features = list(range(1, M))
    for _ in range(1, M):
        best_num_wrong = float('inf')
        best_accuracy = 0
        feature_to_omit = -1
        for i in range(len(features)):
            # Take out the i'th feature; the slice form also covers the
            # last index, so no special case is needed.
            temp = features[:i] + features[i + 1:]
            temp_accuracy, num_wrong = Accuracy.Accuracy(
                data, N, temp, None, best_num_wrong)
            if (temp_accuracy > best_accuracy):
                best_accuracy = temp_accuracy
                feature_to_omit = i
                best_num_wrong = num_wrong
        if feature_to_omit == -1:
            # No subset scored above 0.  Previously the -1 sentinel was used
            # as a real index, which turned `features` into
            # features[:-1] + features[0:] (a longer list with duplicates).
            print("No subset scored above 0; stopping backward deletion")
            break
        print("Omitting feature", features[feature_to_omit])
        features = features[:feature_to_omit] + features[feature_to_omit + 1:]
        print("This leaves us with", features, "and an accuracy of ",
              best_accuracy)
        if (best_accuracy > answer_accuracy):
            answer_accuracy = best_accuracy
            # `features` is rebound to a new list each level, so no copy is
            # needed here.
            answer_set = features
        accuracy_at_level.append(best_accuracy)
    return answer_set, answer_accuracy, accuracy_at_level
Esempio n. 17
0
# ------------------ SEARCH TEST

# q_path = random.sample(fileterd_file, 1)[0]
# q_path = './imagesbooks/ukbench00481.jpg'
# imagepredictions , searchtime = SIFT_SEARCH(mydata1, q_path, 50 ,0.75, 20)
# Run a SIFT search for q_path.
# NOTE(review): ImageSearch_Algo_SIFT is called directly here, while other
# code in this file uses it as a module (ImageSearch_Algo_SIFT.SIFT_SEARCH_BF)
# -- confirm which callable is meant.
imagepredictions, searchtime = ImageSearch_Algo_SIFT(mydatasift, q_path, 100,
                                                     0.75, 20)

# To reload a module while iterating in a notebook, uncomment the following:
# %load_ext autoreload
# %autoreload 2

import Accuracy as accuracy

# Score the predictions.
# NOTE(review): other call sites unpack this 4-tuple as
# (accuracy, quality, index, count); here the 4th value is what gets printed
# -- confirm that is the intended field.
a, b, c, d = accuracy.accuracy_matches(q_path, imagepredictions, 50)
print('Accuracy =', a, '%', d, searchtime)

# import ImageSearch_Plots as myplots
# myplots.plot_predictions(imagepredictions[:20], q_path)

#---------------- Compile data and plot results

# q_paths = random.sample(imagepaths, 50)  # random sample 100 items in list
# q_paths = ['./imagesbooks/ukbench05960.jpg', './imagesbooks/ukbench00459.jpg', './imagesbooks/ukbench06010.jpg', './imagesbooks/ukbench06104.jpg', './imagesbooks/ukbench00458.jpg']
q_paths = [
    './imagesbooks/ukbench05960.jpg', './imagesbooks/ukbench00459.jpg',
    './imagesbooks/ukbench06010.jpg', './imagesbooks/ukbench06104.jpg',
    './imagesbooks/ukbench00458.jpg', './imagesbooks/ukbench00248.jpg',
    './imagesbooks/ukbench06408.jpg', './imagesbooks/ukbench00303.jpg',
    './imagesbooks/ukbench03124.jpg', './imagesbooks/ukbench05776.jpg',
Esempio n. 18
0
    # while lines:
    #     # print lines
    #     line = re.sub('\s{2,}', '', tesa)
    #     save_result.write(line)
    #     lines = tesa.readline()
    #     if not lines:
    #         break
    #
    # file.close()
    # files.close()

    # print ("Panjang tesa: ", len(tesa))
    # print ("Panjang tesa[0]: ", len(tesa[1]))
    # print (tesa[0][0])
    # accu = Accuracy.accuracy(text)

    text = Accuracy.load_preprocess()
    (prec, rec, fm) = Accuracy.accuracy2(text, tesa)
    # print("Precision : ", precision)
    # rec = Accuracy.accuracy(text)
    # print("Recall : ", rec)
    # Fmeas = Accuracy.accuracy(text)
    # print("F1 measure : ", Fmeas)

    print("Precision:", prec)
    print("Recall:", rec)
    print("Fmeasure: ", fm)

    # ytdist = np.array([662., 877., 255., 412., 996., 295., 468., 268.,
    #                    400., 754., 564., 138., 219., 869., 669.])
Esempio n. 19
0
 def evalModel(self):
   """Prepare evaluation state, then compute the embedding quality.

   Stores a one-element float tensor holding 1.0 on ``self.device`` as
   ``self.one``, creates a fresh ``Accuracy.Accuracy`` in ``self.accObj``
   and calls ``self.computeEmbeddingQuality()``.
   """
   unit = Variable(torch.FloatTensor([1.0]))
   self.one = unit.to(self.device)
   self.accObj = Accuracy.Accuracy()
   self.computeEmbeddingQuality()
Esempio n. 20
0
# Generate HSV features for every image path and report the time HSV_GEN
# returned.
mydataHSV, mytime = ImageSearch_Algo_HSV.HSV_GEN(imagepaths)
print('HSV Feature Generation time', mytime)

#------------ HSV SEARCH TEST------------------------------#

# Pick one random query image.
q_path = random.sample(imagepaths, 1)[0]

# imagematches , searchtime = ImageSearch_Algo_HSV.HSV_SEARCH(mydataHSV, q_path)
imagematches, searchtime = ImageSearch_Algo_HSV.HSV_SEARCH(mydataHSV, q_path)
print('HSV Search time', searchtime)

# To reload a module while iterating in a notebook, uncomment the following:
# %load_ext autoreload
# %autoreload 2

# Score the matches; only the first two returned values are printed.
a, m, pos, cnt = accuracy.accuracy_matches(q_path, imagematches, 20)
print('Accuracy =', a, '%', ' | Quality: ', m)

# ----- Alternative tree search code [optimized search time]

# test TREE SEARCH code

# Create a new tree from the dataframe features 'mydataHSV' and save it
# under the name 'HSV_Tree'.
mytree = ImageSearch_Algo_HSV.HSV_Create_Tree(mydataHSV, savefile='HSV_Tree')

# Load the tree that was just saved.
thistree = ImageSearch_Algo_HSV.HSV_Load_Tree('HSV_Tree')

# Sample a new query image for the tree search.
q_path = random.sample(imagepaths, 1)[0]
Esempio n. 21
0
# Time the whole query: feature extraction, clustering and tree lookup.
start = time.time()
# get the feature for this image 
q_kp, q_des = ImageSearch_Algo_SIFT.FEATURE (q_path)
# assign each descriptor to a visual-word cluster
q_clustered_words = cluster_model.predict(q_des) 
# build the bag-of-words histogram (one row, n_clusters bins)
q_bow_hist = np.array([np.bincount(q_clustered_words, minlength=n_clusters)])
# search the tree for the 100 nearest histograms
dist, result = SIFTtree2.query(q_bow_hist, k=100)
t= time.time() - start
print (result)
print ('SIFT Search Tree: ', t , ' secs')
# Map the returned row positions back to file names and pair them with
# their distances.
flist = list (mydataSIFT.iloc[ result[0].tolist()]['file'])
slist = list (dist[0])
matches = tuple(zip( slist, flist)) # tuple of (distance, file) pairs built from the 2 lists
a, q, pos, cnt = accuracy.accuracy_matches(q_path, matches, 20)
print('Accuracy =',  a, '%', '| Quality:', q)
print('Count', cnt, ' | position', pos)

################# Unsupervised Clustering ###################
# --------------------------------------------------------- #
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
# --------- find unsupervised cluster ID with KMEANS  
X = img_bow_hist
km = KMeans(n_clusters=200)
km.fit(X)
# The predict() result is discarded; the labels are read from km.labels_
# on the next line.
km.predict(X)
labels = km.labels_
print (labels)
# # update labels to original dataframe
Esempio n. 22
0
matcheshsv = []
matchesrgb = []

# For every query image: run the HSV and RGB tree searches, threshold each
# result list, and record per-algorithm metrics in row_dict.
for q_path in q_paths: 

    # Accumulates the thresholded results of both searches for this query.
    toplist = []

    start = time.time()

    row_dict = {'file':q_path }   
    imagematcheshsv , searchtimehsv = ImageSearch_Algo_HSV.HSV_SEARCH_TREE (mytreeHSV, mydataHSV, q_path, 100)

    x = autothreshold (imagematcheshsv)
    toplist = toplist + x
    # print (x)
    a, d, i_hsv, cnt = accuracy.accuracy_matches(q_path, imagematcheshsv, 20)
    row_dict['hsv_acc'] = a
    row_dict['hsv_matchindex'] = i_hsv
    row_dict['hsv_Count'] = cnt
    row_dict['hsv_time'] = searchtimehsv
    print ('HSV Accuracy =',  a, '%', '| Quality:', d )
    print ('Count', cnt, ' | position', i_hsv)
  

    # Same steps for the RGB search.
    imagematchesrgb , searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE (mytreeRGB, mydataRGB, q_path, 100)
    y= autothreshold (imagematchesrgb)
    toplist = toplist + y
    # print (y)
    a, d, i_rgb, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
    row_dict['rgb_acc'] = a
    row_dict['rgb_matchindex'] = i_rgb
Esempio n. 23
0
# Query the HSV tree for the 100 nearest neighbours of feature vector F;
# `start` is set before this fragment.
scores, ind = HSVtree.query(F, k=100)
t = time.time() - start 

print (ind)
print (scores)

# get the index of searchimage 
print (mydataHSV.index[mydataHSV['file'] == q_path])
print (q_path)
print ( "Search took ", t, ' secs')

# Zip results into (score, file) pairs and calculate the accuracy.
flist = list (mydataHSV.iloc[ ind[0].tolist()]['file'])
slist = list (scores[0])
result = tuple(zip( slist, flist)) # tuple of (score, file) pairs built from the 2 lists 
a , q, pos, cnt = accuracy.accuracy_matches(q_path, result, 100)
print ('Accuracy =',  a, '%', '| Quality:', q )
print ('Count', cnt, ' | position', pos)


# --------------------  KD Tree  HASH (phash, ahash, dhash, whash) ---------------------
# Avg. Time per search: 0.033 s


from sklearn.neighbors import KDTree
import imagehash
import numpy as np
import time 

# YD = np.array(mydataHASH['phash'].apply(imagehash.ImageHash.__hash__))
# Collect the 'ahash' column as a plain list.
YD = list(mydataHASH['ahash'])
import matplotlib.pyplot as PLT
import PrepareData as PD
import Accuracy as ACC

# The value returned by count_accuracy() is used as the plot title below.
accuracy = ACC.count_accuracy()

# NOTE(review): presumably fills the PD.p1x .. PD.p3y series plotted below
# -- confirm in PrepareData.
PD.original_graph_data()

# Plot the original features, one colour per class.
PLT.scatter(PD.p1x, PD.p1y, label="setosa", color="red", marker="o", s=50)
PLT.scatter(PD.p2x,
            PD.p2y,
            label="versicolor",
            color="green",
            marker="o",
            s=50)
PLT.scatter(PD.p3x, PD.p3y, label="virginica", color="blue", marker="o", s=50)
# Overlay the predicted points (read from the Accuracy module) as black bars.
PLT.scatter(ACC.pred_x,
            ACC.pred_y,
            label="prediction",
            color="black",
            marker="|",
            s=100)

PLT.xlabel('sepal length')
PLT.ylabel('sepal width')
PLT.title(accuracy)
PLT.legend()
PLT.show()
Esempio n. 25
0
q_paths = random.sample(imagepaths, 20)  # randomly sample 20 query images from the list

# q_paths = ["./imagesbooks/ukbench00196.jpg", "./imagesbooks/ukbench00199.jpg",  "./imagesbooks/ukbench00296.jpg",  "./imagesbooks/ukbench00298.jpg",  "./imagesbooks/ukbench00299.jpg",  "./imagesbooks/ukbench00300.jpg",  "./imagesbooks/ukbench00302.jpg",  "./imagesbooks/ukbench00303.jpg",  "./imagesbooks/ukbench02730.jpg",  "./imagesbooks/ukbench02740.jpg",  "./imagesbooks/ukbench02743.jpg",  "./imagesbooks/ukbench05608.jpg",  "./imagesbooks/ukbench05932.jpg",  "./imagesbooks/ukbench05933.jpg",  "./imagesbooks/ukbench05934.jpg",  "./imagesbooks/ukbench05935.jpg",  "./imagesbooks/ukbench05952.jpg",  "./imagesbooks/ukbench05953.jpg",  "./imagesbooks/ukbench05954.jpg",  "./imagesbooks/ukbench05955.jpg",  "./imagesbooks/ukbench05956.jpg",  "./imagesbooks/ukbench05957.jpg",  "./imagesbooks/ukbench05958.jpg",  "./imagesbooks/ukbench05959.jpg",  "./imagesbooks/ukbench06148.jpg",  "./imagesbooks/ukbench06149.jpg",  "./imagesbooks/ukbench06150.jpg",  "./imagesbooks/ukbench06151.jpg",  "./imagesbooks/ukbench06558.jpg",  "./imagesbooks/ukbench06559.jpg",  "./imagesbooks/ukbench07285.jpg",  "./imagesbooks/ukbench07588.jpg",  "./imagesbooks/ukbench07589.jpg",  "./imagesbooks/ukbench07590.jpg",  "./imagesbooks/ukbench08540.jpg",  "./imagesbooks/ukbench08542.jpg",  "./imagesbooks/ukbench08592.jpg"]

counter = 1
# For every query image: run the RGB tree search, score it, and build a
# per-result hit mask plus the list of scores.
for q_path in q_paths:
    imagematchesrgb, searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, 100)
    # imagematchesrgb , searchtimergb = ImageSearch_Algo_RGB.RGB_SEARCH(mydataRGB, q_path, 0.7)

    # To reload a module while iterating in a notebook, uncomment the following:
    # %load_ext autoreload
    # %autoreload 2

    import Accuracy as accuracy
    a, d, i_rgb, cnt = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
    # print ('Accuracy =',  a, '%', '| Quality:', d )
    # print ('Count', cnt, ' | position', i_rgb)

    # import ImageSearch_Plots as myplots
    # myplots.plot_predictions(imagematchesrgb, q_path)

    score = []
    # 0/1 mask over the result list: 1 at every position listed in i_rgb.
    matchesposition = [0] * len(imagematchesrgb)
    for i in i_rgb:
        matchesposition[i] = 1

    # Each item is a (score, file) pair; keep only the scores.
    for item in imagematchesrgb:
        x, y = item
        score.append(x)
Esempio n. 26
0
def algomixerFunnel(algos, return_count, finalalgo, finalalgoDataframe, algoname='NewAlgo', write=False):
    '''
    Funnel search: candidate algorithms shortlist images, a final algorithm
    runs on the shortlist, and all results are merged.

    algos [list]: list of candidate algos run through algo_selector
    return_count (int): number of candidates to be generated
    finalalgo (str): the final (detection) algo, run through algo_selector_final
    finalalgoDataframe (pd.DataFrame): dataframe of the final algo, filtered down to the candidates
    algoname (str): column-name suffix for the reported accuracy, time etc.
    write (bool): whether to also report the funnel accuracy and time before
        the merge with the thresholded candidates (stored with a '_BM' suffix)

    Stores its metrics in the global row_dict and returns None.
    '''
    start = time.time()

    algoResults = []
    for algo in algos:
        algoResult, _ = algo_selector(algo, return_count=return_count)
        algoResults.append(algoResult)

    # Short-list candidates, then run the final (detection) algorithm on them.
    shortlist = Thresholding.filter_candidates(algoResults, finalalgoDataframe)
    imagepredictions, _ = algo_selector_final(finalalgo, shortlist, return_count=return_count)

    if write:
        # Accuracy of the final algorithm alone, "before merge".
        acc, quality, positions, count = accuracy.accuracy_matches(q_path, imagepredictions, 20)
        before_merge = time.time() - start
        row_dict.update({
            'acc_' + algoname + '_BM': acc,
            'index_' + algoname + '_BM': positions,
            'Count_' + algoname + '_BM': count,
            'quality_' + algoname + '_BM': quality,
            'time_' + algoname + '_BM': before_merge,
        })

    # Knee-threshold every candidate result list.
    unique_final_list = [Thresholding.autothreshold_knee(result) for result in algoResults]
    final_algo_List = Thresholding.imagepredictions_to_list(imagepredictions)

    # First merge the thresholded candidate lists into a single list, then
    # merge that list with the final algorithm's results.
    merged_candidates = Thresholding.merge_results(list(unique_final_list), False)
    toplist = Thresholding.merge_results([merged_candidates, final_algo_List], False)

    elapsed = time.time() - start

    # The elapsed time excludes the accuracy computation below.
    acc, quality, positions, count = accuracy.accuracy_from_list(q_path, toplist, 20)
    print('index F_' + algoname + ': ', positions)
    row_dict['acc_' + algoname] = acc
    row_dict['index_' + algoname] = positions
    row_dict['Count_' + algoname] = count
    row_dict['quality_' + algoname] = quality
    row_dict['time_' + algoname] = elapsed
Esempio n. 27
0
# Build the conditional-probability table used below as the Bayesian
# estimate.
PBEest = ConditionalProb.ConstructConditionalProbBE(Vocab, Totwords, classes)

# Classify the training set, using the Bayesian estimate first.
import NBAnalysis
# Step one: construct the posterior estimates.
print('Calculating Posterior estimates')
Pxw, Docs = NBAnalysis.CalcPosteriors(docid, Trainlabels, wordid, wordcount,
                                      classes, PBEest, Prior, Vocab)
# Docs is the total number of documents.
print('Select best class estimate for prediction')
# Step two: choose the best estimate.
TrainClassEst = NBAnalysis.FindBestPxw(Pxw, Docs, classes)

import Accuracy
print('Calculating class Accuracy for Training set')
ClassAccTr = Accuracy.ClassAcc(TrainClassEst, Trainlabels, Docs, classes)
print('Class', 'Accuracy')
# Classes are reported 1-based while ClassAccTr is indexed from 0.
for j in range(classes):
    print(j + 1, ClassAccTr[j])

print('Calculating Overall Accuracy for Training set')
# Step three: calculate the overall accuracy.
AccuTrain = Accuracy.Accuracy(TrainClassEst, Trainlabels, Docs)
print('Overall Accuracy of the BE on the Training Data =', AccuTrain)
# Step four: confusion matrix.
print('Now calculating the confusion matrix for the training data')
ConfuseTrain = Accuracy.Confusion(TrainClassEst, Trainlabels, Docs, classes)
print('Confusion matrix entries through (4,4)')
# Rows and columns are read starting at index 1, not 0.
print(ConfuseTrain[1][1], ConfuseTrain[1][2], ConfuseTrain[1][3],
      ConfuseTrain[1][4])
print(ConfuseTrain[2][1], ConfuseTrain[2][2], ConfuseTrain[2][3],
Esempio n. 28
0
def search_AlgoA(candidates=100, verbose=False):
    """Combined search: RGB and HSV shortlist candidates, SIFT runs on them.

    The RGB and HSV tree searches each return ``candidates`` matches for the
    global ``q_path``.  Their union filters the SIFT data, on which the
    brute-force SIFT search runs.  The final list merges the knee-thresholded
    HSV and RGB lists with the SIFT predictions, in that order.

    Args:
        candidates: number of results requested from each tree search.
        verbose: when true, print per-stage indices and store extra
            debugging entries in ``row_dict``.

    Returns:
        ``(imagepredictions, t)``: the SIFT predictions and the elapsed time
        measured right after the merged list was scored.

    Side effects:
        Stores ``*_algo_A`` metrics in the global ``row_dict``.
    """
    start = time.time()

    # Candidate generation: RGB and HSV tree searches.
    imagematchesrgb, _ = ImageSearch_Algo_RGB.RGB_SEARCH_TREE(
        myRGBtree, mydataRGB, q_path, returnCount=candidates)
    imagematcheshsv, _ = ImageSearch_Algo_HSV.HSV_SEARCH_TREE(
        myHSVtree, mydataHSV, q_path, returnCount=candidates)

    # Short-list the SIFT data and run SIFT on it.
    filteredSIFTData = Thresholding.filter_sift_candidates(
        [imagematcheshsv, imagematchesrgb], mydataSIFT)
    imagepredictions, _ = ImageSearch_Algo_SIFT.SIFT_SEARCH_BF(
        filteredSIFTData, q_path, sift_features_limit=100,
        lowe_ratio=LOWE_RATIO, predictions_count=SIFT_PREDICTIONS_COUNT)

    # Threshold the candidate lists and flatten the SIFT predictions.
    final_RGB_List = Thresholding.autothreshold_knee(imagematchesrgb)
    final_HSV_List = Thresholding.autothreshold_knee(imagematcheshsv)
    final_SIFT_List = Thresholding.imagepredictions_to_list(imagepredictions)

    # Merge order: HSV threshold, RGB threshold, SIFT.
    toplist = Thresholding.merge_results(
        [final_HSV_List, final_RGB_List, final_SIFT_List], False)

    # Here the elapsed time includes the accuracy computation.
    acc, quality, positions, count = accuracy.accuracy_from_list(q_path, toplist, 20)
    t = time.time() - start
    row_dict.update({
        'acc_algo_A': acc,
        'index_algo_A': positions,
        'Count_algo_A': count,
        'quality_algo_A': quality,
        'time_algo_A': t,
    })

    if not verbose:
        return imagepredictions, t

    # ---- debugging output ----
    print('index FINAL AlgoA: ', positions)

    # SIFT stage on its own.
    acc, quality, positions, count = accuracy.accuracy_matches(q_path, imagepredictions, 20)
    print('SIFT-A Accuracy =', acc, '%', '| Quality:', quality)
    print('SIFT-A Count', count, ' | position', positions)
    row_dict['acc_Algo_A_SIFT'] = acc
    row_dict['index_Algo_A_SIFT'] = positions
    row_dict['Count_Algo_A_SIFT'] = count
    row_dict['quality_Algo_A_SIFT'] = quality

    # RGB: raw index, then thresholded index (the latter is stored).
    _, _, positions, _ = accuracy.accuracy_matches(q_path, imagematchesrgb, 20)
    print('index RGB   : ', positions)
    _, _, positions, _ = accuracy.accuracy_from_list(q_path, final_RGB_List, 20)
    print('index RGB Th:', positions)
    row_dict['index_Algo_A_cRGB'] = positions

    # HSV: raw index, then thresholded index (the latter is stored).
    _, _, positions, _ = accuracy.accuracy_matches(q_path, imagematcheshsv, 20)
    print('index HSV   : ', positions)
    _, _, positions, _ = accuracy.accuracy_from_list(q_path, final_HSV_List, 20)
    print('index HSV Th: ', positions)
    row_dict['index_Algo_A_cHSV'] = positions

    return imagepredictions, t
# Load the "testing" split from the current working directory.
test_images,test_labels = File.load_mnist("testing",path=os.getcwd())
# Flatten every training and test image into a 1-D vector, one row per image
# (presumably 60,000 x 784 for the training set -- confirm).
train_images_flat = np.array([np.ravel(img) for img in train_images])
test_images_flat = np.array([np.ravel(img) for img in test_images])

###############################################################################
#                               Run Scikit_learn                              #
###############################################################################
k = int(sys.argv[1]) # number of clusters (system argument)

# Train k means model
kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)
kmeans_fit = kmeans.fit(train_images_flat)
# Get the cluster assignments of each point of training images
kmeans_labels = kmeans_fit.labels_
kmeans_centers = kmeans_fit.cluster_centers_ 

# Initialize a vector of responsibilities in a one-hot-coded format.
final_responsibilities = np.zeros((len(train_images_flat),k))
# For each cluster assignment, set the matching entry of that image's
# one-hot row to 1.
for imgnum in range(len(train_images_flat)):
	final_responsibilities[imgnum][kmeans_labels[imgnum]] = 1


# Obtain predictions for each test point (Z is not used in the lines shown
# here).
Z = kmeans.predict(test_images_flat)

# Determine accuracies; the return value is discarded here.
Accuracy.final_accuracy(final_responsibilities, train_labels, 
    train_images_flat, kmeans_centers)
def main_run():
    """Fit portfolio weights for a fixed symbol list and plot diagnostics.

    Fetches data for ``symbols`` over 2012-01-01..2015-12-31 through
    ``utils.get_data_online``, derives the per-symbol inputs (standard
    deviation and mean of daily returns, ``utils.get_correlation`` output, a
    unit bias vector and ``Accuracy.get_accuracy`` output) and minimises the
    module-level ``error`` objective with SLSQP, subject to every weight lying
    in [0, 1] and the weights summing to one.  The optimisation result and its
    inputs are printed, a synthetic ``func`` curve is fitted with
    ``curve_fit`` (only its covariance is printed), and a matplotlib figure is
    shown.

    All output goes through single-argument ``print(...)`` calls so the
    function runs, and prints the same text, on both Python 2 and Python 3.

    Returns:
        None
    """
    symbols = [
        'AAPL', 'MSFT', 'ORCL', 'QCOM', 'BBY', 'MU', 'GILD', 'YUM', 'NFLX',
        'VZ', 'APA', 'RRC', 'MDLZ', 'CSCO', 'V', 'MET', 'SBUX', 'GGP', 'UA',
        'GM',
    ]

    # The initial guess is sized from the symbol list *before* the fetch,
    # whereas the statistics below are built from symbols[1:].
    # NOTE(review): these only line up if get_data_online prepends a
    # benchmark symbol to `symbols` in place -- confirm before reordering.
    h = np.ones(len(symbols))

    addressable_dates = pd.date_range('2012-01-01', '2015-12-31')
    df = utils.get_data_online(symbols, addressable_dates)
    df = pd.DataFrame(df, index=df.index, dtype=np.float64)
    df = df.dropna()

    # Daily returns are computed once and reused for both statistics.
    returns = utils.daily_returns(df[symbols[1:]])
    x = np.asarray(returns.std())    # printed below as "risk"
    y = np.asarray(returns.mean())   # printed below as "rewards"
    z = np.asarray(utils.get_correlation(df)[1:])
    b = np.ones(len(symbols) - 1)
    p = Accuracy.get_accuracy(symbols, addressable_dates)

    data = (x, y, z, b, p)

    # Equality constraint: the weights must sum to one.
    cons = {'type': 'eq', 'fun': lambda w: 1 - sum(w)}
    # Every weight is confined to [0, 1].
    bounds = tuple((0, 1) for _ in h)

    min_result = spo.minimize(
        error,
        h,
        args=(data,),
        method='SLSQP',
        bounds=bounds,
        constraints=cons,
        options={'disp': True},
    )

    print("Parameters = {}, Y = {}".format(
        np.round(min_result.x, 2), np.abs(min_result.fun)))  # for tracing

    model_bounds = max(x.max(), y.max())
    xdata = np.linspace(0, 5, 8)
    y_main = func(xdata, model_bounds, 1, -model_bounds)

    # "{} {}" reproduces the single space the Python 2 print statement put
    # between comma-separated items, so the emitted text is unchanged.
    report = (
        ("bias", b * np.round(min_result.x, 3)),
        ("risk: ", x),
        ("rewards: ", y),
        ("correlation: ", z),
        ("accuracy: ", p),
        ("y main", y_main),
    )
    for label, value in report:
        print("{} {}".format(label, value))

    plt.plot(xdata, y_main)

    # Fit `func` to a noisy synthetic curve; only the covariance is reported.
    y_model = func(xdata, 2.5, 1.3, 0.5)
    ydata = y_model + 0.2 * np.random.normal(size=len(xdata))
    _, pcov = curve_fit(func, xdata, ydata)
    print(pcov)

    # x and y are sorted independently, so the scatter pairs values by rank
    # rather than by symbol.
    plt.scatter(np.sort(x), np.sort(y))
    plt.show()

    return None