Exemple #1
0
    def normalizeHist(self, hist, n_words, n_images):
        """Re-weight a word-count histogram with an Okapi-style tf * idf score.

        For image ``i`` and word ``j`` the weight is ``tf * idf`` where::

            tf  = k1 * n_ij / (n_ij + k1 * (1 - b + b * n_i / average_words))
            idf = log((N - n_j + 0.5) / (n_j + 0.5))

        with ``n_ij = hist[i][j]``, ``n_i`` the sum of row ``i`` of ``hist``,
        ``n_j`` the sum of column ``j`` of the ``SimpleBi``-normalized
        histogram (presumably the number of images containing word ``j`` --
        confirm against ``SimpleBi``) and ``N = n_images``.  Cells whose idf
        is negative are set to 0.

        Args:
            hist: 2-D histogram indexable as ``hist[i][j]`` (image, word).
            n_words: number of columns to process.
            n_images: number of rows to process.

        Returns:
            A list of ``n_images`` lists of ``n_words`` weights.
        """
        print('Applying Okapi transformation...')

        k1 = 1.2
        b = 0.75

        bi = simpleBinarization.SimpleBi()
        bi_hist = np.asarray(bi.normalizeHist(hist, n_words, n_images))
        hist_np = np.asarray(hist)

        N = n_images

        new_hist = [[0 for x in range(n_words)] for x in range(n_images)]

        # The idf term depends only on the word, so compute it once per column
        # instead of once per (image, word) cell.  Skipped when there are no
        # images so an empty histogram is never indexed.
        idf = []
        if n_images > 0:
            for j in range(0, n_words):
                nt = bi_hist[:, j].sum()
                idf.append(math.log(float(N - nt + 0.5) / (nt + 0.5)))

        for i in range(0, n_images):
            # The length normalisation depends only on the image.  float()
            # keeps this a true division even if average_words is an int.
            nd = float(hist_np[i].sum())
            length_norm = k1 * (1 - b + b * (nd / self.average_words))
            for j in range(0, n_words):
                if idf[j] < 0:
                    # Negative idf is clamped to 0; the cell already holds 0.
                    continue
                nid = hist[i][j]
                tf = float(k1 * nid) / (nid + length_norm)
                new_hist[i][j] = tf * idf[j]

        print('Okapi method applied')

        return new_hist
Exemple #2
0
    def normalizeHist(self, hist, n_words, n_images):
        """Apply tf-idf weighting and scale every image row to sum to 1.

        Each count ``hist[i][j]`` is multiplied by ``log(1 + N / n_j)``, where
        ``N = n_images`` and ``n_j`` is the sum of column ``j`` of the
        ``SimpleBi``-normalized histogram (presumably the number of images
        containing word ``j`` -- confirm against ``SimpleBi``).  Words with
        ``n_j == 0`` get weight 0.  Each weighted row is then divided by its
        own sum; rows whose sum is 0 are returned as all zeros.

        Args:
            hist: 2-D histogram indexable as ``hist[i][j]`` (image, word).
            n_words: number of columns to process.
            n_images: number of rows to process.

        Returns:
            A list of ``n_images`` lists of ``n_words`` normalized weights.
        """
        print('Applying tf-idf transformation...')

        bi = simpleBinarization.SimpleBi()
        bi_hist = bi.normalizeHist(hist, n_words, n_images)

        N = n_images

        # The idf factor depends only on the word: compute it once per column
        # rather than rebuilding the column for every (image, word) cell.
        # None marks words that occur nowhere (n_j == 0).
        idf = []
        if n_images > 0:
            for j in range(0, n_words):
                nt = sum(row[j] for row in bi_hist)
                if nt != 0:
                    idf.append(math.log(1 + float(N) / nt))
                else:
                    idf.append(None)

        normalized = []
        for i in range(0, n_images):
            weighted = []
            for j in range(0, n_words):
                if idf[j] is not None:
                    weighted.append(float(hist[i][j]) * idf[j])
                else:
                    weighted.append(0)

            # The row total is needed for every cell of the row: sum it once.
            sum_line = sum(weighted)
            if sum_line != 0:
                normalized.append([float(w) / sum_line for w in weighted])
            else:
                normalized.append([0 for x in range(n_words)])

        print('tf-idf applied')

        return normalized
Exemple #3
0
 def normalizeHist(self, hist, n_words, n_images):
     """Apply tf-idf weighting to a word-count histogram.

     Each count ``hist[i][j]`` is multiplied by ``log(1 + N / n_j)``, where
     ``N = n_images`` and ``n_j`` is the sum of column ``j`` of the
     ``SimpleBi``-normalized histogram (presumably the number of images
     containing word ``j`` -- confirm against ``SimpleBi``).  Words with
     ``n_j == 0`` get weight 0.

     Args:
         hist: 2-D histogram indexable as ``hist[i][j]`` (image, word).
         n_words: number of columns to process.
         n_images: number of rows to process.

     Returns:
         A list of ``n_images`` lists of ``n_words`` weights.
     """
     print('Applying tf-idf transformation...')

     bi = simpleBinarization.SimpleBi()
     bi_hist = np.asarray(bi.normalizeHist(hist, n_words, n_images))

     N = n_images

     new_hist = [[0 for x in range(n_words)] for x in range(n_images)]

     # The idf factor depends only on the word, so it is computed once per
     # column instead of once per cell.  None marks words with n_j == 0.
     # Skipped when there are no images so an empty array is never indexed.
     idf = []
     if n_images > 0:
         for j in range(0, n_words):
             nt = bi_hist[:, j].sum()
             if nt != 0:
                 idf.append(math.log(1 + float(N) / nt))
             else:
                 idf.append(None)

     # The per-cell debug prints of the original were dropped: they emitted
     # three lines for every (image, word) pair.
     for i in range(0, n_images):
         for j in range(0, n_words):
             if idf[j] is not None:
                 new_hist[i][j] = float(hist[i][j]) * idf[j]

     print('tf-idf applied')

     return new_hist
Exemple #4
0
 def applyFilter(self,hist,n_words,n_images):
     """Drop the histogram columns of words that appear in too few images.

     For every word ``i`` the sum of column ``i`` of the
     ``SimpleBi``-normalized histogram is divided by ``n_images``; columns
     whose ratio is ``<= self.minThres`` are removed.  (The column sum is
     presumably the number of images containing the word -- confirm against
     ``SimpleBi``.)

     Args:
         hist: 2-D histogram (image x word).
         n_words: number of columns to examine.
         n_images: number of images; used as the divisor of the ratio.

     Returns:
         A new ``numpy`` array equal to ``hist`` without the removed columns.
     """
     bi = simpleBinarization.SimpleBi()
     bi_hist = np.array(bi.normalizeHist(hist, n_words, n_images))

     # Indexes of the columns whose relative frequency is at or below the
     # minimum threshold.
     indexes_remove = [
         i for i in range(0, n_words)
         if float(bi_hist[:, i].sum()) / n_images <= self.minThres
     ]

     return np.delete(np.array(hist), indexes_remove, axis=1)
Exemple #5
0
def run(pathImages, method, numpatch, imsample, percentage, codebook, dist,
        size, fselec, fselec_perc, histnorm, clust, nclusters, rep):
    """Run the BOC image-clustering pipeline and log it to a results file.

    A uniquely named ``.txt`` file is created under ``Results_BOC`` and is
    closed on every exit path, including the ``sys.exit()`` calls made when
    an option name is not recognised.

    Args:
        pathImages: dataset directory; its last ``/`` component names the run.
        method: not used by this function.
        numpatch: number of patches per image; ``int(sqrt(numpatch))`` is the
            number of divisions along each image axis.
        imsample: keypoint sampling option (NONE, SAMPLEI, SAMPLEP).
        percentage: value handed to the sampling classes.
        codebook: codebook algorithm (KMEANS, BIRCH, MINIBATCH, RANDOMV,
            RANDOM).
        dist: value handed to the clustering classes; also part of the file
            name, so it must be a string.
        size: codebook size (number of visual words).
        fselec: feature selection option (NONE, FMAX, FMIN, FMAXMIN).
        fselec_perc: indexable pair handed to the word filters
            (``[0]`` to the max filter, ``[1]`` to the min filter).
        histnorm: histogram normalization option (NONE, SBIN, TFNORM, TFIDF,
            TFIDF2, TFIDFNORM, OKAPI, POWER).
        clust: clustering algorithm (DBSCAN, KMEANS, BIRCH, MEANSIFT,
            HIERAR1, HIERAR2, COMM).
        nclusters: requested number of clusters.
        rep: number of repetitions of sampling, codebook and clustering.
    """
    im_dataset_name = pathImages.split('/')[-1]

    date_time = datetime.datetime.now().strftime('%b-%d-%I%M%p-%G')

    name_results_file = '_'.join([
        'BOC', im_dataset_name, str(numpatch), imsample, codebook, str(size),
        fselec, histnorm, clust, dist, date_time
    ])

    dir_results = 'Results_BOC'
    if not os.path.exists(dir_results):
        os.makedirs(dir_results)

    # Never overwrite an earlier results file: append _2, _3, ... if needed.
    file_count = 2
    file_name = os.path.join(dir_results, name_results_file)
    while os.path.exists(file_name + ".txt"):
        file_name = os.path.join(dir_results,
                                 name_results_file) + "_" + str(file_count)
        file_count = file_count + 1

    # The context manager closes the file even when the pipeline raises or
    # calls sys.exit().
    with open(file_name + ".txt", 'w') as f:
        _run_boc(f, dir_results, pathImages, numpatch, imsample, percentage,
                 codebook, dist, size, fselec, fselec_perc, histnorm, clust,
                 nclusters, rep)


def _run_boc(f, dir_results, pathImages, numpatch, imsample, percentage,
             codebook, dist, size, fselec, fselec_perc, histnorm, clust,
             nclusters, rep):
    """Describe, quantize, cluster and evaluate the images, logging to ``f``."""

    #################################################################
    #
    # Get images
    #
    #################################################################

    imList = get_imlist(pathImages)

    print('Number of images read = ' + str(len(imList)))
    f.write("Number of images in dataset read: " + str(len(imList)) + "\n")

    #################################################################
    #
    # Image description
    #
    #################################################################

    des_vector = []  # descriptors of all images, stacked (codebook input)
    number_of_kp = []  # number of descriptors per image

    counter = 1

    #save current time
    start_time = time.time()

    labels = []
    class_names = []
    imPaths = []

    #number of divisions of the image along each axis
    div = int(np.sqrt(numpatch))

    n_images = 0
    #describe every image by the dominant Lab color of each of its patches
    for im in imList:
        if 'DS_Store' in im:
            continue

        imPaths.append(im)
        print('image: ' + str(im) + ' number: ' + str(counter))
        #read image
        img = cv2.imread(im, 1)
        img_lab = cv2.cvtColor(img, cv.CV_BGR2Lab)

        height, width, comp = img_lab.shape
        # Floor division: the region sizes are used as slice bounds.
        h_region = height // div
        w_region = width // div

        des = []
        for i in range(0, div):
            for j in range(0, div):

                #mask selecting patch (i, j)
                mask = np.zeros((height, width), dtype=np.uint8)
                mask[i * h_region:(i + 1) * h_region,
                     j * w_region:(j + 1) * w_region] = 1

                color_hist = cv2.calcHist([img_lab], [0, 1, 2], mask,
                                          [256, 256, 256],
                                          [0, 256, 0, 256, 0, 256])

                # Keep the first bin (in index order) holding the maximum.
                max_color_l, max_color_a, max_color_b = np.where(
                    color_hist == np.max(color_hist))
                des.append(
                    [max_color_l[0], max_color_a[0], max_color_b[0]])

        number_of_kp.append(div * div)
        if counter == 1:
            des_vector = des
        else:
            des_vector = np.concatenate((des_vector, des), axis=0)
        counter += 1

        #for evaluation: the class is the file name prefix before the first "_"
        name1 = im.split("/")[-1]
        name = name1.split("_")[0]

        if name not in class_names:
            class_names.append(name)
        labels.append(class_names.index(name))

        n_images = n_images + 1

    if n_images == 0:
        # Nothing to describe: stop here instead of dividing by zero below.
        print('No images found in ' + str(pathImages))
        f.write("No images found in dataset\n")
        sys.exit(1)

    #average description time over the images actually processed
    elapsed_time = (time.time() - start_time) / n_images
    print('Time to compute detector and descriptor for each image = ' + str(
        elapsed_time))

    f.write(
        'Average time to compute detector and descriptor for each image = ' +
        str(elapsed_time) + '\n')

    average_words = sum(number_of_kp) / float(len(number_of_kp))

    print('Total number of features = ' + str(len(des_vector)))
    f.write('Total number of features obtained = ' + str(len(des_vector)) +
            '\n')
    print('Average number of keypoints per image = ' + str(average_words))
    f.write('Average number of keypoints per image = ' + str(average_words) +
            '\n')

    #################################################################
    #
    # Image and Keypoint sampling
    #
    #################################################################

    rand_indexes = []
    nmi_indexes = []

    # 1 selects the user-based evaluation, anything else the label-based one.
    users = 0

    for iteration in range(0, rep):

        print("\nIteraction #" + str(iteration + 1) + '\n')
        f.write("\nIteraction #" + str(iteration + 1) + '\n')

        print('Sampling images and keypoints prior to codebook computation...')

        if imsample != "NONE":

            sampleKp = sampleKeypoints.SamplingImandKey(
                n_images, number_of_kp, average_words, percentage)
            sampleallKp = sampleAllKeypoints.SamplingAllKey(percentage)

            names_sampling = np.array(["SAMPLEI", "SAMPLEP"])
            sample_method = np.array([sampleKp, sampleallKp])

            #Get the sampling method passed in the -g argument
            index = np.where(names_sampling == imsample)[0]
            if index.size > 0:
                sampling_to_use = sample_method[index[0]]
            else:
                print('Wrong sampling method passed in the -g argument. Options: NONE, SAMPLEI, SAMPLEP')
                sys.exit()

            #FOR RESULTS FILE
            sampling_to_use.writeFile(f)

            des_vector_sampled = sampling_to_use.sampleKeypoints(des_vector)

            print('Total number of features after sampling = ' + str(
                len(des_vector_sampled)))
            f.write('Total number of features after sampling = ' +
                    str(len(des_vector_sampled)) + '\n')

            print('Images and keypoints sampled...')

        else:
            print('No sampling method chosen')
            #FOR RESULTS FILE
            f.write(
                "No method of keypoint sampling chosen. Use all keypoints for codebook construction \n"
            )
            des_vector_sampled = des_vector

        #################################################################
        #
        # Codebook computation
        #
        #################################################################

        print('Obtaining codebook...')

        #save current time
        start_time = time.time()

        #Get codebook classes
        codebook_kmeans = KMeans1.KMeans1(size)
        codebook_birch = Birch.Birch(size)
        codebook_minibatch = minibatch.MiniBatch(size)
        codebook_randomv = randomSamplesBook.RandomVectors(size)
        codebook_allrandom = allrandom.AllRandom(size)

        names_codebook = np.array(
            ["KMEANS", "BIRCH", "MINIBATCH", "RANDOMV", "RANDOM"])
        codebook_algorithm = np.array([
            codebook_kmeans, codebook_birch, codebook_minibatch,
            codebook_randomv, codebook_allrandom
        ])

        #Get the codebook algorithm passed in the -c argument
        index = np.where(names_codebook == codebook)[0]
        if index.size > 0:
            codebook_to_use = codebook_algorithm[index[0]]
        else:
            print('Wrong codebook construction algorithm name passed in the -c argument. Options: KMEANS, BIRCH, MINIBATCH, RANDOMV and RANDOM')
            sys.exit()

        #FOR RESULTS FILE
        codebook_to_use.writeFileCodebook(f)

        #Get centers and projections using codebook algorithm
        #(only the projections are needed to build the histograms)
        _centers, projections = codebook_to_use.obtainCodebook(
            des_vector_sampled, des_vector)

        elapsed_time = (time.time() - start_time)
        print('Time to compute codebook = ' + str(elapsed_time))
        f.write('Time to compute codebook = ' + str(elapsed_time) + '\n')

        #################################################################
        #
        # Obtain Histogram
        #
        #################################################################

        print('Obtaining histograms...')

        hist = histogram.computeHist(projections, size, n_images, number_of_kp)
        print('Histograms obtained')

        ################################################################
        #
        # Feature selection
        #
        #################################################################

        print('Number of visual words = ' + str(len(hist[0])))

        if fselec != "NONE":

            print('Applying feature selection to descriptors...')

            filter_max = filterMax.WordFilterMax(fselec_perc[0])
            filter_min = filterMin.WordFilterMin(fselec_perc[1])
            filter_maxmin = filterMaxMin.WordFilterMaxMin(
                fselec_perc[0], fselec_perc[1])

            names_filter = np.array(["FMAX", "FMIN", "FMAXMIN"])
            filter_method = np.array([filter_max, filter_min, filter_maxmin])

            #Get the feature selection method passed in the -f argument
            index = np.where(names_filter == fselec)[0]
            if index.size > 0:
                filter_to_use = filter_method[index[0]]
            else:
                print('Wrong feature selection method name passed in the -f argument. Options: NONE, FMAX, FMIN, FMAXMIN')
                sys.exit()

            hist = filter_to_use.applyFilter(hist, size, n_images)

            #FOR RESULTS FILE
            filter_to_use.writeFile(f)

            new_size = hist.shape[1]

            print('Visual words Filtered')
            print('Number of visual words filtered = ' + str(size - new_size))
            f.write("Number of visual words filtered = " +
                    str(size - new_size) + '\n')
            print('Final number of visual words = ' + str(new_size))
            f.write('Final number of visual words = ' + str(new_size) + '\n')

        else:
            # Even without feature selection a min filter with threshold 0 is
            # applied to the histogram.
            filter_min = filterMin.WordFilterMin(0)
            hist = filter_min.applyFilter(hist, size, n_images)
            new_size = hist.shape[1]
            print('Number of visual words filtered = ' + str(size - new_size))
            #FOR RESULTS FILE
            f.write("No feature selection applied \n")

        #################################################################
        #
        # Histogram Normalization
        #
        #################################################################

        if histnorm != "NONE":

            #Get normalization classes
            norm_sbin = simpleBinarization.SimpleBi()
            norm_tfnorm = tfnorm.Tfnorm()
            norm_tfidf = tfidf.TfIdf()
            norm_tfidf2 = tfidf2.TfIdf2()
            norm_tfidfnorm = tfidfnorm.TfIdfnorm()
            norm_okapi = okapi.Okapi(average_words)
            norm_power = powerNorm.PowerNorm()

            names_normalization = np.array([
                "SBIN", "TFNORM", "TFIDF", "TFIDF2", "TFIDFNORM", "OKAPI",
                "POWER"
            ])
            normalization_method = np.array([
                norm_sbin, norm_tfnorm, norm_tfidf, norm_tfidf2,
                norm_tfidfnorm, norm_okapi, norm_power
            ])

            #Get the normalization passed in the -h argument
            index = np.where(names_normalization == histnorm)[0]
            if index.size > 0:
                normalization_to_use = normalization_method[index[0]]
                new_hist = normalization_to_use.normalizeHist(
                    hist, new_size, n_images)
            else:
                print('Wrong normalization name passed in the -h argument. Options: NONE, SBIN, TFNORM, TFIDF, TFIDF2, TFIDFNORM, OKAPI and POWER')
                sys.exit()

            #FOR RESULTS FILE
            normalization_to_use.writeFile(f)

        else:
            #FOR RESULTS FILE
            f.write("No histogram normalization applied\n")
            new_hist = hist

        #################################################################
        #
        # Clustering of the features
        #
        #################################################################

        #save current time
        start_time = time.time()

        #Get clustering classes
        clust_dbscan = Dbscan.Dbscan(dist)
        # NOTE(review): KMeans1 receives a list here but a plain value for the
        # codebook above -- confirm which form KMeans1 expects.
        clust_kmeans = KMeans1.KMeans1([nclusters])
        clust_birch = Birch.Birch(nclusters)
        clust_meanSift = meanSift.MeanSift(nclusters)
        clust_hierar1 = hierarchicalClustering.Hierarchical(nclusters, dist)
        clust_hierar2 = hierarchicalClustScipy.HierarchicalScipy(dist)
        clust_community = communityDetection.CommunityDetection(dist)

        names_clustering = np.array([
            "DBSCAN", "KMEANS", "BIRCH", "MEANSIFT", "HIERAR1", "HIERAR2",
            "COMM"
        ])
        clustering_algorithm = np.array([
            clust_dbscan, clust_kmeans, clust_birch, clust_meanSift,
            clust_hierar1, clust_hierar2, clust_community
        ])

        #Get the clustering algorithm passed in the -a argument
        index = np.where(names_clustering == clust)[0]
        if index.size > 0:
            clustering_to_use = clustering_algorithm[index[0]]
        else:
            print('Wrong clustering algorithm name passed in the -a argument. Options: DBSCAN, KMEANS, BIRCH, MEANSIFT, HIERAR1, HIERAR2, COMM')
            sys.exit()

        clusters = clustering_to_use.obtainClusters(new_hist)

        #FOR RESULTS FILE
        clustering_to_use.writeFileCluster(f)

        elapsed_time = (time.time() - start_time)
        print('Time to run clustering algorithm = ' + str(elapsed_time))
        f.write('Time to run clustering algorithm = ' + str(elapsed_time) +
                '\n')

        # Kept in its own variable: overwriting the nclusters argument would
        # change the number of clusters requested in the next repetition.
        n_found = max(clusters) + 1

        print('Number of clusters obtained = ' + str(n_found))
        f.write('Number of clusters obtained = ' + str(n_found) + '\n')

        print('Clusters obtained = ' + str(np.asarray(clusters)))

        #################################################################
        #
        # Create folder with central images for each cluster
        #
        #################################################################

        #obtain representative images for each cluster
        central_ims = clust_community.obtainCenteralImages(new_hist, clusters)

        central_folder = os.path.join(dir_results, 'CenterImages')
        if not os.path.exists(central_folder):
            os.makedirs(central_folder)

        for count, central_im in enumerate(central_ims):
            filename = os.path.join(central_folder,
                                    'Cluster_' + str(count) + '.jpg')
            img = cv2.imread(imPaths[central_im], 1)
            cv2.imwrite(filename, img)

        #################################################################
        #
        # Separate Clusters into folders
        #
        #################################################################

        clusters_folder = os.path.join(dir_results, 'Clusters')
        if not os.path.exists(clusters_folder):
            os.makedirs(clusters_folder)

        clust_dir = []
        for iclust in range(0, n_found):
            direc = os.path.join(clusters_folder, 'Cluster_' + str(iclust))
            if not os.path.exists(direc):
                os.makedirs(direc)
            clust_dir.append(direc)

        #copy every image into the folder of the cluster it was assigned to
        for im_index, im_path in enumerate(imPaths):
            im_name = im_path.split('/')[-1]
            filename = os.path.join(clust_dir[int(clusters[im_index])],
                                    im_name)
            img = cv2.imread(im_path, 1)
            cv2.imwrite(filename, img)

        #################################################################
        #
        # Evaluation
        #
        #################################################################

        if users == 1:

            rand_index = evaluationUsers.randIndex(clusters)
            rand_indexes.append(rand_index)
            print('rand_index = ' + str(rand_index))
            f.write("Rand Index = " + str(rand_index) + "\n")

        else:
            if len(clusters) == len(labels):

                f.write("\nResults\n")

                f.write('Clusters Obtained = ' + str(np.asarray(clusters)) +
                        "\n")
                f.write('Labels = ' + str(np.asarray(labels)) + "\n")

                rand_index = metrics.adjusted_rand_score(labels, clusters)
                rand_indexes.append(rand_index)
                print('rand_index = ' + str(rand_index))
                f.write("Rand Index = " + str(rand_index) + "\n")

                NMI_index = metrics.normalized_mutual_info_score(
                    labels, clusters)
                nmi_indexes.append(NMI_index)
                print('NMI_index = ' + str(NMI_index))
                f.write("NMI Index = " + str(NMI_index) + "\n")

    if rep > 1:
        f.write("\nFINAL RESULTS\n")
        # statistics.stdev needs at least two values, and a repetition whose
        # evaluation was skipped contributes no score: average over the
        # scores actually collected.
        if len(rand_indexes) > 1:
            f.write("Avg Rand Index = " +
                    str(float(sum(rand_indexes)) / len(rand_indexes)) + "\n")
            f.write("Std Rand Index = " +
                    str(statistics.stdev(rand_indexes)) + "\n")
        if users != 1 and len(nmi_indexes) > 1:
            f.write("Avg NMI Index = " +
                    str(float(sum(nmi_indexes)) / len(nmi_indexes)) + "\n")
            f.write("Std NMI Index = " +
                    str(statistics.stdev(nmi_indexes)) + "\n")
Exemple #6
0
def run(pathImages, method, keypnt, numpatch, equalnum, imdes, imsample,
        percentage, codebook, dist, size, fselec, fselec_perc, histnorm, clust,
        K, pca, nclusters, rep):

    #################################################################
    #
    # Initializations and result file configurations
    #
    #################################################################

    im_dataset_name = pathImages.split('/')[-1]

    date_time = datetime.datetime.now().strftime('%b-%d-%I%M%p-%G')

    name_results_file = 'BOF_' + im_dataset_name + '_' + keypnt + '_' + str(
        numpatch
    ) + '_' + str(
        equalnum
    ) + '_' + imdes + '_' + imsample + '_' + codebook + '_' + str(
        size
    ) + '_' + fselec + '_' + histnorm + '_' + clust + '_' + dist + '_' + date_time

    #dir_results = 'Results_' + im_dataset_name + '_BOF_' + date_time
    dir_results = 'Results_BOF'

    if not os.path.exists(dir_results):
        os.makedirs(dir_results)

    file_count = 2
    file_name = os.path.join(dir_results, name_results_file)
    while os.path.exists(file_name + ".txt"):
        file_name = os.path.join(dir_results,
                                 name_results_file) + "_" + str(file_count)
        file_count = file_count + 1
    f = open(file_name + ".txt", 'w')

    #################################################################
    #
    # Get images
    #
    #################################################################

    #pathImages = '/Users/Mariana/mieec/Tese/Development/ImageDatabases/Graz-01_sample'

    imList = get_imlist(pathImages)

    print 'Number of images read = ' + str(len(imList))
    f.write("Number of images in dataset read: " + str(len(imList)) + "\n")

    #################################################################
    #
    # Image description
    #
    #################################################################

    #Get detector classes
    det_sift = siftLib.Sift(numpatch, equalnum)
    det_surf = surfLib.Surf(numpatch, equalnum)
    det_fast = fastDetector.Fast(numpatch, equalnum)
    det_star = starDetector.Star(numpatch, equalnum)
    det_orb = orbLib.Orb(numpatch, equalnum)
    det_random = randomDetector.Random(numpatch)

    names_detectors = np.array(
        ["SIFT", "SURF", "FAST", "STAR", "ORB", "RANDOM"])
    detectors = np.array(
        [det_sift, det_surf, det_fast, det_star, det_orb, det_random])

    #Get the detector passed in the -k argument
    index = np.where(names_detectors == keypnt)[0]
    if index.size > 0:
        detector_to_use = detectors[index[0]]
    else:
        print 'Wrong detector name passed in the -k argument. Options: SIFT, SURF, FAST, STAR, ORB and RANDOM'
        sys.exit()

    #FOR RESULTS FILE
    detector_to_use.writeParametersDet(f)

    #Get descriptor classes
    des_sift = siftLib.Sift(numpatch, equalnum)
    des_surf = surfLib.Surf(numpatch, equalnum)
    des_orb = orbLib.Orb(numpatch)
    des_brief = briefDescriptor.Brief()
    des_freak = freakDescriptor.Freak()

    names_descriptors = np.array(["SIFT", "SURF", "ORB", "BRIEF", "FREAK"])
    descriptors = np.array([des_sift, des_surf, des_orb, des_brief, des_freak])

    #Get the detector passed in the -d argument
    index = np.where(names_descriptors == imdes)[0]
    if index.size > 0:
        descriptor_to_use = descriptors[index[0]]
    else:
        print 'Wrong descriptor name passed in the -d argument. Options: SIFT, SURF, ORB, BRIEF and FREAK'
        sys.exit()

    #FOR RESULTS FILE
    descriptor_to_use.writeParametersDes(f)

    kp_vector = []  #vector with the keypoints object
    des_vector = [
    ]  #vector wih the descriptors (in order to obtain the codebook)
    number_of_kp = []  #vector with the number of keypoints per image

    counter = 1

    #save current time
    start_time = time.time()

    labels = []
    class_names = []
    #ADDED
    imPaths = []

    #detect the keypoints and compute the sift descriptors for each image
    for im in imList:
        if 'DS_Store' not in im:
            #ADDED
            imPaths.append(im)
            print 'image: ' + str(im) + ' number: ' + str(counter)
            #read image
            img = cv2.imread(im, 0)

            #mask in order to avoid keypoints in border of image. size = 40 pixels
            border = 40
            height, width = img.shape
            mask = np.zeros(img.shape, dtype=np.uint8)
            mask[border:height - border, border:width - border] = 1

            #get keypoints from detector
            kp = detector_to_use.detectKp(img, mask)

            #get features from descriptor
            des = descriptor_to_use.computeDes(img, kp)

            number_of_kp.append(len(kp))
            kp_vector.append(kp)
            if counter == 1:
                des_vector = des
            else:
                des_vector = np.concatenate((des_vector, des), axis=0)
            counter += 1

            #for evaluation
            name1 = im.split("/")[-1]
            name = name1.split("_")[0]

            if name in class_names:
                index = class_names.index(name)
                labels.append(index)
            else:
                class_names.append(name)
                index = class_names.index(name)
                labels.append(index)

    #measure the time to compute the description of each image (divide time elapsed by # of images)
    elapsed_time = (time.time() - start_time) / len(imList)
    print 'Time to compute detector and descriptor for each image = ' + str(
        elapsed_time)

    f.write(
        'Average time to compute detector and descriptor for each image = ' +
        str(elapsed_time) + '\n')

    n_images = len(kp_vector)

    average_words = sum(number_of_kp) / float(len(number_of_kp))

    print 'Total number of features = ' + str(len(des_vector))
    f.write('Total number of features obtained = ' + str(len(des_vector)) +
            '\n')
    print 'Average number of keypoints per image = ' + str(average_words)
    f.write('Average number of keypoints per image = ' + str(average_words) +
            '\n')

    #################################################################
    #
    # Dimentionality reduction
    #
    #################################################################

    if pca != None:
        start_time = time.time()
        print 'Applying PCA...'
        pca = PCA(n_components=pca)
        descriptors_reduced = pca.fit(des_vector).transform(des_vector)
        print 'PCA Applied.'
        print 'time to apply PCA = ' + str(time.time() - start_time)
        des_vector = descriptors_reduced

    #################################################################
    #
    # Image and Keypoint sampling
    #
    #################################################################

    rand_indexes = []
    nmi_indexes = []

    for iteraction in range(0, rep):

        print "\nIteraction #" + str(iteraction + 1) + '\n'
        f.write("\nIteraction #" + str(iteraction + 1) + '\n')

        print 'Sampling images and keypoints prior to codebook computation...'

        if imsample != "NONE":

            sampleKp = sampleKeypoints.SamplingImandKey(
                n_images, number_of_kp, average_words, percentage)
            sampleallKp = sampleAllKeypoints.SamplingAllKey(percentage)

            names_sampling = np.array(["SAMPLEI", "SAMPLEP"])
            sample_method = np.array([sampleKp, sampleallKp])

            #Get the detector passed in the -g argument
            index = np.where(names_sampling == imsample)[0]
            if index.size > 0:
                sampling_to_use = sample_method[index[0]]
            else:
                print 'Wrong sampling method passed in the -g argument. Options: NONE, SAMPLEI, SAMPLEP'
                sys.exit()

            #FOR RESULTS FILE
            sampling_to_use.writeFile(f)

            des_vector_sampled = sampling_to_use.sampleKeypoints(des_vector)

            print 'Total number of features after sampling = ' + str(
                len(des_vector_sampled))
            f.write('Total number of features after sampling = ' +
                    str(len(des_vector_sampled)) + '\n')

            print 'Images and keypoints sampled...'

        else:
            print 'No sampling method chosen'
            #FOR RESULTS FILE
            f.write(
                "No method of keypoint sampling chosen. Use all keypoints for codebook construction \n"
            )
            des_vector_sampled = des_vector

        #################################################################
        #
        # Codebook computation
        #
        #################################################################

        print 'Obtaining codebook...'

        #save current time
        start_time = time.time()

        #Get detector classes
        codebook_kmeans = KMeans1.KMeans1(size)
        codebook_birch = Birch.Birch(size)
        codebook_minibatch = minibatch.MiniBatch(size)
        codebook_randomv = randomSamplesBook.RandomVectors(size)
        codebook_allrandom = allrandom.AllRandom(size)

        names_codebook = np.array(
            ["KMEANS", "BIRCH", "MINIBATCH", "RANDOMV", "RANDOM"])
        codebook_algorithm = np.array([
            codebook_kmeans, codebook_birch, codebook_minibatch,
            codebook_randomv, codebook_allrandom
        ])

        #Get the detector passed in the -c argument
        index = np.where(names_codebook == codebook)[0]
        if index.size > 0:
            codebook_to_use = codebook_algorithm[index[0]]
        else:
            print 'Wrong codebook construction algorithm name passed in the -c argument. Options: KMEANS, MINIBATCH, RANDOMV and RANDOM'
            sys.exit()

        #FOR RESULTS FILE
        codebook_to_use.writeFileCodebook(f)

        #Get centers and projections using codebook algorithm
        centers, projections = codebook_to_use.obtainCodebook(
            des_vector_sampled, des_vector)

        #compute the number of unique descriptor vectors
        codebook_randomv.unique_vectors(centers)

        elapsed_time = (time.time() - start_time)
        print 'Time to compute codebook = ' + str(elapsed_time)
        f.write('Time to compute codebook = ' + str(elapsed_time) + '\n')

        #################################################################
        #
        # Obtain Histogram
        #
        #################################################################

        print 'Obtaining histograms...'

        #print 'projection shape = '+ str(projections.shape)
        #print 'size = ' + str(size)
        #print 'n of images = ' + str(n_images)
        #print 'number of kp' + str(number_of_kp)

        hist = histogram.computeHist(projections, size, n_images, number_of_kp)
        #print hist
        print 'Histograms obtained'

        ################################################################
        #
        # Feature selection
        #
        #################################################################

        print 'Number of visual words = ' + str(len(hist[0]))

        if fselec != "NONE":

            print 'Applying feature selection to descriptors...'

            filter_max = filterMax.WordFilterMax(fselec_perc[0])
            filter_min = filterMin.WordFilterMin(fselec_perc[1])
            filter_maxmin = filterMaxMin.WordFilterMaxMin(
                fselec_perc[0], fselec_perc[1])

            names_filter = np.array(["FMAX", "FMIN", "FMAXMIN"])
            filter_method = np.array([filter_max, filter_min, filter_maxmin])

            #Get the feature selection filter passed in the -f argument
            index = np.where(names_filter == fselec)[0]
            if index.size > 0:
                filter_to_use = filter_method[index[0]]
            else:
                print 'Wrong codebook construction algorithm name passed in the -f argument. Options: NONE, FMAX, FMIN, FMAXMIN'
                sys.exit()

            hist = filter_to_use.applyFilter(hist, size, n_images)

            #FOR RESULTS FILE
            filter_to_use.writeFile(f)

            new_size = hist.shape[1]

            print 'Visual words Filtered'
            print 'Number of visual words filtered = ' + str(size - new_size)
            f.write("Number of visual words filtered = " +
                    str(size - new_size) + '\n')
            print 'Final number of visual words = ' + str(new_size)
            f.write('Final number of visual words = ' + str(new_size) + '\n')

        else:
            #FOR RESULTS FILE
            filter_min = filterMin.WordFilterMin(0)
            hist = filter_min.applyFilter(hist, size, n_images)
            new_size = hist.shape[1]
            print 'Number of visual words filtered = ' + str(size - new_size)
            f.write("No feature selection applied \n")

        #################################################################
        #
        # Histogram Normalization
        #
        #################################################################

        if histnorm != "NONE":

            #Get histogram normalization classes
            norm_sbin = simpleBinarization.SimpleBi()
            norm_tfnorm = tfnorm.Tfnorm()
            norm_tfidf = tfidf.TfIdf()
            norm_tfidf2 = tfidf2.TfIdf2()
            norm_tfidf3 = tfidf3.Tfidf3()
            norm_power = powerNorm.PowerNorm()
            norm_tfidfnorm = tfidfnorm.TfIdfnorm()
            norm_okapi = okapi.Okapi(average_words)

            names_normalization = np.array(
                ["SBIN", "TFNORM", "TFIDF", "TFIDF2", "TFIDFNORM", "OKAPI"])
            normalization_method = np.array([
                norm_sbin, norm_tfnorm, norm_tfidf, norm_tfidf2,
                norm_tfidfnorm, norm_okapi
            ])

            #Get the normalization method passed in the -h argument
            index = np.where(names_normalization == histnorm)[0]
            if index.size > 0:
                normalization_to_use = normalization_method[index[0]]
                new_hist = normalization_to_use.normalizeHist(
                    hist, new_size, n_images)
            else:
                print 'Wrong normalization name passed in the -h argument. Options: SBIN, TFNORM, TFIDF and TFIDF2'
                sys.exit()

            #FOR RESULTS FILE
            normalization_to_use.writeFile(f)

        else:
            #FOR RESULTS FILE
            f.write("No histogram normalization applied\n")
            new_hist = hist

        #################################################################
        #
        # Clustering of the features
        #
        #################################################################

        #save current time
        start_time = time.time()

        #Get clustering classes
        clust_dbscan = Dbscan.Dbscan(dist)
        clust_kmeans = KMeans1.KMeans1([nclusters])
        clust_kmeans2 = kmeans2.KMeans2([nclusters])
        clust_birch = Birch.Birch(nclusters)
        clust_meanSift = meanSift.MeanSift(nclusters)
        clust_hierar1 = hierarchicalClustering.Hierarchical(nclusters, dist)
        clust_hierar2 = hierarchicalClustScipy.HierarchicalScipy(dist)
        clust_community = communityDetection.CommunityDetection(dist)

        names_clustering = np.array([
            "DBSCAN", "KMEANS", "BIRCH", "MEANSIFT", "HIERAR1", "HIERAR2",
            "COMM"
        ])
        clustering_algorithm = np.array([
            clust_dbscan, clust_kmeans, clust_birch, clust_meanSift,
            clust_hierar1, clust_hierar2, clust_community
        ])

        #Get the clustering algorithm passed in the -a argument
        index = np.where(names_clustering == clust)[0]
        if index.size > 0:
            clustering_to_use = clustering_algorithm[index[0]]
        else:
            print 'Wrong clustering algorithm name passed in the -a argument. Options: DBSCAN, KMEANS, BIRCH, MEANSIFT, HIERAR1, HIERAR2, COMM'
            sys.exit()

        clusters = clustering_to_use.obtainClusters(new_hist)

        #FOR RESULTS FILE
        clustering_to_use.writeFileCluster(f)

        elapsed_time = (time.time() - start_time)
        print 'Time to run clustering algorithm = ' + str(elapsed_time)
        f.write('Time to run clustering algorithm = ' + str(elapsed_time) +
                '\n')

        #ADDED
        nclusters = int(max(clusters) + 1)
        print 'Number of clusters obtained = ' + str(max(clusters) + 1)
        f.write('Number of clusters obtained = ' + str(max(clusters) + 1) +
                '\n')

        print 'Clusters obtained = ' + str(np.asarray(clusters))

        #date_time = datetime.datetime.now().strftime('%b-%d-%I%M%p-%G')
        #np.savetxt('saveClusters_'+date_time+'_.txt', clusters, '%i', ',')

        #ADDED
        #################################################################
        #
        # Create folder with central images for each cluster
        #
        #################################################################

        ###obtain representative images for each cluster
        #central_ims = clust_community.obtainCenteralImages(new_hist, clusters)

        #central_folder = os.path.join(dir_results,'CenterImages')
        #if not os.path.exists(central_folder):
        #os.makedirs(central_folder)

        #count=0
        #for central_im in central_ims:
        #filename = os.path.join(central_folder,'Cluster_'+str(count)+'.jpg')
        #img = cv2.imread(imPaths[central_im],1)
        #cv2.imwrite(filename, img)
        #count = count + 1

        ##ADDED
        ##################################################################
        ##
        ## Separate Clusters into folders
        ##
        ##################################################################

        #clusters_folder = os.path.join(dir_results,'Clusters')
        #if not os.path.exists(clusters_folder):
        #os.makedirs(clusters_folder)

        #clust_dir = []
        #for iclust in range(0,nclusters):
        #direc = os.path.join(clusters_folder,'Cluster_'+str(iclust))
        #if not os.path.exists(direc):
        #os.makedirs(direc)
        #clust_dir.append(direc)

        #for im in range(0,len(imPaths)):
        #im_name = imPaths[im].split('/')[-1]
        ##print clust_dir[int(clusters[im])]
        #filename = os.path.join(clust_dir[int(clusters[im])],im_name)
        ##print filename
        #img = cv2.imread(imPaths[im],1)
        #cv2.imwrite(filename, img)

        ##calculate distances between images and closest images
        #closest_im = distances.calculateClosest(new_hist,dist)
        ##print closest_im

        #if not os.path.exists('ClosestImages'):
        #os.makedirs('ClosestImages')

        #file_name = os.path.join('ClosestImages',name_results_file)
        #f2 = open(file_name + ".txt", 'w')
        #counter = 0
        #counter2 = 1
        #for ims in closest_im:
        #for im in ims:
        #f2.write(str(counter2) + '-' + str(counter) + '-' + str(im) + '\n')
        #counter2 = counter2 + 1
        #counter = counter + 1

        #f2.close()

        #################################################################
        #
        # Evaluation
        #
        #################################################################

        users = 0
        #labels = np.load('IndividualClustersMatrix.npy')

        if users == 1:

            rand_index = evaluationUsers.randIndex(clusters)
            rand_indexes.append(rand_index)
            print 'rand_index = ' + str(rand_index)
            f.write("Rand Index = " + str(rand_index) + "\n")

        else:
            if len(clusters) == len(labels):

                f.write("\nResults\n")

                f.write('Clusters Obtained = ' + str(np.asarray(clusters)))
                f.write('Labels = ' + str(np.asarray(labels)))
                rand_index = metrics.adjusted_rand_score(labels, clusters)
                rand_indexes.append(rand_index)
                print 'rand_index = ' + str(rand_index)
                f.write("Rand Index = " + str(rand_index) + "\n")
                NMI_index = metrics.normalized_mutual_info_score(
                    labels, clusters)
                nmi_indexes.append(NMI_index)
                print 'NMI_index = ' + str(NMI_index)
                f.write("NMI Index = " + str(NMI_index) + "\n")

    if rep > 1:
        f.write("\nFINAL RESULTS\n")
        f.write("Avg Rand Index = " + str(float(sum(rand_indexes)) / rep) +
                "\n")
        f.write("Std Rand Index = " + str(statistics.stdev(rand_indexes)) +
                "\n")
        if users != 1:
            f.write("Avg NMI Index = " + str(float(sum(nmi_indexes)) / rep) +
                    "\n")
            f.write("Std NMI Index = " + str(statistics.stdev(nmi_indexes)) +
                    "\n")
    f.close()
Exemple #7
0
            filter_min = filterMin.WordFilterMin(0)
            hist = filter_min.applyFilter(hist, size, n_images)
            new_size = hist.shape[1]
            print 'Number of visual words filtered = ' + str(size - new_size)
            f.write("No feature selection applied \n")

        #################################################################
        #
        # Histogram Normalization
        #
        #################################################################

        if histnorm != "NONE":

            #Get histogram normalization classes
            norm_sbin = simpleBinarization.SimpleBi()
            norm_tfnorm = tfnorm.Tfnorm()
            norm_tfidf = tfidf.TfIdf()
            norm_tfidf2 = tfidf2.TfIdf2()
            norm_tfidfnorm = tfidfnorm.TfIdfnorm()
            norm_okapi = okapi.Okapi(average_words)

            names_normalization = np.array(
                ["SBIN", "TFNORM", "TFIDF", "TFIDF2", "TFIDFNORM", "OKAPI"])
            normalization_method = np.array([
                norm_sbin, norm_tfnorm, norm_tfidf, norm_tfidf2,
                norm_tfidfnorm, norm_okapi
            ])

            #Get the normalization method passed in the -h argument
            index = np.where(names_normalization == histnorm)[0]
Exemple #8
0
def run(pathImages, method, keypnt, numpatch, equalnum, imdes, imsample,
        percentage, codebook, dist, size, fselec, fselec_perc, histnorm, clust,
        K, pca, nclusters, rep, levels):

    #################################################################
    #
    # Initializations and result file configurations
    #
    #################################################################

    #warnings.simplefilter("error")

    if os.path.exists('save_HIST.txt') == True:
        os.remove('save_HIST.txt')

    if os.path.exists('save_dist.txt') == True:
        os.remove('save_dist.txt')

    if os.path.exists('saveClustersKmeans.txt') == True:
        os.remove('saveClustersKmeans.txt')

    im_dataset_name = pathImages.split('/')[-1]

    date_time = datetime.datetime.now().strftime('%b-%d-%I%M%p-%G')

    name_results_file = im_dataset_name + '_' + keypnt + '_' + str(
        numpatch
    ) + '_' + str(equalnum) + '_' + imdes + '_' + 'levels:' + str(
        levels
    ) + '_' + imsample + '_' + codebook + '_' + str(
        size
    ) + '_' + fselec + '_' + histnorm + '_' + clust + '_' + dist + '_' + date_time

    #dir_results = 'Results_' + im_dataset_name + '_SPM_' + date_time
    dir_results = 'Results_SPM'

    if not os.path.exists(dir_results):
        os.makedirs(dir_results)

    file_count = 2
    file_name = os.path.join(dir_results, name_results_file)
    while os.path.exists(file_name + ".txt"):
        file_name = os.path.join(dir_results,
                                 name_results_file) + "_" + str(file_count)
        file_count = file_count + 1
    f = open(file_name + ".txt", 'w')

    #################################################################
    #
    # Get images
    #
    #################################################################

    #pathImages = '/Users/Mariana/mieec/Tese/Development/ImageDatabases/Graz-01_sample'

    imList = get_imlist(pathImages)

    print 'Number of images read = ' + str(len(imList))
    f.write("Number of images in dataset read: " + str(len(imList)) + "\n")

    #################################################################
    #
    # Image description
    #
    #################################################################

    #Number of regions
    n_regions = np.power(4, levels - 1)

    #Get detector classes
    det_sift = siftLib.Sift(numpatch / n_regions, equalnum)
    det_surf = surfLib.Surf(numpatch / n_regions, equalnum)
    det_fast = fastDetector.Fast(numpatch / n_regions, equalnum)
    det_star = starDetector.Star(numpatch / n_regions, equalnum)
    det_orb = orbLib.Orb(numpatch / n_regions, equalnum)
    det_random = randomDetector.Random(numpatch / n_regions)

    names_detectors = np.array(
        ["SIFT", "SURF", "FAST", "STAR", "ORB", "RANDOM"])
    detectors = np.array(
        [det_sift, det_surf, det_fast, det_star, det_orb, det_random])

    #Get the detector passed in the -k argument
    index = np.where(names_detectors == keypnt)[0]
    if index.size > 0:
        detector_to_use = detectors[index[0]]
    else:
        print 'Wrong detector name passed in the -k argument. Options: SIFT, SURF, FAST, STAR, ORB and RANDOM'
        sys.exit()

    #FOR RESULTS FILE
    detector_to_use.writeParametersDet(f)

    #Get descriptor classes
    des_sift = siftLib.Sift(numpatch / n_regions, equalnum)
    des_surf = surfLib.Surf(numpatch / n_regions, equalnum)
    des_orb = orbLib.Orb(numpatch / n_regions)
    des_brief = briefDescriptor.Brief()
    des_freak = freakDescriptor.Freak()

    names_descriptors = np.array(["SIFT", "SURF", "ORB", "BRIEF", "FREAK"])
    descriptors = np.array([des_sift, des_surf, des_orb, des_brief, des_freak])

    #Get the descriptor passed in the -d argument
    index = np.where(names_descriptors == imdes)[0]
    if index.size > 0:
        descriptor_to_use = descriptors[index[0]]
    else:
        print 'Wrong descriptor name passed in the -d argument. Options: SIFT, SURF, ORB, BRIEF and FREAK'
        sys.exit()

    #FOR RESULTS FILE
    descriptor_to_use.writeParametersDes(f)

    kp_vector = []  #vector with the keypoints object
    des_vector = [
    ]  #vector with the descriptors (in order to obtain the codebook)
    number_of_kp = []  #vector with the number of keypoints per image

    counter = 1

    #save current time
    start_time = time.time()

    labels = []
    class_names = []

    #Border
    border = 40

    side = int(np.sqrt(n_regions))
    des_vector_byregion = [0] * n_regions
    number_of_kp_region = [0] * n_regions
    filled = [0] * n_regions

    #matrix mapping each (row, column) grid cell to its region index
    mat_indexes = np.array([[0, 1, 4, 5, 16, 17, 20, 21],
                            [2, 3, 6, 7, 18, 19, 22, 23],
                            [8, 9, 12, 13, 24, 25, 28, 29],
                            [10, 11, 14, 15, 26, 27, 30, 31],
                            [32, 33, 36, 37, 48, 49, 52, 53],
                            [34, 35, 38, 39, 50, 51, 54, 55],
                            [40, 41, 44, 45, 56, 57, 60, 61],
                            [42, 43, 46, 47, 58, 59, 62, 63]])

    #detect the keypoints and compute the descriptors for each region of each image
    for im in imList:
        if 'DS_Store' not in im:
            print 'image: ' + str(im) + ' number: ' + str(counter)
            #read image
            img = cv2.imread(im, 0)

            # region
            for i in range(0, side):
                for j in range(0, side):

                    #mask in order to avoid keypoints in border of image. size = 40 pixels
                    height, width = img.shape
                    h_region = (height - 2 * border) / np.sqrt(n_regions)
                    w_region = (width - 2 * border) / np.sqrt(n_regions)
                    mask = np.zeros(img.shape, dtype=np.uint8)

                    mask[border + i * h_region:border + (i + 1) * h_region,
                         border + j * w_region:border + (j + 1) * w_region] = 1

                    #get keypoints from detector
                    kp = detector_to_use.detectKp(img, mask)

                    #get features from descriptor
                    des = descriptor_to_use.computeDes(img, kp)

                    number_of_kp.append(len(kp))

                    #print i*np.sqrt(n_regions)+j
                    #print number_of_kp_region[int(i*np.sqrt(n_regions)+j)]

                    if filled[mat_indexes[i, j]] == 1:
                        #descriptors of all the regions (in a list)
                        des_vector_byregion[mat_indexes[
                            i, j]] = np.concatenate(
                                (des_vector_byregion[mat_indexes[i, j]], des),
                                axis=0)

                        #number of descriptors in each region
                        number_of_kp_region[mat_indexes[
                            i, j]] = np.concatenate(
                                (number_of_kp_region[mat_indexes[i, j]],
                                 np.array([len(kp)])),
                                axis=0)
                    else:
                        des_vector_byregion[mat_indexes[i, j]] = des
                        number_of_kp_region[mat_indexes[i, j]] = np.array(
                            [len(kp)])
                        filled[mat_indexes[i, j]] = 1

                    #print des_vector_byregion
                    #print number_of_kp_region

            #for evaluation
            name1 = im.split("/")[-1]
            name = name1.split("_")[0]

            if name in class_names:
                index = class_names.index(name)
                labels.append(index)
            else:
                class_names.append(name)
                index = class_names.index(name)
                labels.append(index)
            counter += 1

    #measure the time to compute the description of each image (divide time elapsed by # of images)
    elapsed_time = (time.time() - start_time) / len(imList)
    print 'Time to compute detector and descriptor for each image = ' + str(
        elapsed_time)

    f.write(
        'Average time to compute detector and descriptor for each image = ' +
        str(elapsed_time) + '\n')

    n_images = counter - 1

    average_words = sum(number_of_kp) / float(len(number_of_kp))

    #all the descriptors together
    des_vector = np.concatenate(np.array(des_vector_byregion))

    print 'Total number of features = ' + str(len(des_vector))
    f.write('Total number of features obtained = ' + str(len(des_vector)) +
            '\n')
    print 'Average number of keypoints per image = ' + str(average_words)
    f.write('Average number of keypoints per image = ' + str(average_words) +
            '\n')

    #################################################################
    #
    # Image and Keypoint sampling
    #
    #################################################################

    rand_indexes = []
    nmi_indexes = []

    for iteraction in range(0, rep):

        print "\nIteraction #" + str(iteraction + 1) + '\n'
        f.write("\nIteraction #" + str(iteraction + 1) + '\n')

        print 'Sampling images and keypoints prior to codebook computation...'

        if imsample != "NONE":

            sampleKp = sampleKeypoints.SamplingImandKey(
                n_images, number_of_kp, average_words, percentage)
            sampleallKp = sampleAllKeypoints.SamplingAllKey(percentage)

            names_sampling = np.array(["SAMPLEI", "SAMPLEP"])
            sample_method = np.array([sampleKp, sampleallKp])

            #Get the sampling method passed in the -g argument
            index = np.where(names_sampling == imsample)[0]
            if index.size > 0:
                sampling_to_use = sample_method[index[0]]
            else:
                print 'Wrong sampling method passed in the -g argument. Options: NONE, SAMPLEI, SAMPLEP'
                sys.exit()

            #FOR RESULTS FILE
            sampling_to_use.writeFile(f)

            des_vector_sampled = sampling_to_use.sampleKeypoints(des_vector)

            print 'Total number of features after sampling = ' + str(
                len(des_vector_sampled))
            f.write('Total number of features after sampling = ' +
                    str(len(des_vector_sampled)) + '\n')

            print 'Images and keypoints sampled...'

        else:
            print 'No sampling method chosen'
            #FOR RESULTS FILE
            f.write(
                "No method of keypoint sampling chosen. Use all keypoints for codebook construction \n"
            )
            des_vector_sampled = des_vector

        #################################################################
        #
        # Codebook computation
        #
        #################################################################

        print 'Obtaining codebook...'

        #save current time
        start_time = time.time()

        #Get codebook construction classes
        codebook_kmeans = KMeans1.KMeans1(size)
        codebook_birch = Birch.Birch(size)
        codebook_minibatch = minibatch.MiniBatch(size)
        codebook_randomv = randomSamplesBook.RandomVectors(size)
        codebook_allrandom = allrandom.AllRandom(size)

        names_codebook = np.array(
            ["KMEANS", "BIRCH", "MINIBATCH", "RANDOMV", "RANDOM"])
        codebook_algorithm = np.array([
            codebook_kmeans, codebook_birch, codebook_minibatch,
            codebook_randomv, codebook_allrandom
        ])

        #Get the codebook algorithm passed in the -c argument
        index = np.where(names_codebook == codebook)[0]
        if index.size > 0:
            codebook_to_use = codebook_algorithm[index[0]]
        else:
            print 'Wrong codebook construction algorithm name passed in the -c argument. Options: KMEANS, MINIBATCH, RANDOMV and RANDOM'
            sys.exit()

        #FOR RESULTS FILE
        codebook_to_use.writeFileCodebook(f)

        #Get centers and projections using codebook algorithm
        centers, projections = codebook_to_use.obtainCodebook(
            des_vector_sampled, des_vector)

        #compute the number of unique descriptor vectors
        codebook_randomv.unique_vectors(centers)

        elapsed_time = (time.time() - start_time)
        print 'Time to compute codebook = ' + str(elapsed_time)
        f.write('Time to compute codebook = ' + str(elapsed_time) + '\n')

        #################################################################
        #
        # Obtain Histogram
        #
        #################################################################

        des_byregion = des_vector_byregion
        numkp_region = number_of_kp_region

        hist_total = []

        for level in range(levels - 1, -1, -1):

            print 'Level = ' + str(level)

            n_regions = np.power(4, level)

            for i in range(0, n_regions):

                print 'Obtaining histograms...'

                #print 'projection shape = '+ str(projections.shape)
                #print 'size = ' + str(size)
                #print 'n of images = ' + str(n_images)
                #print 'number of kp' + str(number_of_kp)

                #print len(des_vector_byregion)
                #print len(des_vector_byregion[0])
                #print len(des_vector_byregion[0][0])

                result = scipy.cluster.vq.vq(np.array(des_byregion[i]),
                                             centers)
                projections_region = result[0]

                #print 'projections = ' + str(projections_region)
                #print n_images
                #print number_of_kp_region[i]

                #print len(number_of_kp_region)
                #print len(number_of_kp_region[0])

                hist = histogram.computeHist(projections_region, size,
                                             n_images, numkp_region[i])
                #print hist
                print 'Histograms obtained'

                #print hist

                ################################################################
                #
                # Feature selection
                #
                #################################################################

                print 'Number of visual words = ' + str(len(hist[0]))

                if fselec != "NONE":

                    print 'Applying feature selection to descriptors...'

                    filter_max = filterMax.WordFilterMax(fselec_perc[0])
                    filter_min = filterMin.WordFilterMin(fselec_perc[1])
                    filter_maxmin = filterMaxMin.WordFilterMaxMin(
                        fselec_perc[0], fselec_perc[1])

                    names_filter = np.array(["FMAX", "FMIN", "FMAXMIN"])
                    filter_method = np.array(
                        [filter_max, filter_min, filter_maxmin])

                    #Get the feature selection filter passed in the -f argument
                    index = np.where(names_filter == fselec)[0]
                    if index.size > 0:
                        filter_to_use = filter_method[index[0]]
                    else:
                        print 'Wrong codebook construction algorithm name passed in the -f argument. Options: NONE, FMAX, FMIN, FMAXMIN'
                        sys.exit()

                    hist = filter_to_use.applyFilter(hist, size, n_images)

                    #FOR RESULTS FILE
                    filter_to_use.writeFile(f)

                    new_size = hist.shape[1]

                    print 'Visual words Filtered'
                    print 'Number of visual words filtered = ' + str(size -
                                                                     new_size)
                    f.write("Number of visual words filtered = " +
                            str(size - new_size) + '\n')
                    print 'Final number of visual words = ' + str(new_size)
                    f.write('Final number of visual words = ' + str(new_size) +
                            '\n')

                else:
                    #FOR RESULTS FILE
                    filter_min = filterMin.WordFilterMin(0)
                    hist = filter_min.applyFilter(hist, size, n_images)
                    new_size = hist.shape[1]
                    print 'Number of visual words filtered = ' + str(size -
                                                                     new_size)
                    f.write("No feature selection applied \n")

                #################################################################
                #
                # Histogram Normalization
                #
                #################################################################

                if histnorm != "NONE":

                    #Get histogram normalization classes
                    norm_sbin = simpleBinarization.SimpleBi()
                    norm_tfnorm = tfnorm.Tfnorm()
                    norm_tfidf = tfidf.TfIdf()
                    norm_tfidf2 = tfidf2.TfIdf2()
                    norm_tfidfnorm = tfidfnorm.TfIdfnorm()
                    norm_okapi = okapi.Okapi(average_words)

                    names_normalization = np.array([
                        "SBIN", "TFNORM", "TFIDF", "TFIDF2", "TFIDFNORM",
                        "OKAPI"
                    ])
                    normalization_method = np.array([
                        norm_sbin, norm_tfnorm, norm_tfidf, norm_tfidf2,
                        norm_tfidfnorm, norm_okapi
                    ])

                    #Get the normalization method passed in the -h argument
                    index = np.where(names_normalization == histnorm)[0]
                    if index.size > 0:
                        normalization_to_use = normalization_method[index[0]]
                        new_hist = normalization_to_use.normalizeHist(
                            hist, new_size, n_images)
                    else:
                        print 'Wrong normalization name passed in the -h argument. Options: SBIN, TFNORM, TFIDF and TFIDF2'
                        sys.exit()

                    #FOR RESULTS FILE
                    normalization_to_use.writeFile(f)

                else:
                    #FOR RESULTS FILE
                    f.write("No histogram normalization applied\n")
                    new_hist = hist

                hist_total.append(np.array(new_hist))

            #merge every 4 consecutive regions (descriptors and keypoint counts) for the next coarser level
            des_vector_aux = []
            number_of_kp_aux = []
            if level != 0:
                side = 4
                ntimes = int(np.power(4, level - 1))
                for h in range(0, ntimes):
                    #print len(des_byregion)
                    #print h*side
                    #print (h+1)*side
                    des_vector_aux.append(
                        np.concatenate(des_byregion[h * side:(h + 1) * side],
                                       axis=0))
                    count = 0
                    for n in numkp_region[h * side:(h + 1) * side]:
                        if count != 0:
                            sum_np = [sum(x) for x in zip(sum_np, n)]
                        else:
                            sum_np = n
                        count = count + 1
                    number_of_kp_aux.append(sum_np)

            des_byregion = des_vector_aux
            numkp_region = number_of_kp_aux

        #print hist_total

        hist_total = np.concatenate(hist_total, axis=1)

        print len(hist_total[0])

        #################################################################
        #
        # Clustering of the features
        #
        #################################################################

        #save current time
        start_time = time.time()

        #Get clustering classes
        clust_dbscan = Dbscan.Dbscan(dist)
        clust_kmeans = KMeans1.KMeans1([nclusters])
        clust_birch = Birch.Birch(nclusters)
        clust_meanSift = meanSift.MeanSift(nclusters)
        clust_hierar1 = hierarchicalClustering.Hierarchical(nclusters, dist)
        clust_hierar2 = hierarchicalClustScipy.HierarchicalScipy(dist)
        clust_community = communityDetection.CommunityDetection(dist)

        names_clustering = np.array([
            "DBSCAN", "KMEANS", "BIRCH", "MEANSIFT", "HIERAR1", "HIERAR2",
            "COMM"
        ])
        clustering_algorithm = np.array([
            clust_dbscan, clust_kmeans, clust_birch, clust_meanSift,
            clust_hierar1, clust_hierar2, clust_community
        ])

        #Get the clustering algorithm passed in the -a argument
        index = np.where(names_clustering == clust)[0]
        if index.size > 0:
            clustering_to_use = clustering_algorithm[index[0]]
        else:
            print 'Wrong clustering algorithm name passed in the -a argument. Options: DBSCAN, KMEANS, BIRCH, MEANSIFT, HIERAR1, HIERAR2, COMM'
            sys.exit()

        clusters = clustering_to_use.obtainClusters(hist_total)

        #FOR RESULTS FILE
        clustering_to_use.writeFileCluster(f)

        elapsed_time = (time.time() - start_time)
        print 'Time to run clustering algorithm = ' + str(elapsed_time)
        f.write('Time to run clustering algorithm = ' + str(elapsed_time) +
                '\n')

        print 'Number of clusters obtained = ' + str(max(clusters) + 1)
        f.write('Number of clusters obtained = ' + str(max(clusters) + 1) +
                '\n')

        print 'Clusters obtained = ' + str(np.asarray(clusters))

        #date_time = datetime.datetime.now().strftime('%b-%d-%I%M%p-%G')
        #np.savetxt('saveClusters_'+date_time+'_.txt', clusters, '%i', ',')

        ##ADDED
        ##################################################################
        ##
        ## Create folder with central images for each cluster
        ##
        ##################################################################

        #dir_results = 'Results_' + im_dataset_name + '_SPM_' + date_time

        ##obtain representative images for each cluster
        #central_ims = clust_community.obtainCenteralImages(new_hist, clusters)

        #central_folder = os.path.join(dir_results,'CenterImages')
        #if not os.path.exists(central_folder):
        #os.makedirs(central_folder)

        #count=0
        #for central_im in central_ims:
        #filename = os.path.join(central_folder,'Cluster_'+str(count)+'.jpg')
        #img = cv2.imread(imPaths[central_im],1)
        #cv2.imwrite(filename, img)
        #count = count + 1

        ##ADDED
        ##################################################################
        ##
        ## Separate Clusters into folders
        ##
        ##################################################################

        #clusters_folder = os.path.join(dir_results,'Clusters')
        #if not os.path.exists(clusters_folder):
        #os.makedirs(clusters_folder)

        #clust_dir = []
        #for iclust in range(0,nclusters):
        #direc = os.path.join(clusters_folder,'Cluster_'+str(iclust))
        #if not os.path.exists(direc):
        #os.makedirs(direc)
        #clust_dir.append(direc)

        #for im in range(0,len(imPaths)):
        #im_name = imPaths[im].split('/')[-1]
        ##print clust_dir[int(clusters[im])]
        #filename = os.path.join(clust_dir[int(clusters[im])],im_name)
        ##print filename
        #img = cv2.imread(imPaths[im],1)
        #cv2.imwrite(filename, img)

        #################################################################
        #
        # Evaluation
        #
        #################################################################

        users = 0

        if users == 1:

            rand_index = evaluationUsers.randIndex(clusters)
            rand_indexes.append(rand_index)
            print 'rand_index = ' + str(rand_index)
            f.write("Rand Index = " + str(rand_index) + "\n")

        else:
            if len(clusters) == len(labels):

                f.write("\nResults\n")

                f.write('Clusters Obtained = ' + str(np.asarray(clusters)))
                f.write('Labels = ' + str(np.asarray(labels)))

                rand_index = metrics.adjusted_rand_score(labels, clusters)
                rand_indexes.append(rand_index)
                print 'rand_index = ' + str(rand_index)
                f.write("Rand Index = " + str(rand_index) + "\n")

                NMI_index = metrics.normalized_mutual_info_score(
                    labels, clusters)
                nmi_indexes.append(NMI_index)
                print 'NMI_index = ' + str(NMI_index)
                f.write("NMI Index = " + str(NMI_index) + "\n")

    if rep > 1:
        f.write("\nFINAL RESULTS\n")
        f.write("Avg Rand Index = " + str(float(sum(rand_indexes)) / rep) +
                "\n")
        f.write("Std Rand Index = " + str(statistics.stdev(rand_indexes)) +
                "\n")
        f.write("Avg NMI Index = " + str(float(sum(nmi_indexes)) / rep) + "\n")
        f.write("Std NMI Index = " + str(statistics.stdev(nmi_indexes)) + "\n")
    f.close()