Example #1
                        exampleHash[exampleIndex] = i
                        exampleIndex += 1

                # Compute and store the category means
                categoryMeans.append(np.mean(localList, axis=0))

        # Turn imFeatures into a 2D ndarray so that it can be used in K-means later on
        imFeatures = np.array(imFeatures)
        print "Read Caltech data in memory."

        g1 = mixture.GMM(n_components=101,
                         thresh=1e-05,
                         covariance_type='diag')
        print "About to fit data"
        g1.fit(imFeatures)
        pkl.dump(g1, open('proc_data/gmm_obj_diag_cov_sift.pkl', 'wb'))
        print "Fitted data"
        predLabels = g1.predict(imFeatures)
        print "Predicted data"
        predMeans = g1.means_

        errRate, goodClusters, avgEntropy = evaluateClustering(
            g1.means_, imFeatures, predLabels, categoryMeans, exampleHash, 101)

        print "GMM model predicted labels with an error rate of %.4f%%, produced %d \"accurate\" clusters and %.4f average entropy." % (
            errRate, goodClusters, avgEntropy)
        print "That's all. Exiting..."
        quit()
    except Exception as exc:
        print "An exception occurred:" + str(exc) + "."
        quit()
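        # NOTE: nothing below this point can execute; the quit() calls above
        # terminate the process first, leaving this K-means pipeline as dead
        # code.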
        # Run K-means 100 times on the data, keeping the codebook with the
        # lowest distortion, with the aim of producing k = 101 clusters.
        # Each run stops once the change in distortion (mean squared error)
        # falls below 1e-05 (the default threshold).

        print "Running K-means..."
        codebook, _distortion = kmeans(imFeatures, 101, 100)
        assignments, _distortion = vq(imFeatures, codebook)
        pkl.dump(codebook, open('proc_data/codebook_kmeans_gradients.pkl',
                                'wb'))
        pkl.dump(assignments,
                 open('proc_data/labelAssignments_kmeans_gradients.pkl', 'wb'))
        if len(assignments) != imFeatures.shape[0]:
            raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." % (
                stack()[0][3], imFeatures.shape[0], len(assignments))
        print "Ran K-means"
        errorRate, goodClusters, avgEntropy = evaluateClustering(
            codebook, imFeatures, assignments, categoryMeans, exampleHash, 101)

        print "K-means produced an error rate of %.4f%%, %d \"good\" clusters and %.4f average entropy." % (
            errorRate, goodClusters, avgEntropy)
        print "The amount of \'\"good\" clusters corresponds to %.4f%% of total clusters." % (
            100 * goodClusters / float(101))

        fp = open('output_data/errorRate_gradients_kmeans.txt', 'w')
        fp.write(str(errorRate))
        fp.close()
        fp = open('output_data/accurateClusters_gradients_kmeans.txt', 'w')
        fp.write(str(goodClusters))
        fp.close()
        fp = open('output_data/averageEntropy_gradients_kmeans.txt', 'w')
        fp.write(str(avgEntropy))
        fp.close()
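Note: `mixture.GMM` and its `thresh` argument come from a pre-0.18 scikit-learn release, and the prints are Python 2. For readers on current scikit-learn, here is a minimal sketch of the same fit/predict/pickle step with `GaussianMixture`, where `tol` replaces `thresh`; the random stand-in features are an assumption for illustration, not part of the original:

import pickle as pkl

import numpy as np
from sklearn.mixture import GaussianMixture

# Stand-in for the (n_examples, 1000) bag-of-words histogram matrix that the
# example reads from disk; random data, for illustration only.
imFeatures = np.random.rand(500, 1000)

# One diagonal-covariance component per Caltech-101 category; `tol` is the
# convergence threshold that the old `thresh` argument used to set.
g1 = GaussianMixture(n_components=101, tol=1e-05, covariance_type='diag')
g1.fit(imFeatures)
predLabels = g1.predict(imFeatures)   # hard component assignment per example
predMeans = g1.means_                 # (101, 1000) array of component means

# Same pickle path as the example (the directory must already exist).
with open('proc_data/gmm_obj_diag_cov_sift.pkl', 'wb') as f:
    pkl.dump(g1, f)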
Example #3
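                        # Histogram the dense-SIFT bag-of-words counts into a
                        # 1000-bin feature vector for this image.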
                        matfile = loadmat('input_data/caltech101_SIFT/dense_bow/oneForAll_nr1_K1000/' + cat + '/' + gr_im)
                        FV, _binedges = np.histogram(matfile['h'], range(1001))
                        imFeatures.append(FV)
                        localList.append(FV)
                        exampleHash[exampleIndex] = i
                        exampleIndex += 1
                        
                # Compute and store the category means
                categoryMeans.append(np.mean(localList, axis=0))
                   
        # Turn imFeatures into a 2D ndarray so that it can be used in K-means later on
        imFeatures = np.array(imFeatures)
        print "Read Caltech data in memory."
    
        g1 = mixture.GMM(n_components=101, thresh=1e-05, covariance_type='diag')
        print "About to fit data"
        g1.fit(imFeatures)
        pkl.dump(g1, open('proc_data/gmm_obj_diag_cov_sift.pkl', 'wb'))
        print "Fitted data"
        predLabels = g1.predict(imFeatures)
        print "Predicted data"
        predMeans = g1.means_
        
        errRate, goodClusters, avgEntropy = evaluateClustering(g1.means_, imFeatures, predLabels, categoryMeans, exampleHash, 101)

        print "GMM model predicted labels with an error rate of %.4f%%, produced %d \"accurate\" clusters and %.4f average entropy." %(errRate, goodClusters, avgEntropy)
        print "That's all. Exiting..."
        quit()
    except Exception as exc:
        print "An exception occurred:"  + str(exc) + "."
        quit()
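        # NOTE: as in Example #1, the quit() calls above exit before this
        # point, so the K-means pipeline below never runs.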
        # Run K-means 100 times on the data, keeping the codebook with the
        # lowest distortion, with the aim of producing k = 101 clusters.
        # Each run stops once the change in distortion (mean squared error)
        # falls below 1e-05 (the default threshold).

        print "Running K-means..."
        codebook, _distortion = kmeans(imFeatures, 101, 100)
        assignments, _distortion = vq(imFeatures, codebook)
        pkl.dump(codebook, open("proc_data/codebook_kmeans_gradients.pkl", "wb"))
        pkl.dump(assignments, open("proc_data/labelAssignments_kmeans_gradients.pkl", "wb"))
        if len(assignments) != imFeatures.shape[0]:
            raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." % (
                stack()[0][3],
                imFeatures.shape[0],
                len(assignments),
            )
        print "Ran K-means"
        errorRate, goodClusters, avgEntropy = evaluateClustering(
            codebook, imFeatures, assignments, categoryMeans, exampleHash, 101
        )

        print 'K-means produced an error rate of %.4f%%, %d "good" clusters and %.4f average entropy.' % (
            errorRate,
            goodClusters,
            avgEntropy,
        )
        print 'The number of "good" clusters corresponds to %.4f%% of total clusters.' % (
            100 * goodClusters / float(101)
        )

        fp = open("output_data/errorRate_gradients_kmeans.txt", "w")
        fp.write(str(errorRate))
        fp.close()
        fp = open("output_data/accurateClusters_gradients_kmeans.txt", "w")