# NOTE(review): whitespace-mangled paste — many statements collapsed onto ONE
# physical line; this is not valid Python as written and the original loop /
# try nesting must be restored before it can run. Kept byte-identical below.
# What the statements do, in order:
#   * exampleHash[exampleIndex] = i — record the category index `i` for the
#     current example (presumably inside a per-image loop; loop header is
#     outside this chunk — TODO confirm).
#   * categoryMeans.append(np.mean(localList, axis=0)) — per-category mean
#     feature vector (belongs one loop level up, per the inline comment).
#   * imFeatures = np.array(imFeatures) — 2D ndarray for clustering.
#   * mixture.GMM(n_components=101, thresh=1e-05, covariance_type='diag') —
#     legacy scikit-learn (<0.18) GMM API; fit, pickle to proc_data/, predict.
#   * evaluateClustering(...) — project helper (not visible here) returning
#     (error rate, "accurate" cluster count, average entropy).
#   * trailing `except Exception as exc:` belongs to a `try:` that starts
#     before this visible chunk; it prints the error and exits.
# NOTE(review): `predMeans = g1.means_` is assigned but never used in this
# chunk. Python 2 syntax throughout (print statements, quit()).
exampleHash[exampleIndex] = i exampleIndex += 1 # Compute and store the category means categoryMeans.append(np.mean(localList, axis=0)) # Turn imFeatures into a 2D ndarray so that it can be used in K-means later on imFeatures = np.array(imFeatures) print "Read Caltech data in memory." g1 = mixture.GMM(n_components=101, thresh=1e-05, covariance_type='diag') print "About to fit data" g1.fit(imFeatures) pkl.dump(g1, open('proc_data/gmm_obj_diag_cov_sift.pkl', 'wb')) print "Fitted data" predLabels = g1.predict(imFeatures) print "Predicted data" predMeans = g1.means_ errRate, goodClusters, avgEntropy = evaluateClustering( g1.means_, imFeatures, predLabels, categoryMeans, exampleHash, 101) print "GMM model predicted labels with an error rate of %.4f%%, produced %d \"accurate\" clusters and %.4f average entropy." % ( errRate, goodClusters, avgEntropy) print "That's all. Exiting..." quit() except Exception as exc: print "An exception occurred:" + str(exc) + "." quit()
# NOTE(review): whitespace-mangled paste — an entire K-means section collapsed
# onto ONE physical line; not valid Python as written. Kept byte-identical.
# What the statements do, in order:
#   * scipy.cluster.vq kmeans(imFeatures, 101, 100) — build a 101-centroid
#     codebook (100 restarts per the positional `iter` argument — TODO confirm
#     against scipy's kmeans signature), then vq() assigns each feature vector
#     to its nearest centroid.
#   * codebook and assignments are pickled to proc_data/.
#   * sanity check: one assignment per input row, else raise LogicalError
#     (project exception, not visible here) — Python-2-only
#     `raise Exc, "msg"` syntax; `stack()[0][3]` is inspect.stack(), giving
#     the current function name for the message.
#   * evaluateClustering(...) — project helper returning
#     (error rate, "good" cluster count, average entropy).
#   * the three metrics are written to separate files under output_data/.
# NOTE(review): the leading comments promise the scipy defaults (500 runs,
# 1e-05 distortion threshold) but the call passes 100 as the third positional
# argument — comment and code disagree; confirm intent before fixing.
# Run k-means 500 times (the default) on the data with aim to produce k = 101 clusters. # The stopping criterion of each iteration is a difference in the computed distortion # (mean squared error) less than e-05 (the default) print "Running K-means..." codebook, _distortion = kmeans(imFeatures, 101, 100) assignments, _distortion = vq(imFeatures, codebook) pkl.dump(codebook, open('proc_data/codebook_kmeans_gradients.pkl', 'wb')) pkl.dump(assignments, open('proc_data/labelAssignments_kmeans_gradients.pkl', 'wb')) if (len(assignments) != imFeatures.shape[0]): raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." % ( stack()[0][3], imFeatures.shape[0], len(assignments)) print "Ran K-means" errorRate, goodClusters, avgEntropy = evaluateClustering( codebook, imFeatures, assignments, categoryMeans, exampleHash, 101) print "K-means produced an error rate of %.4f%%, %d \"good\" clusters and %.4f average entropy." % ( errorRate, goodClusters, avgEntropy) print "The amount of \'\"good\" clusters corresponds to %.4f%% of total clusters." % ( 100 * goodClusters / float(101)) fp = open('output_data/errorRate_gradients_kmeans.txt', 'w') fp.write(str(errorRate)) fp.close() fp = open('output_data/accurateClusters_gradients_kmeans.txt', 'w') fp.write(str(goodClusters)) fp.close() fp = open('output_data/averageEntropy_gradients_kmeans.txt', 'w') fp.write(str(avgEntropy)) fp.close()
# NOTE(review): whitespace-mangled paste — near-duplicate of the chunk on the
# first line of this view, with extra loading statements at the front; many
# statements collapsed onto ONE physical line, not valid Python as written.
# Kept byte-identical. What the statements do, in order:
#   * loadmat(...) reads a per-image .mat file from the Caltech-101 dense-SIFT
#     bag-of-words directory (`cat` and `gr_im` come from enclosing loops that
#     are outside this chunk — TODO confirm).
#   * np.histogram(matfile['h'], range(1001)) — 1000-bin histogram over visual
#     word IDs; the counts FV are the image's feature vector, appended both to
#     the global imFeatures and the per-category localList.
#   * exampleHash maps example index -> category index `i`.
#   * categoryMeans.append(np.mean(localList, axis=0)) — per-category mean
#     (belongs one loop level up, per the inline comment).
#   * legacy sklearn mixture.GMM (101 components, diagonal covariance) is fit,
#     pickled, and used to predict labels; evaluateClustering (project helper,
#     not visible here) scores the clustering.
#   * trailing `except Exception as exc:` belongs to a `try:` outside this
#     chunk; prints the error and exits.
# NOTE(review): `predMeans = g1.means_` is assigned but never used here.
matfile = loadmat('input_data/caltech101_SIFT/dense_bow/oneForAll_nr1_K1000/'+ cat + '/' + gr_im) FV, _binedges = np.histogram(matfile['h'], range(1001)) imFeatures.append(FV) localList.append(FV) exampleHash[exampleIndex] = i exampleIndex +=1 # Compute and store the category means categoryMeans.append(np.mean(localList, axis = 0)) # Turn imFeatures into a 2D ndarray so that it can be used in K-means later on imFeatures = np.array(imFeatures) print "Read Caltech data in memory." g1 = mixture.GMM(n_components=101,thresh = 1e-05, covariance_type='diag') print "About to fit data" g1.fit(imFeatures) pkl.dump(g1, open('proc_data/gmm_obj_diag_cov_sift.pkl', 'wb')) print "Fitted data" predLabels= g1.predict(imFeatures) print "Predicted data" predMeans = g1.means_ errRate, goodClusters, avgEntropy = evaluateClustering(g1.means_, imFeatures, predLabels, categoryMeans, exampleHash, 101) print "GMM model predicted labels with an error rate of %.4f%%, produced %d \"accurate\" clusters and %.4f average entropy." %(errRate, goodClusters, avgEntropy) print "That's all. Exiting..." quit() except Exception as exc: print "An exception occurred:" + str(exc) + "." quit()
# NOTE(review): whitespace-mangled paste — a reformatted (black-style quoting,
# trailing commas) near-duplicate of the K-means chunk on the second line of
# this view, again collapsed onto ONE physical line and truncated after the
# accurateClusters file is opened (the writes that follow in the other variant
# are cut off here). Not valid Python as written; kept byte-identical.
# What the statements do, in order:
#   * scipy.cluster.vq kmeans(imFeatures, 101, 100) builds a 101-centroid
#     codebook; vq() assigns each feature row to its nearest centroid.
#   * codebook and assignments are pickled to proc_data/.
#   * sanity check raises LogicalError (project exception, not visible here)
#     if assignment count != row count — Python-2-only `raise Exc, "msg"`
#     syntax; `stack()[0][3]` is inspect.stack()'s current function name.
#   * evaluateClustering (project helper) returns
#     (error rate, "good" cluster count, average entropy); metrics are then
#     written out to output_data/ text files (truncated mid-sequence here).
# (mean squared error) less than e-05 (the default) print "Running K-means..." codebook, _distortion = kmeans(imFeatures, 101, 100) assignments, _distortion = vq(imFeatures, codebook) pkl.dump(codebook, open("proc_data/codebook_kmeans_gradients.pkl", "wb")) pkl.dump(assignments, open("proc_data/labelAssignments_kmeans_gradients.pkl", "wb")) if len(assignments) != imFeatures.shape[0]: raise LogicalError, "Method %s: K-means should have computed %d assignments; instead, it computed %d." % ( stack()[0][3], imFeatures.shape[0], len(assignments), ) print "Ran K-means" errorRate, goodClusters, avgEntropy = evaluateClustering( codebook, imFeatures, assignments, categoryMeans, exampleHash, 101 ) print 'K-means produced an error rate of %.4f%%, %d "good" clusters and %.4f average entropy.' % ( errorRate, goodClusters, avgEntropy, ) print 'The amount of \'"good" clusters corresponds to %.4f%% of total clusters.' % ( 100 * goodClusters / float(101) ) fp = open("output_data/errorRate_gradients_kmeans.txt", "w") fp.write(str(errorRate)) fp.close() fp = open("output_data/accurateClusters_gradients_kmeans.txt", "w")