# Tail of a per-subject comparison loop followed by a summary and a scatter plot
# (Python 2 `print` statements). The loop header and the first clustering call
# that sets `numClusters` / `time_to_cluster` are not visible in this fragment.
#
# Per subject: store (numClusters, time_to_cluster, agglomerative accuracy) in
# X1/Y1/Z1, then cluster the same subject through `dkmeans.__cluster_subject__`
# with `clusterAlg2` and fix_distinct_clusters=True and store the matching
# values in X2/Y2/Z2. `time_to_cluster` is rebound by the second call, so the
# Y1 append has to happen before it.
#
# Summary: three ratios over zip(Z1, Z2) -- share of pairs with z1 > z2,
# z1 < z2 and z1 == z2 -- each divided by float(len(Z1)).
# Plot: Z2 on the x-axis against Z1 on the y-axis as black dots, a dashed
# diagonal from 0 to max(max(Z1), max(Z2)) + 10, and both axes limited to that
# same range.
#
# NOTE(review): the statements are collapsed onto one physical line, so the
# indentation that marks where the loop body ends is lost; as written, the
# first `#` would also comment out everything after it. Restore the original
# line breaks before running -- TODO confirm against the upstream file.
# NOTE(review): the axis labels read "Number of Clusters Found by ..." while
# the plotted lists Z1/Z2 are filled from `__accuracy__` -- confirm which
# quantity is intended.
# NOTE(review): if nothing was appended to Z1/Z2, float(len(Z1)) is 0.0 (the
# ratio prints divide by zero) and max() is called on an empty list.
break accuracy1 = agglomerative.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2,time_to_cluster = dkmeans.__cluster_subject__(subject, clusterAlg2,fix_distinct_clusters=True) accuracy2 = dkmeans.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) #dkmeans.__outliers__(subject) print accuracy1,accuracy2,dkmeans.__num_gold_clusters__(subject) #print numMarkings,numMarkings2 print len([z1 for (z1,z2) in zip(Z1,Z2) if z1 > z2])/float(len(Z1)) print len([z1 for (z1,z2) in zip(Z1,Z2) if z1 < z2])/float(len(Z1)) print len([z1 for (z1,z2) in zip(Z1,Z2) if z1 == z2])/float(len(Z1)) plt.plot(Z2,Z1,'.',color="black") plt.xlabel("Number of Clusters Found by Divisive K-Means") plt.ylabel("Number of Clusters Found by Agglomerative Clustering") plt.plot([0,max(max(Z1),max(Z2))+10],[0,max(max(Z1),max(Z2))+10],"--",color="black") plt.xlim((0,max(max(Z1),max(Z2))+10)) plt.ylim((0,max(max(Z1),max(Z2))+10)) plt.show()
# Tail of a per-subject comparison loop followed by a summary and a scatter plot
# (Python 2 `print` statements). The loop header and the first clustering call
# that sets `numClusters` / `time_to_cluster` are not visible in this fragment.
#
# Per subject: append (numClusters, time_to_cluster, agglomerative accuracy) to
# X1/Y1/Z1, re-cluster the subject via `dkmeans.__cluster_subject__` with
# `clusterAlg2` and fix_distinct_clusters=True, and append the second run's
# values to X2/Y2/Z2. The second call rebinds `time_to_cluster`.
#
# Summary: prints the share of (z1, z2) pairs from zip(Z1, Z2) where z1 > z2,
# where z1 < z2 and where z1 == z2, each as a fraction of len(Z1).
# Plot: black dots of Z1 (y) against Z2 (x), a dashed diagonal up to
# max(max(Z1), max(Z2)) + 10, and identical x/y limits over that range.
#
# NOTE(review): line breaks and indentation were lost in this copy, so the
# end of the loop body cannot be read off the text, and the first `#` would
# comment out the rest of the line if executed as-is -- restore the original
# layout before use.
# NOTE(review): axis labels say "Number of Clusters Found by ..." but Z1/Z2
# hold the values returned by `__accuracy__` -- confirm the intended quantity.
# NOTE(review): empty Z1/Z2 would make the ratio prints divide by 0.0 and the
# max() calls raise on an empty sequence.
accuracy1 = agglomerative.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = dkmeans.__cluster_subject__( subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = dkmeans.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) #dkmeans.__outliers__(subject) print accuracy1, accuracy2, dkmeans.__num_gold_clusters__(subject) #print numMarkings,numMarkings2 print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 > z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 < z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 == z2]) / float(len(Z1)) plt.plot(Z2, Z1, '.', color="black") plt.xlabel("Number of Clusters Found by Divisive K-Means") plt.ylabel("Number of Clusters Found by Agglomerative Clustering") plt.plot([0, max(max(Z1), max(Z2)) + 10], [0, max(max(Z1), max(Z2)) + 10], "--", color="black") plt.xlim((0, max(max(Z1), max(Z2)) + 10)) plt.ylim((0, max(max(Z1), max(Z2)) + 10)) plt.show()
# Tail of a per-subject comparison loop followed by a summary and a scatter plot
# (Python 2 `print` statements). The loop header, the update of `nonEmpty` and
# the first clustering call that sets `numClusters` / `time_to_cluster` are not
# visible in this fragment.
#
# The loop is left once `nonEmpty` equals 50. Otherwise the first run's
# (numClusters, time_to_cluster, accuracy) go into X1/Y1/Z1, the subject is
# clustered again through `penguin.__cluster_subject__` with `clusterAlg2` and
# fix_distinct_clusters=True, and that run's values go into X2/Y2/Z2. Both
# accuracies are read from the same `penguin` object, so `accuracy1` must be
# taken before the second clustering call.
#
# Summary: fractions of zip(Z1, Z2) pairs with z1 > z2, z1 < z2 and z1 == z2,
# each over float(len(Z1)).
# Plot: Z2 (x) against Z1 (y) as black dots, dashed diagonal from 0 to
# max(max(Z1), max(Z2)) + 10, and both axis limits set to that range.
#
# NOTE(review): this copy has lost its line breaks and indentation; the
# `break` can only belong to the `if`, but where the loop body ends is not
# recoverable here, and the first `#` would comment out the remainder of the
# line as written -- restore the original layout before use.
# NOTE(review): axis labels read "Number of Clusters Found by ..." while Z1/Z2
# are filled from `__accuracy__`; the y label also names agglomerative
# clustering although only `penguin` is called here -- confirm both.
# NOTE(review): empty Z1/Z2 would make the ratio prints divide by 0.0 and the
# max() calls raise on an empty sequence.
if nonEmpty == 50: break accuracy1 = penguin.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = penguin.__cluster_subject__( subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = penguin.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) print accuracy1, accuracy2, penguin.__num_gold_clusters__(subject) #print numMarkings,numMarkings2 print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 > z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 < z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 == z2]) / float(len(Z1)) plt.plot(Z2, Z1, '.', color="black") plt.xlabel("Number of Clusters Found by Divisive K-Means") plt.ylabel("Number of Clusters Found by Agglomerative Clustering") plt.plot([0, max(max(Z1), max(Z2)) + 10], [0, max(max(Z1), max(Z2)) + 10], "--", color="black") plt.xlim((0, max(max(Z1), max(Z2)) + 10)) plt.ylim((0, max(max(Z1), max(Z2)) + 10)) plt.show()
# Tail of a per-subject comparison loop followed by a summary and a scatter plot
# (Python 2 `print` statements). The loop header and the first clustering call
# that sets `numClusters` / `time_to_cluster` are not visible in this fragment.
#
# `nonEmpty` is incremented and the loop is left once it equals 50, so the
# subject that brings the counter to 50 is not recorded. For every other
# subject the first run's (numClusters, time_to_cluster, accuracy) are appended
# to X1/Y1/Z1, the subject is clustered again via `penguin.__cluster_subject__`
# with `clusterAlg2` and fix_distinct_clusters=True, and the second run's
# values are appended to X2/Y2/Z2. `time_to_cluster` is rebound by that call.
#
# Summary: fractions of zip(Z1, Z2) pairs with z1 > z2, z1 < z2 and z1 == z2,
# each over float(len(Z1)).
# Plot: Z2 (x) against Z1 (y) as black dots, dashed diagonal from 0 to
# max(max(Z1), max(Z2)) + 10, and both axis limits set to that range. The
# trailing commented-out call is a disabled plot of X1 against Y1.
#
# NOTE(review): this copy has lost its line breaks and indentation; where the
# loop body ends is not recoverable here, and the first `#` would comment out
# the remainder of the line as written -- restore the original layout before
# use.
# NOTE(review): axis labels read "Number of Clusters Found by ..." while Z1/Z2
# are filled from `__accuracy__`; the y label also names agglomerative
# clustering although only `penguin` is called here -- confirm both.
# NOTE(review): empty Z1/Z2 would make the ratio prints divide by 0.0 and the
# max() calls raise on an empty sequence.
nonEmpty += 1 if nonEmpty == 50: break accuracy1 = penguin.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = penguin.__cluster_subject__(subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = penguin.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) print accuracy1, accuracy2, penguin.__num_gold_clusters__(subject) # print numMarkings,numMarkings2 print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 > z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 < z2]) / float(len(Z1)) print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 == z2]) / float(len(Z1)) plt.plot(Z2, Z1, ".", color="black") plt.xlabel("Number of Clusters Found by Divisive K-Means") plt.ylabel("Number of Clusters Found by Agglomerative Clustering") plt.plot([0, max(max(Z1), max(Z2)) + 10], [0, max(max(Z1), max(Z2)) + 10], "--", color="black") plt.xlim((0, max(max(Z1), max(Z2)) + 10)) plt.ylim((0, max(max(Z1), max(Z2)) + 10)) plt.show() # # plt.plot(X1,Y1,"+",color="black",label="Agglomerative")