# NOTE(review): the line below is a loop-body fragment whose newlines and
# indentation have been collapsed. Because it starts with '#', Python lexes the
# entire line as a comment as written; nothing on it executes until the layout
# is restored. The enclosing loop header is not visible here, so the nesting
# described below is presumed from statement order -- TODO confirm.
#
# Statements, in order:
#   * agglomerative.__readin_subject__ and dkmeans.__readin_subject__ are both
#     called with (subject, read_in_gold=True); the users_to_skip argument is
#     commented out.
#   * agglomerative.__cluster_subject__(subject) is unpacked into
#     (numClusters, time_to_cluster); numClusters == 0 is followed by `continue`.
#   * nonEmpty is printed, incremented, and compared with 30, followed by `break`.
#   * agglomerative.__accuracy__(subject) -> accuracy1; numClusters,
#     time_to_cluster and accuracy1 are appended to X1, Y1 and Z1.
#   * dkmeans.__cluster_subject__(subject, clusterAlg2,
#     fix_distinct_clusters=True) rebinds time_to_cluster, so Y2 receives the
#     second timing; numClusters2 and accuracy2 go to X2 and Z2.
#   * accuracy1, accuracy2 and dkmeans.__num_gold_clusters__(subject) are
#     printed with a Python 2 print statement.
#print nonEmpty,index agglomerative.__readin_subject__(subject,read_in_gold=True)#,users_to_skip=["caitlin.black"]) dkmeans.__readin_subject__(subject,read_in_gold=True) numClusters,time_to_cluster = agglomerative.__cluster_subject__(subject) if numClusters == 0: continue print nonEmpty nonEmpty += 1 if nonEmpty == 30: break accuracy1 = agglomerative.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2,time_to_cluster = dkmeans.__cluster_subject__(subject, clusterAlg2,fix_distinct_clusters=True) accuracy2 = dkmeans.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) #dkmeans.__outliers__(subject) print accuracy1,accuracy2,dkmeans.__num_gold_clusters__(subject) #print numMarkings,numMarkings2
# NOTE(review): the line below is a loop-body fragment whose newlines and
# indentation have been collapsed. Because it starts with '#', Python lexes the
# entire line as a comment as written; nothing on it executes until the layout
# is restored. The enclosing loop header is not visible here, so the nesting
# described below is presumed from statement order -- TODO confirm.
#
# Statements, in order:
#   * agglomerative.__readin_subject__ and dkmeans.__readin_subject__ are both
#     called with (subject, read_in_gold=True); the users_to_skip argument is
#     commented out.
#   * agglomerative.__cluster_subject__(subject) is unpacked into
#     (numClusters, time_to_cluster); numClusters == 0 is followed by `continue`.
#   * nonEmpty is printed, incremented, and compared with 20, followed by `break`.
#   * agglomerative.__accuracy__(subject) -> accuracy1; numClusters,
#     time_to_cluster and accuracy1 are appended to X1, Y1 and Z1.
#   * dkmeans.__cluster_subject__(subject, clusterAlg2,
#     fix_distinct_clusters=True) rebinds time_to_cluster, so Y2 receives the
#     second timing; numClusters2 and accuracy2 go to X2 and Z2.
#   * accuracy1, accuracy2 and dkmeans.__num_gold_clusters__(subject) are
#     printed with a Python 2 print statement.
#print nonEmpty,index agglomerative.__readin_subject__( subject, read_in_gold=True) #,users_to_skip=["caitlin.black"]) dkmeans.__readin_subject__(subject, read_in_gold=True) numClusters, time_to_cluster = agglomerative.__cluster_subject__(subject) if numClusters == 0: continue print nonEmpty nonEmpty += 1 if nonEmpty == 20: break accuracy1 = agglomerative.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = dkmeans.__cluster_subject__( subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = dkmeans.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) #dkmeans.__outliers__(subject) print accuracy1, accuracy2, dkmeans.__num_gold_clusters__(subject)
# NOTE(review): the line below is a loop-body fragment whose newlines and
# indentation have been collapsed. Because it starts with '#', Python lexes the
# entire line as a comment as written; nothing on it executes until the layout
# is restored. The enclosing loop header is not visible here, so the nesting
# described below is presumed from statement order -- TODO confirm.
#
# Statements, in order:
#   * penguin.__readin_subject__(subject, read_in_gold=True) is called; the
#     users_to_skip argument is commented out.
#   * penguin.__cluster_subject__(subject, clusterAlg) is unpacked into
#     (numClusters, time_to_cluster); numClusters == 0 is followed by `continue`.
#   * nonEmpty is printed, incremented, and compared with 50, followed by `break`.
#   * penguin.__accuracy__(subject) -> accuracy1; numClusters, time_to_cluster
#     and accuracy1 are appended to X1, Y1 and Z1.
#   * penguin.__cluster_subject__(subject, clusterAlg2,
#     fix_distinct_clusters=True) rebinds time_to_cluster, so Y2 receives the
#     second timing; numClusters2 and accuracy2 go to X2 and Z2.
#   * accuracy1, accuracy2 and penguin.__num_gold_clusters__(subject) are
#     printed with a Python 2 print statement.
#print nonEmpty,index penguin.__readin_subject__( subject, read_in_gold=True) #,users_to_skip=["caitlin.black"]) numClusters, time_to_cluster = penguin.__cluster_subject__( subject, clusterAlg) if numClusters == 0: continue print nonEmpty nonEmpty += 1 if nonEmpty == 50: break accuracy1 = penguin.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = penguin.__cluster_subject__( subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = penguin.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) print accuracy1, accuracy2, penguin.__num_gold_clusters__(subject) #print numMarkings,numMarkings2
# NOTE(review): the line below is a loop-body fragment (plus a trailing summary
# print) whose newlines and indentation have been collapsed. Because it starts
# with '#', Python lexes the entire line as a comment as written; nothing on it
# executes until the layout is restored. The enclosing loop header is not
# visible here, so the nesting described below is presumed from statement
# order -- TODO confirm.
#
# Statements, in order:
#   * subject is taken from subject_ids[index]; a hard-coded subject id is
#     commented out just before it.
#   * penguin.__readin_subject__(subject, read_in_gold=True) is called; the
#     users_to_skip argument is commented out.
#   * penguin.__cluster_subject__(subject, clusterAlg) is unpacked into
#     (numClusters, time_to_cluster); numClusters == 0 is followed by `continue`.
#   * nonEmpty is printed, incremented, and compared with 50, followed by `break`.
#   * penguin.__accuracy__(subject) -> accuracy1; numClusters, time_to_cluster
#     and accuracy1 are appended to X1, Y1 and Z1.
#   * penguin.__cluster_subject__(subject, clusterAlg2,
#     fix_distinct_clusters=True) rebinds time_to_cluster, so Y2 receives the
#     second timing; numClusters2 and accuracy2 go to X2 and Z2.
#   * accuracy1, accuracy2 and penguin.__num_gold_clusters__(subject) are
#     printed with a Python 2 print statement.
#   * The final print divides the count of (z1, z2) pairs from zip(Z1, Z2) with
#     z1 > z2 by float(len(Z1)). Presumably this runs once after the loop --
#     TODO confirm. It would raise ZeroDivisionError if Z1 is empty.
# subject = "APZ000173v" subject = subject_ids[index] # print nonEmpty,index penguin.__readin_subject__(subject, read_in_gold=True) # ,users_to_skip=["caitlin.black"]) numClusters, time_to_cluster = penguin.__cluster_subject__(subject, clusterAlg) if numClusters == 0: continue print nonEmpty nonEmpty += 1 if nonEmpty == 50: break accuracy1 = penguin.__accuracy__(subject) X1.append(numClusters) Y1.append(time_to_cluster) Z1.append(accuracy1) numClusters2, time_to_cluster = penguin.__cluster_subject__(subject, clusterAlg2, fix_distinct_clusters=True) accuracy2 = penguin.__accuracy__(subject) X2.append(numClusters2) Y2.append(time_to_cluster) Z2.append(accuracy2) print accuracy1, accuracy2, penguin.__num_gold_clusters__(subject) # print numMarkings,numMarkings2 print len([z1 for (z1, z2) in zip(Z1, Z2) if z1 > z2]) / float(len(Z1))