Ejemplo n.º 1
0
# Bound __fit__ method of a fresh DivisiveKmeans instance; it is handed to
# __cluster_subject__ below as the clustering callable.
clusterAlg = DivisiveKmeans().__fit__

penguin = PenguinAggregation()

# The slice is the only place the 50-subject cap is enforced: enumerate()
# over at most 50 items never yields count == 50, so no extra guard is needed
# inside the loop.
gold_subjects = penguin.__get_gold_subjects__()
gold_sample = gold_subjects[:50]

penguin.__readin_users__()

for count, zooniverse_id in enumerate(gold_sample):
    # Single-argument print() emits the same "count id" text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (count, zooniverse_id))
    penguin.__readin_subject__(zooniverse_id, read_in_gold=True)

    # correctionAlg is not defined in this snippet; it must already be in
    # scope when this script runs.
    blankImage = penguin.__cluster_subject__(zooniverse_id,
                                             clusterAlg,
                                             fix_distinct_clusters=True,
                                             correction_alg=correctionAlg)
    penguin.__soy_it__(zooniverse_id)


    # NOTE(review): these two calls sit inside the loop, so they run once per
    # subject -- confirm that is intended rather than once after the loop.
    penguin.__signal_ibcc__()
    penguin.__roc__()
# one_overlap = penguin.__off_by_one__(display=True)
# last_id = None
#
# for t in one_overlap:
#     if t[0] != last_id:
#         print "*****"
#         print "====="
#         last_id = t[0]
#     penguin.__relative_confusion__(t)
Ejemplo n.º 2
0
# Fractions of (Z1, Z2) pairs where Z1 is greater than, less than, or equal
# to Z2.  Each count is divided by len(Z1), exactly as before.
z1_total = float(len(Z1))
print(sum(1 for a, b in zip(Z1, Z2) if a > b) / z1_total)
print(sum(1 for a, b in zip(Z1, Z2) if a < b) / z1_total)
print(sum(1 for a, b in zip(Z1, Z2) if a == b) / z1_total)

# Scatter of the two cluster counts: Z2 on the x axis, Z1 on the y axis.
plt.plot(Z2, Z1, '.', color="black")
plt.xlabel("Number of Clusters Found by Divisive K-Means")
plt.ylabel("Number of Clusters Found by Agglomerative Clustering")

# Shared upper bound for both axes and for the dashed y = x reference line.
axis_limit = max(max(Z1), max(Z2)) + 10
plt.plot([0, axis_limit], [0, axis_limit], "--", color="black")
plt.xlim((0, axis_limit))
plt.ylim((0, axis_limit))
plt.show()

# One curve per aggregator on a shared figure: agglomerative in red first,
# then divisive k-means in green.  X and Y end up holding the dkmeans values.
for aggregator, curve_color in ((agglomerative, "red"), (dkmeans, "green")):
    aggregator.__signal_ibcc__()
    X, Y = aggregator.__roc__()
    plt.plot(X, Y, color=curve_color)

plt.show()

#
# plt.plot(X1,Y1,"+",color="black",label="Agglomerative")
# plt.plot(X2,Y2,"o",color="black",label = "Divisive k-means")
# plt.xlabel("Number of Clusters")
# plt.ylabel("Runtime of Clustering Algorithm")
# plt.legend(loc="upper left")
# print len(X1),len(X2)
Ejemplo n.º 3
0

# Report how often Z1 exceeds, trails, or matches Z2, as a fraction of
# len(Z1).
denominator = float(len(Z1))
print(sum(1 for left, right in zip(Z1, Z2) if left > right) / denominator)
print(sum(1 for left, right in zip(Z1, Z2) if left < right) / denominator)
print(sum(1 for left, right in zip(Z1, Z2) if left == right) / denominator)

# Z2 goes on the x axis and Z1 on the y axis, matching the axis labels.
plt.plot(Z2, Z1, '.', color="black")
plt.xlabel("Number of Clusters Found by Divisive K-Means")
plt.ylabel("Number of Clusters Found by Agglomerative Clustering")

# Both axes and the dashed diagonal share a bound: the largest count plus 10.
upper_bound = max(max(Z1), max(Z2)) + 10
diagonal = [0, upper_bound]
plt.plot(diagonal, diagonal, "--", color="black")
plt.xlim((0, upper_bound))
plt.ylim((0, upper_bound))
plt.show()

# Draw the agglomerative curve (red) and then the divisive k-means curve
# (green) on one figure.  X and Y keep the dkmeans values afterwards.
for source, line_color in ((agglomerative, "red"), (dkmeans, "green")):
    source.__signal_ibcc__()
    X, Y = source.__roc__()
    plt.plot(X, Y, color=line_color)

plt.show()

#
# plt.plot(X1,Y1,"+",color="black",label="Agglomerative")
# plt.plot(X2,Y2,"o",color="black",label = "Divisive k-means")
# plt.xlabel("Number of Clusters")
# plt.ylabel("Runtime of Clustering Algorithm")
# plt.legend(loc="upper left")
# print len(X1),len(X2)
Ejemplo n.º 4
0
        # Fragment of a loop body: the enclosing loop header and the
        # definitions of `s`, `pts`, `zooniverse_id` and `clusterAlg` are
        # outside this snippet.

        # Original image dimensions recorded in the subject's metadata.
        width = s["metadata"]["original_size"]["width"]
        height = s["metadata"]["original_size"]["height"]

        # Rescale every point from original-image coordinates into a
        # 1000 x 563 frame (x divided by width/1000, y by height/563).
        # NOTE(review): presumably 1000x563 is the size the markings were
        # made at -- confirm.
        pts = [(int(x) / (width / 1000.), int(y) / (height / 563.))
               for (x, y) in pts]

        # Skip any subject whose status is not "complete".
        if penguin.__get_status__(zooniverse_id) != "complete":
            continue
        penguin.__readin_subject__(zooniverse_id)

        # The result is stored in blankImage, which is not used anywhere in
        # the visible code.
        blankImage = penguin.__cluster_subject__(zooniverse_id,
                                                 clusterAlg,
                                                 fix_distinct_clusters=True)

# Top-level call made once, outside the indented loop body above; the return
# value of __roc__ is discarded here.
penguin.__roc__()
#__ibcc__2(penguin.clusterResults,penguin.users_per_subject)

# plt.plot(Xt,Yt,'.')
# plt.xlabel("Large cluster size")
# plt.ylabel("Small cluster size")
# plt.xlim((min(Xt)-0.05,max(Xt)+0.05))
# plt.ylim((min(Yt)-0.05,max(Yt)+0.05))
# plt.show()
# for i in range(1,10):
#     print sum([1 for j in Yt if i == j])
# Y = []
# yErr = []
# X = []
# for i in range(1,10):
#     y = []