コード例 #1
0
            #print penguin_clusters
            #print noise__
            noise_points[s] = [x for x,u in noise__]
            print str(s) + "  -  " + str(len(user_identified_penguins))

            if len(user_identified_penguins) > 20:
                break
    except AssertionError:
        continue

    #if len(user_identified_penguins) == 0:
    #    continue

    # Runs only when the most recent clustering result holds at most 20
    # identified penguins.
    if len(user_identified_penguins) <= 20:
        #print noise__
        # Compare the first and the last entry of `penguins`. The result is not
        # used anywhere in the visible part of this snippet.
        # NOTE(review): presumably cluster_compare(a, b) returns the members of
        # b that have no counterpart in a -- confirm against clusterCompare.
        not_found = cluster_compare(penguins[0],penguins[-1])
        #if not_found == []:
        #    continue



        # Load the JPG named after object_id from the penguin image directory
        # and show it as the plot background.
        # NOTE(review): cbook is presumably matplotlib.cbook; get_sample_data
        # is being handed an absolute path here -- confirm that this is
        # supported by the installed matplotlib version.
        image_file = cbook.get_sample_data(base_directory + "/Databases/penguins/images/"+object_id+".JPG")
        image = plt.imread(image_file)
        fig, ax = plt.subplots()
        im = ax.imshow(image)

        # Overlay the points stored in penguins_center[5] as red dots.
        try:
            X,Y = zip(*penguins_center[5])
            plt.plot(X,Y,'.',color="red")
        except ValueError:
            # Presumably raised when penguins_center[5] is empty, so there is
            # nothing to unpack into X and Y; in that case nothing is plotted.
            pass
コード例 #2
0
        # Build the reference ("gold standard") clustering from the
        # user_markings[20] / user_ips[20] data set.
        # NOTE(review): the keys 5/10/15/20 presumably denote the number of
        # users whose markings are included -- confirm where user_markings is
        # populated.
        gold_centers,gold_clusters,noise__ = DivisiveDBSCAN(num_markings).fit(user_markings[20],user_ips[20],debug=True)#,jpeg_file=base_directory + "/Databases/penguins/images/"+object_id+".JPG")
        print "gold standard number " + str(len(gold_clusters))


        #not_found = []
        # fake_penguins[s] collects, for each value of nn in order, the number
        # of clusters that were found but have no match in the gold standard.
        fake_penguins = {}
        for s in [5,10,15]:
            print "== " + str(s)
            #not_found.append([])
            fake_penguins[s] = []

            # nn is the constructor argument of DivisiveDBSCAN.
            # NOTE(review): presumably the minimum cluster size -- confirm.
            for nn in [1,2,3,4,5]:
                user_identified_penguins,penguin_clusters,noise__ = DivisiveDBSCAN(nn).fit(user_markings[s],user_ips[s],debug=True)

                #missed penguins - in gold standard but not found
                not_found = cluster_compare(penguin_clusters,gold_clusters)

                #fake penguins - found but not actually real
                fake_found = cluster_compare(gold_clusters,penguin_clusters)

                print len(not_found),len(fake_found)
                #not_found[-1].append(len(gold_clusters) -len(not_found))
                fake_penguins[s].append(len(fake_found))

                # Fraction of gold-standard clusters that were recovered:
                # (gold - missed) / gold. float() forces true division under
                # Python 2.
                overall_found[s][nn-1].append((len(gold_clusters) - len(not_found))/float(len(gold_clusters)))

        #false positives - fake penguins - can only be calculated after everything else is done
        for s in [5,10,15]:
            for nn in [1,2,3,4,5]:
                # fake_penguins[15][0] is the fake-cluster count for s=15, nn=1.
                # When it is zero, 0 is recorded for every (s, nn) pair.
                # NOTE(review): the snippet ends here; the handling of a
                # non-zero count is not visible.
                if fake_penguins[15][0] == 0:
                    overall_fake[s][nn-1].append(0)
コード例 #3
0
ファイル: penguin_ibcc.py プロジェクト: lelou6666/aggregation
print "done that"

# Split the positive-class probabilities read from the IBCC output file into
# two lists, depending on whether the corresponding penguin is matched by the
# gold standard.
total = 0
true_positives = []
false_positives = []
with open(base_directory + "/Databases/penguins_ibcc.out", 'rb') as f:
    for l in f.readlines():
        # Each line holds three space-separated fields: the penguin index and
        # two probabilities (unpacked as negative, then positive).
        penguin_index, neg_prob, pos_prob = l.split(" ")

        # The index is parsed through float() first, so a value written as
        # e.g. "3.0" is accepted.
        penguin = penguins[max_users][image_index][1][int(
            float(penguin_index))][0]

        # Is this penguin "real", i.e. is it in the gold standard?
        # An empty result from cluster_compare is treated as "matched".
        if cluster_compare(gold_standard, [
                penguin,
        ]) == []:
            # yes - penguin is real
            true_positives.append(float(pos_prob))
        else:
            # penguin is fake
            false_positives.append(float(pos_prob))

# Third-smallest true-positive probability (the two lowest are ignored).
# NOTE(review): min() raises ValueError when there are fewer than three true
# positives, and max() below raises ValueError when there are no false
# positives.
print min(sorted(true_positives)[2:])
print max(false_positives)
print len(false_positives)

X = []
Y = []
for p in np.arange(0, 1.01, 0.001):
    X.append(
コード例 #4
0
ibcc.runIbcc(base_directory+"/Databases/penguins_ibcc_config.py")

print "done that"

total = 0
true_positives = []
false_positives = []
with open(base_directory+"/Databases/penguins_ibcc.out",'rb') as f:
    for l in f.readlines():
        penguin_index, neg_prob,pos_prob = l.split(" ")

        penguin = penguins[max_users][image_index][1][int(float(penguin_index))][0]

        #is this penguin "real" ie. is in the gold standard?
        if cluster_compare(gold_standard,[penguin,]) == []:
            #yes - penguin is real
            true_positives.append(float(pos_prob))
        else:
            #penguin is fake
            false_positives.append(float(pos_prob))

print min(sorted(true_positives)[2:])
print max(false_positives)
print len(false_positives)

X = []
Y = []
for p in np.arange(0,1.01,0.001):
    X.append(len([f for f in false_positives if f >= p])/float(len(false_positives)))
    Y.append(len([t for t in true_positives if t >= p])/float(37.))
コード例 #5
0
ファイル: roc.py プロジェクト: alexbfree/aggregation
        print "gold standard number " + str(len(gold_clusters))

        # not_found = []
        # fake_penguins[s] collects, for each value of nn in order, the number
        # of clusters that were found but have no match in the gold standard.
        fake_penguins = {}
        # NOTE(review): s presumably selects the data set built from the first
        # s users -- confirm where user_markings is populated.
        for s in [5, 10, 15]:
            print "== " + str(s)
            # not_found.append([])
            fake_penguins[s] = []

            # nn is the constructor argument of DivisiveDBSCAN.
            # NOTE(review): presumably the minimum cluster size -- confirm.
            for nn in [1, 2, 3, 4, 5]:
                user_identified_penguins, penguin_clusters, noise__ = DivisiveDBSCAN(nn).fit(
                    user_markings[s], user_ips[s], debug=True
                )

                # missed penguins - in gold standard but not found
                not_found = cluster_compare(penguin_clusters, gold_clusters)

                # fake penguins - found but not actually real
                fake_found = cluster_compare(gold_clusters, penguin_clusters)

                print len(not_found), len(fake_found)
                # not_found[-1].append(len(gold_clusters) -len(not_found))
                fake_penguins[s].append(len(fake_found))

                # Fraction of gold-standard clusters that were recovered:
                # (gold - missed) / gold. float() forces true division under
                # Python 2.
                overall_found[s][nn - 1].append((len(gold_clusters) - len(not_found)) / float(len(gold_clusters)))

        # false positives - fake penguins - can only be calculated after everything else is done
        for s in [5, 10, 15]:
            for nn in [1, 2, 3, 4, 5]:
                # fake_penguins[15][0] is the fake-cluster count for s=15, nn=1.
                # When it is zero, 0 is recorded for every (s, nn) pair.
                # NOTE(review): the snippet ends here; the handling of a
                # non-zero count is not visible.
                if fake_penguins[15][0] == 0:
                    overall_fake[s][nn - 1].append(0)
コード例 #6
0
            #print penguin_clusters
            #print noise__
            noise_points[s] = [x for x, u in noise__]
            print str(s) + "  -  " + str(len(user_identified_penguins))

            if len(user_identified_penguins) > 20:
                break
    except AssertionError:
        continue

    #if len(user_identified_penguins) == 0:
    #    continue

    # Runs only when the most recent clustering result holds at most 20
    # identified penguins.
    if len(user_identified_penguins) <= 20:
        #print noise__
        # Compare the first and the last entry of `penguins`. The result is not
        # used anywhere in the visible part of this snippet.
        # NOTE(review): presumably cluster_compare(a, b) returns the members of
        # b that have no counterpart in a -- confirm against clusterCompare.
        not_found = cluster_compare(penguins[0], penguins[-1])
        #if not_found == []:
        #    continue

        # Load the JPG named after object_id from the penguin image directory
        # and show it as the plot background.
        # NOTE(review): cbook is presumably matplotlib.cbook; get_sample_data
        # is being handed an absolute path here -- confirm that this is
        # supported by the installed matplotlib version.
        image_file = cbook.get_sample_data(base_directory +
                                           "/Databases/penguins/images/" +
                                           object_id + ".JPG")
        image = plt.imread(image_file)
        fig, ax = plt.subplots()
        im = ax.imshow(image)

        # Overlay the points stored in penguins_center[5] as red dots.
        try:
            X, Y = zip(*penguins_center[5])
            plt.plot(X, Y, '.', color="red")
        except ValueError:
            # Presumably raised when penguins_center[5] is empty, so there is
            # nothing to unpack into X and Y; in that case nothing is plotted.
            pass
コード例 #7
0
    sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

from clusterCompare import cluster_compare

if os.path.exists("/home/ggdhines"):
    base_directory = "/home/ggdhines"
else:
    base_directory = "/home/greg"

penguins,temp = pickle.load(open(base_directory+"/Databases/penguins_vote.pickle","rb"))

#does this cluster have a corresponding cluster in the gold standard data?
#ie. does this cluster represent an actual penguin?

for image_index in range(len(penguins[5])):
    print len(penguins[5])
    user_penguins = penguins[5][0]
    gold_penguins = penguins[5][1]

    print len(user_penguins)
    print len(user_penguins[0])

    print "==="
    print len(user_penguins)
    for upen in user_penguins:
        print upen
        print cluster_compare(gold_penguins,[upen,])

    break
コード例 #8
0
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

from clusterCompare import cluster_compare

if os.path.exists("/home/ggdhines"):
    base_directory = "/home/ggdhines"
else:
    base_directory = "/home/greg"

penguins, temp = pickle.load(open(base_directory + "/Databases/penguins_vote.pickle", "rb"))

# does this cluster have a corresponding cluster in the gold standard data?
# ie. does this cluster represent an actual penguin?

for image_index in range(len(penguins[5])):
    print len(penguins[5])
    user_penguins = penguins[5][0]
    gold_penguins = penguins[5][1]

    print len(user_penguins)
    print len(user_penguins[0])

    print "==="
    print len(user_penguins)
    for upen in user_penguins:
        print upen
        print cluster_compare(gold_penguins, [upen])

    break