Ejemplo n.º 1
0
 def find_tall_glyphs(self, stdev=20):
     """Return the indices of glyphs whose height is far from the average.

     For every glyph in ``self.glyphs`` the value
     ``stats.samplestdev([glyph.nrows, self.avg_glyph_height])`` is computed;
     the glyph's position is reported when that value is strictly greater
     than ``stdev``.

     stdev -- threshold the computed deviation has to exceed (default 20).

     Returns a list of integer indices into ``self.glyphs``, in ascending
     order.
     """
     from gamera import stats
     return [index
             for index, glyph in enumerate(self.glyphs)
             if stats.samplestdev([glyph.nrows, self.avg_glyph_height]) > stdev]
Ejemplo n.º 2
0
 def find_tall_glyphs(self, stdev=20):
     """Return the indices of glyphs whose height is far from the average.

     For every glyph in ``self.glyphs`` the value
     ``stats.samplestdev([glyph.nrows, self.avg_glyph_height])`` is computed;
     the glyph's position is reported when that value is strictly greater
     than ``stdev``.

     stdev -- threshold the computed deviation has to exceed (default 20).

     Returns a list of integer indices into ``self.glyphs``, in ascending
     order.
     """
     from gamera import stats
     return [index
             for index, glyph in enumerate(self.glyphs)
             if stats.samplestdev([glyph.nrows, self.avg_glyph_height]) > stdev]
Ejemplo n.º 3
0
def make_subtrees_stddev(graph, ratio, distance, relabel=1, lab="cluster."):
    """Remove outlier edges from ``graph`` and return the node payloads.

    For each edge, ``get_lengths`` is called with the edge's ``from_node``
    and ``distance`` to fill a list of lengths.  One occurrence of the
    edge's own cost is taken out of that list; if more than one length
    remains, the edge is marked for removal when
    ``stats.samplestdev([mean of remaining lengths, edge.cost])`` is greater
    than ``ratio``.  Marked edges are removed only after every edge has been
    examined.

    When ``relabel`` is true, every node is first reset with
    ``classify_manual("")``; afterwards each node whose main id is still the
    empty string is handed to ``label`` together with ``lab`` and a counter
    that starts at 0 and grows by one per call.

    Returns a list holding the result of calling each node of the graph.
    """
    doomed = []
    for edge in graph.get_edges():
        neighbourhood = []
        get_lengths(edge.from_node, distance, neighbourhood, 0, {})
        # Exclude this edge's own cost from the comparison set.
        neighbourhood.remove(edge.cost)
        if len(neighbourhood) <= 1:
            continue
        deviation = stats.samplestdev([stats.mean(neighbourhood), edge.cost])
        if deviation > ratio:
            doomed.append(edge)

    # Removal is deferred until the scan over the edges has finished.
    for edge in doomed:
        graph.remove_edge(edge)

    if relabel:
        for node in graph.get_nodes():
            node().classify_manual("")
        next_label = 0
        for node in graph.get_nodes():
            if node().get_main_id() == "":
                label(graph, node, lab, next_label)
                next_label += 1

    return [node() for node in graph.get_nodes()]
Ejemplo n.º 4
0
def make_subtrees_stddev(graph, ratio, distance, relabel=1, lab="cluster."):
    """Remove outlier edges from ``graph`` and return the node payloads.

    For each edge, ``get_lengths`` is called with the edge's ``from_node``
    and ``distance`` to fill a list of lengths.  One occurrence of the
    edge's own cost is taken out of that list; if more than one length
    remains, the edge is marked for removal when
    ``stats.samplestdev([mean of remaining lengths, edge.cost])`` is greater
    than ``ratio``.  Marked edges are removed only after every edge has been
    examined.

    When ``relabel`` is true, every node is first reset with
    ``classify_manual("")``; afterwards each node whose main id is still the
    empty string is handed to ``label`` together with ``lab`` and a counter
    that starts at 0 and grows by one per call.

    Returns a list holding the result of calling each node of the graph.
    """
    doomed = []
    for edge in graph.get_edges():
        neighbourhood = []
        get_lengths(edge.from_node, distance, neighbourhood, 0, {})
        # Exclude this edge's own cost from the comparison set.
        neighbourhood.remove(edge.cost)
        if len(neighbourhood) <= 1:
            continue
        deviation = stats.samplestdev([stats.mean(neighbourhood), edge.cost])
        if deviation > ratio:
            doomed.append(edge)

    # Removal is deferred until the scan over the edges has finished.
    for edge in doomed:
        graph.remove_edge(edge)

    if relabel:
        for node in graph.get_nodes():
            node().classify_manual("")
        next_label = 0
        for node in graph.get_nodes():
            if node().get_main_id() == "":
                label(graph, node, lab, next_label)
                next_label += 1

    return [node() for node in graph.get_nodes()]
Ejemplo n.º 5
0
def test():
   """Compare several kNN confidence measures on a hard-coded glyph file.

   The glyphs are loaded from a fixed XML path, passed through
   ``strip_small_categories`` and classified one by one with
   ``k.classify_with_images(glyphs, x, 1)``.  For every glyph one line is
   written to ``results.txt``::

      predicted id,correct flag (1/0),global,local,all,graph

   where ``global`` and ``local`` are ``1 - distance / max distance`` using
   the global and the per-glyph maximum, and ``all`` / ``graph`` are
   ``stats.samplestdev`` of the distance paired with an entry of
   ``knn.get_glyphs_stats`` / ``get_graph_stats`` for the predicted id.
   Summary statistics are printed to stdout.

   Ratios and statistics that cannot be computed because a bucket is empty
   are reported as -1 instead of raising ZeroDivisionError.

   Takes no arguments and returns None.
   """

   def ratio(count, total, scale=1.0):
      """Return ``count / total * scale``, or -1.0 when ``total`` is zero."""
      if not total:
         return -1.0
      return count / float(total) * scale

   def summarize(buckets):
      """Return (correct mean, correct stdev, incorrect mean, incorrect stdev).

      ``buckets[1]`` holds the values of correct classifications and
      ``buckets[0]`` those of incorrect ones; an empty bucket yields -1
      for both of its values.
      """
      summary = []
      for bucket in (buckets[1], buckets[0]):
         if bucket:
            summary.append(stats.mean(bucket))
            summary.append(stats.samplestdev(bucket))
         else:
            summary.extend([-1, -1])
      return tuple(summary)

   def report_threshold(buckets):
      """Print the mean of the pooled ``buckets`` and the resulting miscounts."""
      pooled = buckets[0] + buckets[1]
      pooled.sort()
      threshold = stats.mean(pooled)
      print("threshold: " + str(threshold))
      # NOTE(review): the counts below are always taken from ``graph``, even
      # when the threshold comes from ``all`` or ``local_max``.  This mirrors
      # the original code; confirm whether ``buckets`` was intended here.
      missed = len([value for value in graph[0] if value < threshold])
      print("%s %s" % (missed, ratio(missed, len(graph[0]), 100)))
      missed = len([value for value in graph[1] if value >= threshold])
      print("%s %s" % (missed, ratio(missed, len(graph[1]), 100)))

   def otsu_threshold(p):
      """Return the index maximising Otsu's between-class variance of ``p``.

      NOTE(review): this helper is not called anywhere in test().
      ``p`` is presumably a normalised histogram -- TODO confirm.
      Returns 127 when no index produces a usable criterion.
      """
      size = len(p)
      mu_T = 0.0
      for i in range(size):
         mu_T += i * p[i]

      sigma_T = 0.0
      for i in range(size):
         sigma_T += (i - mu_T) * (i - mu_T) * p[i]

      # Skip the empty bins at both ends.  The upper scan must only move
      # k_high; it used to advance k_low as well.
      k_low = 0
      while (p[k_low] == 0) and (k_low < (size - 1)):
         k_low += 1
      k_high = size - 1
      while (p[k_high] == 0) and (k_high > 0):
         k_high -= 1

      criterion = 0.0
      thresh = 127
      omega_k = 0.0
      mu_k = 0.0
      for idx in range(k_low, k_high + 1):
         omega_k += p[idx]
         mu_k += idx * p[idx]
         denominator = omega_k * (1 - omega_k)
         if denominator == 0 or sigma_T == 0:
            # The criterion is undefined here; dividing would raise.
            continue
         expr_1 = mu_T * omega_k - mu_k
         sigma_b_k = expr_1 * expr_1 / denominator
         if criterion < sigma_b_k / sigma_T:
            criterion = sigma_b_k / sigma_T
            thresh = idx
      return thresh

   glyphs = gamera_xml.glyphs_from_xml(r"C:\Documents and Settings\Karl MacMillan\Desktop\test\prod.xml")

   glyphs = strip_small_categories(glyphs)
   from gamera.plugins import features
   k = knn.kNN()
   print(k.features)
   features.generate_features_list(glyphs, k.feature_functions)
   print("Getting gstats")

   graph_stats = get_graph_stats(glyphs, k)
   gstats = knn.get_glyphs_stats(glyphs)

   max_dist = max_distance(glyphs, k)
   print(max_dist)

   # Each measure keeps two buckets: index 0 = incorrect, index 1 = correct.
   global_max = [[], []]
   local_max = [[], []]
   all_dev = [[], []]
   graph = [[], []]
   gr_ccorrect = 0
   gr_icorrect = 0

   results = open("results.txt", "w")
   try:
      for x in glyphs:
         local_max_dist = local_max_distance(glyphs, x, k)
         ans = k.classify_with_images(glyphs, x, 1)
         distance = ans[0][0]
         predicted = ans[0][1]

         correct = 0
         if x.get_main_id() == predicted:
            correct = 1

         global_conf = 1.0 - (distance / max_dist)
         global_max[correct].append(global_conf)

         local_conf = 1.0 - (distance / local_max_dist)
         local_max[correct].append(local_conf)

         all_value = stats.samplestdev([distance, gstats[predicted][1]])
         all_dev[correct].append(all_value)

         gr = stats.samplestdev([distance, graph_stats[predicted]])
         if gr <= 1 and correct:
            gr_ccorrect += 1
         if gr > 1 and not correct:
            gr_icorrect += 1
         graph[correct].append(gr)

         fields = [predicted, str(correct), str(global_conf),
                   str(local_conf), str(all_value), str(gr)]
         results.write(",".join(fields) + "\n")
   finally:
      # Always release the file handle, even if classification fails.
      results.close()

   print("num correct: %d num incorrect: %d" % (len(global_max[1]), len(global_max[0])))
   print("confidence %f %f %f" % (ratio(gr_ccorrect + gr_icorrect, len(glyphs)),
                                  ratio(gr_ccorrect, len(glyphs) - len(global_max[0])),
                                  ratio(gr_icorrect, len(glyphs) - len(global_max[1]))))

   print("global correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % summarize(global_max))
   print("local correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % summarize(local_max))
   print("all correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % summarize(all_dev))
   print("graph correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % summarize(graph))

   report_threshold(graph)
   report_threshold(all_dev)
   report_threshold(local_max)
Ejemplo n.º 6
0
def test():
    """Compare several kNN confidence measures on a hard-coded glyph file.

    The glyphs are loaded from a fixed XML path, passed through
    ``strip_small_categories`` and classified one by one with
    ``k.classify_with_images(glyphs, x, 1)``.  For every glyph one line is
    written to ``results.txt``::

        predicted id,correct flag (1/0),global,local,all,graph

    where ``global`` and ``local`` are ``1 - distance / max distance`` using
    the global and the per-glyph maximum, and ``all`` / ``graph`` are
    ``stats.samplestdev`` of the distance paired with an entry of
    ``knn.get_glyphs_stats`` / ``get_graph_stats`` for the predicted id.
    Summary statistics are printed to stdout.

    Ratios and statistics that cannot be computed because a bucket is empty
    are reported as -1 instead of raising ZeroDivisionError.

    Takes no arguments and returns None.
    """

    def ratio(count, total, scale=1.0):
        """Return ``count / total * scale``, or -1.0 when ``total`` is zero."""
        if not total:
            return -1.0
        return count / float(total) * scale

    def summarize(buckets):
        """Return (correct mean, correct stdev, incorrect mean, incorrect stdev).

        ``buckets[1]`` holds the values of correct classifications and
        ``buckets[0]`` those of incorrect ones; an empty bucket yields -1
        for both of its values.
        """
        summary = []
        for bucket in (buckets[1], buckets[0]):
            if bucket:
                summary.append(stats.mean(bucket))
                summary.append(stats.samplestdev(bucket))
            else:
                summary.extend([-1, -1])
        return tuple(summary)

    def report_threshold(buckets):
        """Print the mean of the pooled ``buckets`` and the resulting miscounts."""
        pooled = sorted(buckets[0] + buckets[1])
        threshold = stats.mean(pooled)
        print("threshold: " + str(threshold))
        # NOTE(review): the counts below are always taken from ``graph``,
        # even when the threshold comes from ``all`` or ``local_max``.  This
        # mirrors the original code; confirm whether ``buckets`` was intended.
        missed = len([value for value in graph[0] if value < threshold])
        print(missed, ratio(missed, len(graph[0]), 100))
        missed = len([value for value in graph[1] if value >= threshold])
        print(missed, ratio(missed, len(graph[1]), 100))

    def otsu_threshold(p):
        """Return the index maximising Otsu's between-class variance of ``p``.

        NOTE(review): this helper is not called anywhere in test().
        ``p`` is presumably a normalised histogram -- TODO confirm.
        Returns 127 when no index produces a usable criterion.
        """
        size = len(p)
        mu_T = 0.0
        for i in range(size):
            mu_T += i * p[i]

        sigma_T = 0.0
        for i in range(size):
            sigma_T += (i - mu_T) * (i - mu_T) * p[i]

        # Skip the empty bins at both ends.  The upper scan must only move
        # k_high; it used to advance k_low as well.
        k_low = 0
        while (p[k_low] == 0) and (k_low < (size - 1)):
            k_low += 1
        k_high = size - 1
        while (p[k_high] == 0) and (k_high > 0):
            k_high -= 1

        criterion = 0.0
        thresh = 127
        omega_k = 0.0
        mu_k = 0.0
        for idx in range(k_low, k_high + 1):
            omega_k += p[idx]
            mu_k += idx * p[idx]
            denominator = omega_k * (1 - omega_k)
            if denominator == 0 or sigma_T == 0:
                # The criterion is undefined here; dividing would raise.
                continue
            expr_1 = mu_T * omega_k - mu_k
            sigma_b_k = expr_1 * expr_1 / denominator
            if criterion < sigma_b_k / sigma_T:
                criterion = sigma_b_k / sigma_T
                thresh = idx
        return thresh

    glyphs = gamera_xml.glyphs_from_xml(
        r"C:\Documents and Settings\Karl MacMillan\Desktop\test\prod.xml")

    glyphs = strip_small_categories(glyphs)
    from gamera.plugins import features
    k = knn.kNN()
    print(k.features)
    features.generate_features_list(glyphs, k.feature_functions)
    print("Getting gstats")

    graph_stats = get_graph_stats(glyphs, k)
    gstats = knn.get_glyphs_stats(glyphs)

    max_dist = max_distance(glyphs, k)
    print(max_dist)

    # Each measure keeps two buckets: index 0 = incorrect, index 1 = correct.
    global_max = [[], []]
    local_max = [[], []]
    all_dev = [[], []]
    graph = [[], []]
    gr_ccorrect = 0
    gr_icorrect = 0

    # The context manager closes results.txt even if classification fails.
    with open("results.txt", "w") as results:
        for x in glyphs:
            local_max_dist = local_max_distance(glyphs, x, k)
            ans = k.classify_with_images(glyphs, x, 1)
            distance = ans[0][0]
            predicted = ans[0][1]

            correct = 1 if x.get_main_id() == predicted else 0

            global_conf = 1.0 - (distance / max_dist)
            global_max[correct].append(global_conf)

            local_conf = 1.0 - (distance / local_max_dist)
            local_max[correct].append(local_conf)

            all_value = stats.samplestdev([distance, gstats[predicted][1]])
            all_dev[correct].append(all_value)

            gr = stats.samplestdev([distance, graph_stats[predicted]])
            if gr <= 1 and correct:
                gr_ccorrect += 1
            if gr > 1 and not correct:
                gr_icorrect += 1
            graph[correct].append(gr)

            fields = [predicted, str(correct), str(global_conf),
                      str(local_conf), str(all_value), str(gr)]
            results.write(",".join(fields) + "\n")

    print("num correct: %d num incorrect: %d" %
          (len(global_max[1]), len(global_max[0])))
    print("confidence %f %f %f" %
          (ratio(gr_ccorrect + gr_icorrect, len(glyphs)),
           ratio(gr_ccorrect, len(glyphs) - len(global_max[0])),
           ratio(gr_icorrect, len(glyphs) - len(global_max[1]))))

    print("global correct avg: %f stdev: %f incorrect avg: %f stddev: %f" %
          summarize(global_max))
    print("local correct avg: %f stdev: %f incorrect avg: %f stddev: %f" %
          summarize(local_max))
    print("all correct avg: %f stdev: %f incorrect avg: %f stddev: %f" %
          summarize(all_dev))
    print("graph correct avg: %f stdev: %f incorrect avg: %f stddev: %f" %
          summarize(graph))

    report_threshold(graph)
    report_threshold(all_dev)
    report_threshold(local_max)