Example #1
0
 def _run_locally(self, sh, result_name, func_call):
    namespace = {}
    namespace.update(sh.locals)
    if len(self.images) == 1:
       namespace['image'] = self.images[0]
       source = 'image.%s' % func_call
       result = eval(source, namespace)
       if result_name != '':
          sh.locals[result_name] = result
    else:
       namespace['images'] = self.images
       if result_name != '':
          sh.locals[result_name] = []
       progress = util.ProgressFactory(
         'Processing images...', len(self.images))
       try:
          for i in range(len(self.images)):
             source = "images[%d].%s" % (i, func_call)
             result = eval(source, namespace)
             if result_name != '':
                sh.locals[result_name].append(result)
             progress.step()
       finally:
          progress.kill()
    if result_name != '':
       sh.run(result_name)
Example #2
0
   def make_result(self):
      """Build a confusion matrix over the classifier's glyphs.

      Returns a one-element list ``[("Confusion Matrix", grid)]`` where
      *grid* is a table whose first row/column hold the example glyphs and
      whose cells hold the percentage of glyphs of the row class that were
      classified as the column class."""
      self.make_example_glyphs()
      # result[id0][id1] counts glyphs whose main id is id0 but which were
      # classified as id1.
      result = {}
      for id0 in self.example_glyphs.keys():
         leaf = {}
         for id1 in self.example_glyphs.keys():
            leaf[id1] = 0
         result[id0] = leaf

      classifier = self.classifier
      glyphs = classifier.get_glyphs()
      # The progress bar is stepped only every 50 glyphs, hence length/50.
      progress = util.ProgressFactory("Generating confusion matrix...", len(glyphs) / 50)
      try:
         for i, glyph in enumerate(glyphs):
            idname, conf = classifier.classify_with_images(glyphs, glyph, True)
            # NOTE(review): assumes glyph.get_main_id() and idname[0][1] are
            # always keys of result (i.e. of example_glyphs) -- confirm.
            result[glyph.get_main_id()][idname[0][1]] += 1
            if i % 50 == 0:
               progress.step()
      finally:
         progress.kill()

      ids = result.keys()
      ids.sort()
      grid = self.make_grid(len(ids) + 1, len(ids) + 1)
      for i, cid in enumerate(ids):
         grid[0][i+1] = self.example_glyphs[cid]
         grid[i+1][0] = self.example_glyphs[cid]
      for i, id0 in enumerate(ids):
         res = result[id0]
         total = sum(res.values())
         # The inner loop variable used to shadow id0; renamed to id1.
         for j, id1 in enumerate(ids):
            if total == 0:
               # No glyph of class id0 was seen at all: report 0% instead of
               # dividing by zero.
               grid[i+1][j+1] = "0%"
            else:
               grid[i+1][j+1] = str(int((float(res[id1]) / total) * 100.0)) + "%"
      return [("Confusion Matrix", grid)]
Example #3
0
 def _find_group_unions(self,
                        G,
                        evaluate_function,
                        max_parts_per_group=5,
                        max_graph_size=16,
                        criterion="min"):
     """Search each subgraph of *G* for the best grouping of glyphs.

     For each multi-glyph subgroup of the best partitioning, a union image
     is built and classified, and every member glyph is tagged with a
     ``_group._part.<classname>`` heuristic id.  Returns the list of union
     images that were created."""
     progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
     try:
         found_unions = []
         for root in G.get_subgraph_roots():
             # Subgraphs that are too large are too expensive to optimize.
             if G.size_of_subgraph(root) > max_graph_size:
                 continue
             best_grouping = G.optimize_partitions(root, evaluate_function,
                                                   max_parts_per_group,
                                                   max_graph_size,
                                                   criterion)
             if best_grouping is not None:
                 for subgroup in best_grouping:
                     if len(subgroup) > 1:
                         union = image_utilities.union_images(subgroup)
                         found_unions.append(union)
                         classification, confidence = self.guess_glyph_automatic(
                             union)
                         union.classify_heuristic(classification)
                         part_name = "_group._part." + classification[0][1]
                         # BUG FIX: this loop was previously outside the
                         # ``if len(subgroup) > 1`` block, so a size-1
                         # subgroup raised NameError (first iteration) or
                         # reused a stale part_name from an earlier subgroup.
                         for glyph in subgroup:
                             glyph.classify_heuristic(part_name)
             progress.step()
     finally:
         progress.kill()
     return found_unions
Example #4
0
 def generate_features_on_glyphs(self, glyphs):
     """Generates features for all the given glyphs, stepping a progress
     bar once per glyph."""
     progress = util.ProgressFactory("Generating features...",
                                     len(glyphs),
                                     numsteps=32)
     try:
         # The enumerate() index was never used; iterate directly.
         for glyph in glyphs:
             self.generate_features(glyph)
             progress.step()
     finally:
         # Always destroy the progress bar, even if feature generation fails.
         progress.kill()
Example #5
0
 def _pregroup(self, glyphs, function):
     """Build an undirected graph over *glyphs*, adding an edge between
     every pair of glyphs for which *function* returns a true value.
     Returns the resulting graph."""
     G = graph.Undirected()
     G.add_nodes(glyphs)
     progress = util.ProgressFactory("Pre-grouping glyphs...", len(glyphs))
     try:
         # Visit each unordered pair exactly once; the progress bar steps
         # once per outer glyph.
         for i, gi in enumerate(glyphs):
             for gj in glyphs[i + 1:]:
                 if function(gi, gj):
                     G.add_edge(gi, gj)
             progress.step()
     finally:
         progress.kill()
     return G
Example #6
0
def generate_features_list(list, features='all'):
    """
    Generate features on a list of images.

    *features*
      Follows the same rules as for generate_features_.
    """
    # NOTE(review): the parameter name shadows the builtin ``list``; kept
    # unchanged for backward compatibility with keyword callers.
    from gamera import core, util
    ff = core.Image.get_feature_functions(features)
    # ``//`` keeps the progress length an integer under Python 3 as well
    # (identical to ``/`` for ints under Python 2).
    progress = util.ProgressFactory("Generating features...", len(list) // 10)
    try:
        for i, glyph in enumerate(list):
            glyph.generate_features(ff)
            # Step the progress bar only once per ten glyphs to cut overhead.
            if i % 10 == 0:
                progress.step()
    finally:
        progress.kill()
Example #7
0
   def unique_distances(self, images, normalize=True):
      """**unique_distances** (ImageList *images*, Bool *normalize* = ``True``)

Return a list of the unique pairs of images in the passed in list
and the distances between them. The return list is a list of tuples
of (distance, imagea, imageb) so that it easy to sort.

*normalize*
  When true, the features are normalized before performing the distance
  calculations."""
      self.generate_features_on_glyphs(images)
      l = len(images)
      progress = util.ProgressFactory("Generating unique distances...", l)
      dists = self._unique_distances(images, progress.step, normalize)
      #dists = self._unique_distances(images)
      progress.kill()
      return dists
Example #8
0
   def distance_matrix(self, images, normalize=True):
      """**distance_matrix** (ImageList *images*, Bool *normalize* = ``True``)

Create a symmetric FloatImage containing all of the
distances between the images in the list passed in. This is useful
because it allows you to find the distance between any two pairs
of images regardless of the order of the pairs.

*normalize*
  When true, the features are normalized before performing the distance
  calculations."""
      self.generate_features_on_glyphs(images)
      l = len(images)
      progress = util.ProgressFactory("Generating unique distances...", l)
      m = self._distance_matrix(images, progress.step, normalize)
      #m = self._distance_matrix(images)
      progress.kill()
      return m
Example #9
0
 def _classify_list_automatic(self,
                              glyphs,
                              max_recursion=10,
                              recursion_level=0,
                              progress=None):
     """Automatically classify *glyphs*, recursing into any glyphs created
     by split operations.

     Returns a tuple ``(added, removed)``: *added* lists glyphs created by
     splits (including those from recursive passes); *removed* lists child
     glyphs superseded by reclassification of their parents.
     """
     # There is a slightly convoluted handling of the progress bar here, since
     # this function is called recursively on split glyphs
     if recursion_level == 0:
         progress = util.ProgressFactory("Classifying glyphs...",
                                         len(glyphs))
     try:
         # Recursion guard: stop splitting deeper than max_recursion.
         if (recursion_level > max_recursion):
             return [], []
         added = []
         # ``removed`` is used as a set (Python 2 idiom: dict with None values).
         removed = {}
         # Children of glyphs about to be (re)classified will be superseded,
         # so mark them for removal up front.
         for glyph in glyphs:
             if glyph.classification_state in (core.UNCLASSIFIED,
                                               core.AUTOMATIC):
                 for child in glyph.children_images:
                     removed[child] = None
         for glyph in glyphs:
             # Skip feature generation for glyphs already marked as removed.
             if not removed.has_key(glyph):
                 self.generate_features(glyph)
             if (glyph.classification_state
                     in (core.UNCLASSIFIED, core.AUTOMATIC)):
                 (id, conf) = self._classify_automatic_impl(glyph)
                 glyph.classify_automatic(id)
                 glyph.confidence = conf
                 # Splits may create new glyphs: collect them for the
                 # recursive pass below and grow the progress bar to match.
                 adds = self._do_splits(self, glyph)
                 progress.add_length(len(adds))
                 added.extend(adds)
             progress.step()
         if len(added):
             # Classify the split products, reusing the same progress bar.
             added_recurse, removed_recurse = self._classify_list_automatic(
                 added, max_recursion, recursion_level + 1, progress)
             added.extend(added_recurse)
             for glyph in removed_recurse:
                 removed[glyph] = None
     finally:
         # Only the outermost call owns (and therefore kills) the progress bar.
         if recursion_level == 0:
             progress.kill()
     return added, removed.keys()
Example #10
0
   def knndistance_statistics(self, k=0):
      """**knndistance_statistics** (Int *k* = 0)

Returns a list of average distances between each training sample and its *k*
nearest neighbors. So, when you have *n* training samples, *n* average
distance values are returned. This can be useful for distance rejection.

Each item in the returned list is a tuple (*d*, *classname*), where
*d* is the average kNN distance and *classname* is the class name of the
training sample. In most cases, the class name is of little interest,
but it could be useful if you need class conditional distance statistics.
Beware however, that the average distance is computed over neighbors
belonging to any class, not just the same class. If you need the latter,
you must create a new classifier from training samples belonging only
to the specific class.

When *k* is zero, the property ``num_k`` of the knn classifier is used.
"""
      self.instantiate_from_images(self.database, self.normalize)
      progress = util.ProgressFactory("Generating knndistance statistics...", len(self.database))
      stats = self._knndistance_statistics(k, progress.step)
      progress.kill()
      return stats
Example #11
0
    def perform_rules(self,
                      glyphs,
                      grid_size=100,
                      recurse=0,
                      progress=None,
                      _recursion_level=0):
        """Apply all registered rules to *glyphs*.

        Glyphs matching each rule's id-name regex are indexed on a grid of
        *grid_size* cells and every matching rule is invoked on them (and,
        for multi-glyph rules, on nearby glyphs).  When *recurse* is true,
        rules are re-applied to any glyphs the rules added.

        Returns a tuple ``(added, removed)`` of glyph lists.  Raises
        RuleEngineError if any rule function raised an exception."""
        self._exceptions = util.Set()
        # Hard cap on rule-triggered recursion.
        if _recursion_level > 10:
            return [], []
        elif _recursion_level == 0:
            # Only the outermost call creates (and later kills) the progress
            # bar; recursive calls reuse the one passed in.
            progress = util.ProgressFactory("Performing rules...")

        try:
            grid_index = group.GridIndexWithKeys(glyphs, grid_size, grid_size)
            # Used as a set of regex strings that matched at least one glyph.
            found_regexs = {}
            for regex_string, compiled in self._regexs.items():
                for glyph in glyphs:
                    if glyph.match_id_name(compiled):
                        grid_index.add_glyph_by_key(glyph, regex_string)
                        found_regexs[regex_string] = None

            # This loop is only so the progress bar can do something useful.
            for regex in found_regexs.iterkeys():
                progress.add_length(
                    len(self._rules_by_regex[regex]) *
                    len(grid_index.get_glyphs_by_key(regex)))

            added = {}
            removed = {}
            for regex in found_regexs.iterkeys():
                for rule in self._rules_by_regex[regex]:
                    # The rule's default arguments encode its glyph specs.
                    glyph_specs = rule.func_defaults
                    for glyph in grid_index.get_glyphs_by_key(regex):
                        if len(glyph_specs) == 1:
                            self._deal_with_result(rule, (glyph, ), added,
                                                   removed)
                        elif len(glyph_specs) == 2:
                            for glyph2 in grid_index.get_glyphs_around_glyph_by_key(
                                    glyph, glyph_specs[1]):
                                stop = self._deal_with_result(
                                    rule, (glyph, glyph2), added, removed)
                                if not self._reapply and stop:
                                    break
                        else:
                            # Three or more glyph parameters: try every
                            # combination of nearby glyphs matching the
                            # remaining specs.
                            seed = [
                                list(
                                    grid_index.get_glyphs_around_glyph_by_key(
                                        glyph, x)) for x in glyph_specs[1:]
                            ]
                            for combination in util.combinations(seed):
                                stop = self._deal_with_result(
                                    rule, [glyph] + combination, added,
                                    removed)
                                if not self._reapply and stop:
                                    break
                        progress.step()
        finally:
            if _recursion_level == 0:
                progress.kill()
        if recurse and len(added):
            # BUG FIX: the recursive call used to pass
            # (added.keys(), 1, progress, _recursion_level + 1) positionally,
            # which bound 1 to grid_size, the progress object to recurse and
            # the recursion level to progress.  Align each value with its
            # intended parameter.
            # NOTE(review): _deal_with_result is called here with a single
            # tuple argument, unlike the (rule, glyphs, added, removed) form
            # above -- confirm its signature supports this.
            self._deal_with_result(
                self.perform_rules(added.keys(), grid_size, 1, progress,
                                   _recursion_level + 1))

        if len(self._exceptions):
            s = ("One or more of the rule functions caused an exception.\n" +
                 "(Each exception listed only once):\n\n" +
                 "\n".join(self._exceptions))
            raise RuleEngineError(s)

        return added.keys(), removed.keys()