def _run_locally(self, sh, result_name, func_call):
    """Evaluate *func_call* as a method call on the loaded image(s) inside
    the shell's namespace.

    With a single image, evaluates ``image.<func_call>``; with several,
    evaluates ``images[i].<func_call>`` for each image behind a progress
    bar.  If *result_name* is non-empty, the result (or list of results)
    is stored in ``sh.locals[result_name]`` and the shell is asked to
    display it via ``sh.run``.

    NOTE(review): this uses eval() on a constructed source string; it is
    only safe because *func_call* comes from the application's own UI,
    not from untrusted input -- confirm callers never pass user data.
    """
    # Work on a copy of the shell's namespace so the eval can see its
    # bindings without us mutating them (only sh.locals is written below).
    namespace = {}
    namespace.update(sh.locals)
    if len(self.images) == 1:
        # Single image: one eval, no progress bar needed.
        namespace['image'] = self.images[0]
        source = 'image.%s' % func_call
        result = eval(source, namespace)
        if result_name != '':
            sh.locals[result_name] = result
    else:
        namespace['images'] = self.images
        if result_name != '':
            sh.locals[result_name] = []
        progress = util.ProgressFactory(
            'Processing images...', len(self.images))
        try:
            for i in range(len(self.images)):
                source = "images[%d].%s" % (i, func_call)
                result = eval(source, namespace)
                if result_name != '':
                    sh.locals[result_name].append(result)
                progress.step()
        finally:
            # Always dismiss the progress dialog, even on error.
            progress.kill()
    if result_name != '':
        sh.run(result_name)
def make_result(self):
    """Build a confusion matrix for the classifier's training data.

    Every glyph in the classifier is re-classified against the full
    training set; ``result[actual_id][predicted_id]`` counts how often a
    glyph whose true class is *actual_id* was classified as
    *predicted_id*.  The counts are converted to row percentages and laid
    out in a grid with example glyphs as row/column headers.

    Returns a list with a single ("Confusion Matrix", grid) tuple.
    """
    self.make_example_glyphs()
    # result[actual_id][predicted_id] -> occurrence count.
    result = {}
    for id0 in self.example_glyphs.keys():
        result[id0] = dict.fromkeys(self.example_glyphs.keys(), 0)
    classifier = self.classifier
    glyphs = classifier.get_glyphs()
    # The bar is stepped only once per 50 glyphs, hence the // 50 length.
    # (// keeps this an int on both Python 2 and 3.)
    progress = util.ProgressFactory("Generating confusion matrix...",
                                    len(glyphs) // 50)
    try:
        for i, glyph in enumerate(glyphs):
            idname, conf = classifier.classify_with_images(
                glyphs, glyph, True)
            result[glyph.get_main_id()][idname[0][1]] += 1
            if i % 50 == 0:
                progress.step()
    finally:
        progress.kill()
    ids = sorted(result.keys())
    grid = self.make_grid(len(ids) + 1, len(ids) + 1)
    # First row and first column show an example glyph for each class.
    for i, class_id in enumerate(ids):
        grid[0][i + 1] = self.example_glyphs[class_id]
        grid[i + 1][0] = self.example_glyphs[class_id]
    for i, id0 in enumerate(ids):
        res = result[id0]
        total = sum(res.values())
        for j, id1 in enumerate(ids):
            # Guard against a class that never occurred in the training
            # set (total == 0 would otherwise raise ZeroDivisionError).
            if total == 0:
                percent = 0
            else:
                percent = int((float(res[id1]) / total) * 100.0)
            grid[i + 1][j + 1] = str(percent) + "%"
    return [("Confusion Matrix", grid)]
def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                       max_graph_size=16, criterion="min"):
    """Search each subgraph of *G* for an optimal partition into groups
    and build union images for every multi-glyph group found.

    Subgraphs larger than *max_graph_size* are skipped.  Each union is
    classified automatically, and its member glyphs are heuristically
    tagged as ``_group._part.<classname>``.  Returns the list of union
    images.
    """
    progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
    unions = []
    try:
        for root in G.get_subgraph_roots():
            # Ignore subgraphs that are too large to optimize.
            if G.size_of_subgraph(root) > max_graph_size:
                continue
            grouping = G.optimize_partitions(
                root, evaluate_function, max_parts_per_group,
                max_graph_size, criterion)
            if grouping is not None:
                for part in grouping:
                    # Single-glyph parts need no union image.
                    if len(part) <= 1:
                        continue
                    union = image_utilities.union_images(part)
                    unions.append(union)
                    classification, confidence = \
                        self.guess_glyph_automatic(union)
                    union.classify_heuristic(classification)
                    part_name = "_group._part." + classification[0][1]
                    for member in part:
                        member.classify_heuristic(part_name)
            progress.step()
    finally:
        progress.kill()
    return unions
def generate_features_on_glyphs(self, glyphs):
    """Generate features for every glyph in *glyphs*, with a progress bar."""
    progress = util.ProgressFactory("Generating features...",
                                    len(glyphs), numsteps=32)
    try:
        for glyph in glyphs:
            self.generate_features(glyph)
            progress.step()
    finally:
        # Make sure the dialog goes away even if feature generation fails.
        progress.kill()
def _pregroup(self, glyphs, function):
    """Build an undirected graph over *glyphs*, adding an edge between
    every pair for which *function* returns a true value.

    Returns the populated graph.
    """
    G = graph.Undirected()
    G.add_nodes(glyphs)
    progress = util.ProgressFactory("Pre-grouping glyphs...", len(glyphs))
    try:
        # Test each unordered pair exactly once.
        for i, first in enumerate(glyphs):
            for second in glyphs[i + 1:]:
                if function(first, second):
                    G.add_edge(first, second)
            progress.step()
    finally:
        progress.kill()
    return G
def generate_features_list(list, features='all'):
    """
    Generate features on a list of images.

    *features*
      Follows the same rules as for generate_features_.
    """
    # NOTE: the parameter is named ``list`` (shadowing the builtin); kept
    # as-is because it is part of the public signature.
    from gamera import core, util
    ff = core.Image.get_feature_functions(features)
    # The bar is stepped once per 10 glyphs, so size it accordingly.
    # ``//`` keeps the length an int on both Python 2 and 3 (plain ``/``
    # would yield a float under Python 3).
    progress = util.ProgressFactory("Generating features...",
                                    len(list) // 10)
    try:
        for i, glyph in enumerate(list):
            glyph.generate_features(ff)
            if i % 10 == 0:
                progress.step()
    finally:
        progress.kill()
def unique_distances(self, images, normalize=True):
    """**unique_distances** (ImageList *images*, Bool *normalize* = ``True``)

    Return a list of the unique pairs of images in the passed in list
    and the distances between them. The return list is a list of
    tuples of (distance, imagea, imageb) so that it easy to sort.

    *normalize*
      When true, the features are normalized before performing the
      distance calculations."""
    self.generate_features_on_glyphs(images)
    l = len(images)
    progress = util.ProgressFactory("Generating unique distances...", l)
    try:
        dists = self._unique_distances(images, progress.step, normalize)
    finally:
        # Previously kill() was unconditional but not protected: an
        # exception in _unique_distances leaked the progress dialog.
        progress.kill()
    return dists
def distance_matrix(self, images, normalize=True):
    """**distance_matrix** (ImageList *images*, Bool *normalize* = ``True``)

    Create a symmetric FloatImage containing all of the distances
    between the images in the list passed in. This is useful because it
    allows you to find the distance between any two pairs of images
    regardless of the order of the pairs.

    *normalize*
      When true, the features are normalized before performing the
      distance calculations."""
    self.generate_features_on_glyphs(images)
    l = len(images)
    progress = util.ProgressFactory("Generating unique distances...", l)
    try:
        m = self._distance_matrix(images, progress.step, normalize)
    finally:
        # Previously kill() was not protected: an exception in
        # _distance_matrix leaked the progress dialog.
        progress.kill()
    return m
def _classify_list_automatic(self, glyphs, max_recursion=10,
                             recursion_level=0, progress=None):
    """Automatically classify *glyphs*, recursing into glyphs produced
    by splits up to *max_recursion* levels deep.

    Returns a (added, removed) pair: glyphs created by split operations
    and glyphs superseded by reclassification.
    """
    # There is a slightly convoluted handling of the progress bar here,
    # since this function is called recursively on split glyphs: only
    # the top-level call owns (creates and kills) the bar.
    if recursion_level == 0:
        progress = util.ProgressFactory("Classifying glyphs...",
                                        len(glyphs))
    try:
        if (recursion_level > max_recursion):
            return [], []
        added = []
        removed = {}
        # Children of glyphs that are about to be (re)classified are
        # obsolete; collect them so they are skipped below.
        for glyph in glyphs:
            if glyph.classification_state in (core.UNCLASSIFIED,
                                              core.AUTOMATIC):
                for child in glyph.children_images:
                    removed[child] = None
        for glyph in glyphs:
            # ``in`` replaces dict.has_key(), which was removed in
            # Python 3 (identical behavior on Python 2).
            if glyph not in removed:
                self.generate_features(glyph)
                if (glyph.classification_state in (core.UNCLASSIFIED,
                                                   core.AUTOMATIC)):
                    (id, conf) = self._classify_automatic_impl(glyph)
                    glyph.classify_automatic(id)
                    glyph.confidence = conf
                    adds = self._do_splits(self, glyph)
                    # Split products will be classified too; grow the
                    # progress bar to account for them.
                    progress.add_length(len(adds))
                    added.extend(adds)
            progress.step()
        if len(added):
            added_recurse, removed_recurse = self._classify_list_automatic(
                added, max_recursion, recursion_level + 1, progress)
            added.extend(added_recurse)
            for glyph in removed_recurse:
                removed[glyph] = None
    finally:
        if recursion_level == 0:
            progress.kill()
    # list() keeps the return a concrete list under Python 3 as well
    # (identical to the plain .keys() list on Python 2).
    return added, list(removed.keys())
def knndistance_statistics(self, k=0):
    """**knndistance_statistics** (Int *k* = 0)

    Returns a list of average distances between each training sample
    and its *k* nearest neighbors. So, when you have *n* training
    samples, *n* average distance values are returned. This can be
    useful for distance rejection.

    Each item in the returned list is a tuple (*d*, *classname*),
    where *d* is the average kNN distance and *classname* is the class
    name of the training sample. In most cases, the class name is of
    little interest, but it could be useful if you need class
    conditional distance statistics. Beware however, that the average
    distance is computed over neighbors belonging to any class, not
    just the same class. If you need the latter, you must create a new
    classifier from training samples belonging only to the specific
    class.

    When *k* is zero, the property ``num_k`` of the knn classifier is
    used.
    """
    self.instantiate_from_images(self.database, self.normalize)
    progress = util.ProgressFactory("Generating knndistance statistics...",
                                    len(self.database))
    try:
        stats = self._knndistance_statistics(k, progress.step)
    finally:
        # Previously kill() was not protected: an exception in
        # _knndistance_statistics leaked the progress dialog.
        progress.kill()
    return stats
def perform_rules(self, glyphs, grid_size=100, recurse=0, progress=None,
                  _recursion_level=0):
    """Apply all registered rules to *glyphs*.

    Glyphs are indexed on a grid of *grid_size* cells and bucketed by
    the id-name regexes the rules are registered under; each rule is
    then applied to every matching glyph (and, for multi-glyph rules,
    to nearby glyphs matching the rule's other specs).  When *recurse*
    is true, rules are re-applied to newly added glyphs, up to 10
    levels deep.

    Returns an (added, removed) pair of glyph lists.  Raises
    RuleEngineError if any rule function raised an exception.
    """
    self._exceptions = util.Set()
    if _recursion_level > 10:
        return [], []
    elif _recursion_level == 0:
        # Only the top-level call owns the progress bar.
        progress = util.ProgressFactory("Performing rules...")
    try:
        grid_index = group.GridIndexWithKeys(glyphs, grid_size, grid_size)
        # Register every glyph under each regex it matches.
        found_regexs = {}
        for regex_string, compiled in self._regexs.items():
            for glyph in glyphs:
                if glyph.match_id_name(compiled):
                    grid_index.add_glyph_by_key(glyph, regex_string)
                    found_regexs[regex_string] = None
        # This loop is only so the progress bar can do something useful.
        # (Plain dict iteration replaces the Python-2-only iterkeys().)
        for regex in found_regexs:
            progress.add_length(
                len(self._rules_by_regex[regex]) *
                len(grid_index.get_glyphs_by_key(regex)))
        added = {}
        removed = {}
        for regex in found_regexs:
            for rule in self._rules_by_regex[regex]:
                # The rule's keyword defaults encode the glyph specs it
                # expects (first spec is the matched glyph itself).
                glyph_specs = rule.func_defaults
                for glyph in grid_index.get_glyphs_by_key(regex):
                    if len(glyph_specs) == 1:
                        self._deal_with_result(
                            rule, (glyph, ), added, removed)
                    elif len(glyph_specs) == 2:
                        for glyph2 in \
                                grid_index.get_glyphs_around_glyph_by_key(
                                    glyph, glyph_specs[1]):
                            stop = self._deal_with_result(
                                rule, (glyph, glyph2), added, removed)
                            if not self._reapply and stop:
                                break
                    else:
                        seed = [
                            list(grid_index.get_glyphs_around_glyph_by_key(
                                glyph, x))
                            for x in glyph_specs[1:]]
                        for combination in util.combinations(seed):
                            stop = self._deal_with_result(
                                rule, [glyph] + combination, added, removed)
                            if not self._reapply and stop:
                                break
                    progress.step()
    finally:
        if _recursion_level == 0:
            progress.kill()
    if recurse and len(added):
        # BUG FIX: this call previously passed
        # (added.keys(), 1, progress, _recursion_level + 1) positionally,
        # which bound 1 to grid_size, progress to recurse, and the int
        # recursion level to progress -- so _recursion_level stayed 0 and
        # the recursion guard never fired.  Keyword arguments put each
        # value where it belongs.
        # NOTE(review): wrapping the recursive result in
        # _deal_with_result (which elsewhere takes 4 arguments) looks
        # suspicious -- confirm its signature accepts a single tuple.
        self._deal_with_result(
            self.perform_rules(added.keys(), grid_size=grid_size,
                               recurse=1, progress=progress,
                               _recursion_level=_recursion_level + 1))
    if len(self._exceptions):
        s = ("One or more of the rule functions caused an exception.\n" +
             "(Each exception listed only once):\n\n" +
             "\n".join(self._exceptions))
        raise RuleEngineError(s)
    return added.keys(), removed.keys()