def test_interactive_classifier():
    """Exercise the classify-module XML wrappers on kNNInteractive."""
    # We assume the XML reading/writing itself is fine (given test_xml),
    # but we should test the wrappers in classify anyway.
    image = load_image("data/testline.png")
    ccs = image.cc_analysis()
    classifier = knn.kNNInteractive([], features=featureset)
    assert classifier.is_interactive()
    assert len(classifier.get_glyphs()) == 0
    classifier.from_xml_filename("data/testline.xml")
    assert len(classifier.get_glyphs()) == 66
    _test_classification(classifier, ccs)
    _test_training(classifier, ccs)
    length = len(classifier.get_glyphs())
    # Subtract len(group_parts) because to_xml_filename() does not
    # save "_group._part" glyphs.
    group_parts = [g for g in classifier.get_glyphs()
                   if g.get_main_id().startswith("_group._part")]
    length = length - len(group_parts)
    classifier.to_xml_filename("tmp/testline_classifier.xml")
    classifier.from_xml_filename("tmp/testline_classifier.xml")
    assert len(classifier.get_glyphs()) == length
    classifier.merge_from_xml_filename("data/testline.xml")
    assert len(classifier.get_glyphs()) == length + 66
    classifier.clear_glyphs()
    assert len(classifier.get_glyphs()) == 0
    classifier.from_xml_filename("data/testline.xml")
    assert len(classifier.get_glyphs()) == 66
def glyph_classification(self):
    """ Glyph classification.
        Returns a list of the classified glyphs with its position and size.
    """
    # Feature set used to train the classifier glyphs.
    feature_names = [
        "area", "aspect_ratio", "black_area", "compactness",
        "moments", "ncols_feature", "nholes", "nholes_extended",
        "nrows_feature", "skeleton_features", "top_bottom",
        "volume", "volume16regions", "volume64regions",
        "zernike_moments",
    ]
    cknn = knn.kNNInteractive([], feature_names, True, 8)
    cknn.from_xml_filename(self.classifier_glyphs)
    # Option for loading the features and weights of the training stage.
    cknn.load_settings(self.classifier_weights)
    ccs = self.img_no_st.cc_analysis()
    grouping_function = classify.ShapedGroupingFunction(16)  # variable ?
    self.classified_image = cknn.group_and_update_list_automatic(
        ccs, grouping_function, max_parts_per_group=4)  # variable ?
def make_spanning_tree(glyphs, k=None):
    """Build a minimum spanning tree over *glyphs*.

    *k* is an optional kNNInteractive classifier used to compute the
    distance matrix; a fresh one is created when omitted.
    """
    if k is None:
        k = knn.kNNInteractive()
    distances = k.distance_matrix(glyphs, 0)
    tree = graph.Undirected()
    tree.create_minimum_spanning_tree(glyphs, distances)
    return tree
def test_interactive_classifier():
    """Round-trip the classifier XML wrappers: load, save, merge, clear."""
    # The XML reading/writing itself is assumed fine (given test_xml);
    # this checks the wrappers in classify.
    img = load_image("data/testline.png")
    components = img.cc_analysis()
    clf = knn.kNNInteractive([], features=featureset)
    assert clf.is_interactive()
    assert len(clf.get_glyphs()) == 0
    clf.from_xml_filename("data/testline.xml")
    assert len(clf.get_glyphs()) == 66
    _test_classification(clf, components)
    _test_training(clf, components)
    # to_xml_filename() does not save "_group._part" glyphs, so the
    # expected count after a save/load round trip shrinks accordingly.
    n_parts = len([g for g in clf.get_glyphs()
                   if g.get_main_id().startswith("_group._part")])
    expected = len(clf.get_glyphs()) - n_parts
    clf.to_xml_filename("tmp/testline_classifier.xml")
    clf.from_xml_filename("tmp/testline_classifier.xml")
    assert len(clf.get_glyphs()) == expected
    clf.merge_from_xml_filename("data/testline.xml")
    assert len(clf.get_glyphs()) == expected + 66
    clf.clear_glyphs()
    assert len(clf.get_glyphs()) == 0
    clf.from_xml_filename("data/testline.xml")
    assert len(clf.get_glyphs()) == 66
def init_classifier(self, filename=None, features=None):
    """Create the kNN classifier.

    *filename*
        Optional XML training data to load; when None, the cached
        images are invalidated instead.
    *features*
        Feature names to use; defaults to ["volume64regions"].
    """
    # None sentinel instead of a mutable default argument list.
    if features is None:
        features = ["volume64regions"]
    self.l.debug("features=%s, filename=%s", features, filename)
    self.classifier = knn.kNNInteractive([], features, 0)
    self.classifier.num_k = self.k
    if filename is not None:  # idiomatic form of "not filename is None"
        self.load_new_training_data(filename)
    else:
        self.invalidate_images()
def init_classifier(self, filename=None, features=None):
    """Create the kNN classifier.

    *filename*
        Optional XML training data to load; when None, the cached
        images are invalidated instead.
    *features*
        Feature names to use; defaults to ["volume64regions"].
    """
    if features is None:
        # Avoid the shared mutable default argument anti-pattern.
        features = ["volume64regions"]
    self.l.debug("features=%s, filename=%s", features, filename)
    self.classifier = knn.kNNInteractive([], features, 0)
    self.classifier.num_k = self.k
    if filename is not None:  # was "not filename is None"
        self.load_new_training_data(filename)
    else:
        self.invalidate_images()
def gui_show_remove_lyrics(self):
    """
    GUI function: Deletes the lyrics out of an image. When a trainfile
    is given, the lyrics are removed by the trained based method.
    Otherwise the rule based method is used.
    """
    if has_gui.has_gui:
        dialog = Args([FileOpen("Trainfile (optional)", "", "*.*")],
                      "Lyrics removal")
        params = dialog.show()
        # Identity test for None (was "params[0] != None").
        if params[0] is not None:
            filename = params[0]
            # Create a classifier and load the database.
            ccs = self.fullimage.cc_analysis()
            classifier = knn.kNNInteractive([], [
                'aspect_ratio',
                'moments',
                'nrows_feature',
                'volume64regions',
            ], 0)
            classifier.num_k = 1
            classifier.from_xml_filename(filename)
            # Classify the connected components of the image; use half
            # of oligon_height to decide about grouped glyphs.
            grp_distance = max([self.oligon_height / 2, 4])
            added, removed = classifier.group_list_automatic(
                ccs, BoundingBoxGroupingFunction(grp_distance),
                max_parts_per_group=2)
            # Process groups detected by gamera's grouping algorithm
            # and remove group parts and trash.
            ccs = [x for x in ccs
                   if (not x.match_id_name("_group._part.*"))
                   and (not x.match_id_name("*trash*"))]
            if len(added) > 0:
                ccs.extend(added)
            self.remove_lyrics(ccs, debug=0)
        else:
            # No trainfile given: fall back to the rule based method.
            self.remove_lyrics()
def __bar_candidate_grouping(ungrouped_bars):
    """ Groups bar candidates """
    # Tag every candidate as a group part so the grouping algorithm
    # is allowed to join them.
    for candidate in ungrouped_bars:
        candidate.classify_heuristic('_group._part.bc')
    grouper = knn.kNNInteractive()
    grouper.set_glyphs(ungrouped_bars)
    # Threshold distance in pixels between bounding boxes: 5000.
    grouped = grouper.group_and_update_list_automatic(
        ungrouped_bars,
        max_parts_per_group=10,
        grouping_function=BoundingBoxGroupingFunction(5000))
    return grouped
def ShowClassifier(classifier=None, current_database=None, image=None,
                   symbol_table=None):
    """Open a classifier display frame.

    *classifier*
        Classifier to display; a fresh kNNInteractive is created when
        omitted.
    *current_database* / *symbol_table*
        Default to empty lists (None sentinels replace the former
        shared mutable default arguments).
    *image*
        Optional image shown alongside the database.

    Returns the created ClassifierFrame.
    """
    if current_database is None:
        current_database = []
    if symbol_table is None:
        symbol_table = []
    if classifier is None:
        from gamera import knn
        classifier = knn.kNNInteractive()
    wx.BeginBusyCursor()
    try:
        class_disp = classifier_display.ClassifierFrame(classifier,
                                                        symbol_table)
        class_disp.set_image(current_database, image)
        class_disp.Show(1)
    finally:
        # Restore the cursor even if frame construction fails.
        wx.EndBusyCursor()
    return class_disp
def __init__(self, mode="wholistic", splits=0, feats=None, hocr=None):
    """Signature: ``init (mode="wholistic")``

    where *mode* can be "wholistic" or "separatistic".
    """
    if feats is None:
        # Default feature set; the None sentinel replaces the former
        # mutable default argument list.
        feats = ["aspect_ratio", "volume64regions", "moments",
                 "nholes_extended"]
    self.optimizeknn = False
    self.debug = False
    self.cknn = knn.kNNInteractive([], feats, splits)
    self.autogroup = False
    self.output = ""
    self.mode = mode
    self.hocr = hocr
def __bar_candidate_grouping(ungrouped_bars):
    """ Groups bar candidates """
    # Mark each bar as a potential part of a group before grouping.
    for bar in ungrouped_bars:
        bar.classify_heuristic('_group._part.bc')
    cknn = knn.kNNInteractive()
    cknn.set_glyphs(ungrouped_bars)
    # BoundingBoxGroupingFunction(5000): threshold distance in pixels
    # between bounding boxes.
    return cknn.group_and_update_list_automatic(
        ungrouped_bars,
        max_parts_per_group=10,
        grouping_function=BoundingBoxGroupingFunction(5000))
def _copyClassifier(original, k=0):
    """Copy a given kNN classifier by constructing a new one with
    identical parameters.

    *original*
        The classifier to be copied.
    *k*
        If the copy shall have another k-value than the original, set
        k accordingly. k = 0 means the original's k-value is used.
    """
    num_k = original.num_k if k == 0 else k
    return kNNInteractive(list(original.get_glyphs()),
                          original.features,
                          original._perform_splits,
                          num_k)
def _copyClassifier(original, k = 0):
    """Copy a given kNN classifier by constructing a new one with
    identical parameters.

    *original*
        The classifier to be copied.
    *k*
        Alternative k-value for the copy; k = 0 means the original's
        k-value will be reused.
    """
    if k == 0:
        k = original.num_k
    glyphs = list(original.get_glyphs())
    return kNNInteractive(glyphs, original.features,
                          original._perform_splits, k)
def __call__(self, classifier, k=0, randomize=True):
    """Build an edited CNN (condensed nearest neighbour) classifier.

    Seeds the Store (a) with one glyph, then repeatedly classifies the
    Grabbag (b) against the Store, moving every misclassified glyph
    into the Store, until a full pass adds nothing.
    """
    # Special case of empty classifier: nothing to condense.
    if not classifier.get_glyphs():
        return _copyClassifier(classifier)
    if k == 0:
        k = classifier.num_k
    progress = ProgressFactory("Generating edited CNN classifier...",
                               len(classifier.get_glyphs()))
    # Initialize Store (a) with a single element.
    if randomize:
        elem = _randomSetElement(classifier.get_glyphs())
    else:
        # next(iter(...)) replaces the Py2-only .__iter__().next().
        elem = next(iter(classifier.get_glyphs()))
    aGlyphs = [elem]
    a = kNNInteractive(aGlyphs, classifier.features,
                       classifier._perform_splits, k)
    progress.step()
    # Initialize Grabbag (b) with all other glyphs.
    b = classifier.get_glyphs().copy()
    b.remove(aGlyphs[0])
    # Classify each glyph in b with a as the classifier.  If a glyph is
    # misclassified, add it to a; repeat until no elements are added.
    changed = True
    while changed:  # idiomatic; was "changed == True"
        changed = False
        # A copy is needed because iterating while deleting items from
        # the collection is not possible.
        copyOfB = b.copy()
        for glyph in copyOfB:
            if glyph.get_main_id() != _getMainId(
                    a.guess_glyph_automatic(glyph)):
                b.remove(glyph)
                a.get_glyphs().add(glyph)
                progress.step()
                changed = True
    progress.kill()
    a.num_k = 1
    return a
def __call__(self, classifier, k = 0, randomize = True):
    """Create an edited CNN classifier from *classifier*.

    Starts the Store with one glyph, then repeatedly reclassifies the
    remaining glyphs, absorbing each misclassified one, until stable.
    """
    # Special case of empty classifier.
    if not classifier.get_glyphs():
        return _copyClassifier(classifier)
    if k == 0:
        k = classifier.num_k
    progress = ProgressFactory("Generating edited CNN classifier...",
                               len(classifier.get_glyphs()))
    # Initialize Store (a) with a single element.
    if randomize:
        elem = _randomSetElement(classifier.get_glyphs())
    else:
        # Was ".__iter__().next()" (Py2-only spelling).
        elem = next(iter(classifier.get_glyphs()))
    aGlyphs = [elem]
    a = kNNInteractive(aGlyphs, classifier.features,
                       classifier._perform_splits, k)
    progress.step()
    # Initialize Grabbag (b) with all others (stray semicolon removed).
    b = classifier.get_glyphs().copy()
    b.remove(aGlyphs[0])
    # Classify each glyph in b with a as the classifier.  If a glyph is
    # misclassified, add it to a; repeat until nothing more is added.
    changed = True
    while changed:  # was "changed == True"
        changed = False
        # Copy needed because iteration through the collection is not
        # possible while deleting items from it.
        copyOfB = b.copy()
        for glyph in copyOfB:
            if glyph.get_main_id() != _getMainId(a.guess_glyph_automatic(glyph)):
                b.remove(glyph)
                a.get_glyphs().add(glyph)
                progress.step()
                changed = True
    progress.kill()
    a.num_k = 1
    return a
def glyph_classification(self):
    """ Glyph classification.
        Returns a list of the classified glyphs with its position and size.
    """
    cknn = knn.kNNInteractive(
        [],
        ["area", "aspect_ratio", "black_area", "compactness",
         "moments", "ncols_feature", "nholes", "nholes_extended",
         "nrows_feature", "skeleton_features", "top_bottom",
         "volume", "volume16regions", "volume64regions",
         "zernike_moments"],
        True, 8)
    cknn.from_xml_filename(self.classifier_glyphs)
    # Option for loading the features and weights of the training stage.
    cknn.load_settings(self.classifier_weights)
    ccs = self.img_no_st.cc_analysis()
    # Maximum solvable subgraph size of 16 for the grouping algorithm.
    grouping_function = classify.ShapedGroupingFunction(16)  # variable ?
    self.classified_image = cknn.group_and_update_list_automatic(
        ccs,
        grouping_function,
        max_parts_per_group=4)  # variable ?
from gamera.config import config from gamera import gamera_xml from gamera import knn init_gamera() config.set("progress_bar",True) infile=sys.argv[1] outfile=sys.argv[2] time_factor=sys.argv[3] iknn=knn.kNNInteractive([],['aspect_ratio','moments', 'nrows_feature','ncols_feature', 'volume64regions'],0) iknn.num_k = 3 iknn.from_xml_filename(infile) nknn = iknn.noninteractive_copy() def hello(): global nknn global outfile global infile nknn.stop_optimizing() print "OPT", infile, "GA initial", nknn.ga_initial,"GA best", nknn.ga_best, "GA generation", nknn.ga_generation nknn.save_settings(outfile) nknn.start_optimizing()
from gamera.toolkits.aruspix.ax_file import *

axfile = ""
group = 0

# Start options dialog.
dialog = Args(
    [FileOpen("Aruspix file", axfile, "*.axz"),
     Choice("group", ["Group", "No group"])],
    name="Select the file")
params = dialog.show()

if params is not None and params[0]:
    # Map dialog results onto parameters.
    i = 0
    axfile = params[i]
    i += 1
    group = params[i]
    i += 1

    f = AxFile(axfile, "")
    gl = []
    # group == 1 corresponds to the "No group" choice.
    if group == 1:
        gl = gamera_xml.glyphs_from_xml(
            f.tmpdirname + "gamera_page_no_group.xml")
    else:
        gl = gamera_xml.glyphs_from_xml(
            f.tmpdirname + "gamera_page_group.xml")

    image = load_image(f.tmpdirname + "img2.tif")
    classifier = knn.kNNInteractive()
    classifier.display(gl, image)
ccs = image.cc_analysis()
pspage = PsaltikiPage(image)

if opt.trainfile != "":
    # Create a classifier and load the database.
    if opt.weightfile == "":
        # No weight file: restrict to the four hand-picked features.
        classifier = knn.kNNInteractive([], [
            'aspect_ratio',
            'moments',
            'nrows_feature',
            'volume64regions',
        ], 0)
        classifier.num_k = opt.num_k
        classifier.from_xml_filename(opt.trainfile)
    else:
        # Weight file given: use all features plus the stored settings.
        classifier = knn.kNNInteractive([], "all")
        classifier.num_k = opt.num_k
        classifier.from_xml_filename(opt.trainfile)
        classifier.load_settings(opt.weightfile)
    # Classify the connected components of the image; half of
    # oligon_height decides about grouped glyphs.
# return 'C' elif glyph == 'punctum' and kind == 'dot': return 'U' for n in neume_down: ### Then, all the other neumes if glyph == n: return 'D' for n in neume_up: if glyph == n: return 'U' return print 'CLASSIFYING' cknn = knn.kNNInteractive([], ["area", "aspect_ratio", "black_area", "compactness", "moments", "ncols_feature", "nholes", "nholes_extended", "nrows_feature", "skeleton_features", "top_bottom", "volume", "volume16regions", "volume64regions", "zernike_moments"], 8) #int k 8 cknn.from_xml_filename('/Users/gabriel/Documents/imgs/pdf_to_tiff_conversion/IMG_test/Liber_Usualis/_classifier_training/154_corrected/classifier_glyphs.xml') #154 cknn.load_settings('/Users/gabriel/Documents/imgs/pdf_to_tiff_conversion/IMG_test/Liber_Usualis/_classifier_training/177_corr/GA_opt_1.xml') print 'FEATURES' ccs = image_no_st.cc_analysis() grouping_function = classify.ShapedGroupingFunction(16) # maximum solveable subgraph size 16 class_im = cknn.group_and_update_list_automatic(ccs, grouping_function, max_parts_per_group = 4) # 8 for c in class_im: #print c.get_main_id() staff_number = '' uod = '' staff_number = '' # Comment for have the non-neumes in position line_number = ''
del cc count = count + 1 print "filter done.",len(ccs)-count,"elements left." if(opt.deskew): #from gamera.toolkits.otr.otr_staff import * if opt.verbosity > 0: print "\ntry to skew correct..." rotation = img.rotation_angle_projections(-10,10)[0] img = img.rotate(rotation,0) if opt.verbosity > 0: print "rotated with",rotation,"angle" if(opt.auto_group): cknn = knn.kNNInteractive([], ["aspect_ratio", "volume64regions", "moments", "nholes_extended"], 0) cknn.from_xml_filename(opt.trainfile) if(opt.ccsfilter): the_ccs = ccs else: the_ccs = img.cc_analysis() median_cc = int(median([cc.nrows for cc in the_ccs])) autogroup = ClassifyCCs(cknn) autogroup.parts_to_group = 3 autogroup.grouping_distance = max([2,median_cc / 8]) p = Page(img, classify_ccs=autogroup) if opt.verbosity > 0: print "autogrouping glyphs activated." print "maximal autogroup distance:", autogroup.grouping_distance else: p = Page(img)
del cc count = count + 1 print "filter done.", len(ccs) - count, "elements left." if (opt.deskew): #from gamera.toolkits.otr.otr_staff import * if opt.verbosity > 0: print "\ntry to skew correct..." rotation = img.rotation_angle_projections(-10, 10)[0] img = img.rotate(rotation, 0) if opt.verbosity > 0: print "rotated with", rotation, "angle" if (opt.auto_group): cknn = knn.kNNInteractive( [], ["aspect_ratio", "volume64regions", "moments", "nholes_extended"], 0) cknn.from_xml_filename(opt.trainfile) if (opt.ccsfilter): the_ccs = ccs else: the_ccs = img.cc_analysis() median_cc = int(median([cc.nrows for cc in the_ccs])) autogroup = ClassifyCCs(cknn) autogroup.parts_to_group = 3 autogroup.grouping_distance = max([2, median_cc / 8]) p = Page(img, classify_ccs=autogroup) if opt.verbosity > 0: print "autogrouping glyphs activated." print "maximal autogroup distance:", autogroup.grouping_distance else:
from gamera.knn import kNNInteractive

# Load the pre-OMR training database into an interactive classifier.
classifier = kNNInteractive()
classifier.from_xml_filename("preomr.xml")