def OnOpenXML(self, event):
    """Let the user pick a classifier XML file and load it into this frame.

    The file-open dialog is called with ``gamera_xml.extensions``.  When the
    dialog returns ``None`` an error message is shown and nothing else
    happens.  Otherwise a ``kNNNonInteractive`` classifier is built from the
    chosen file and stored on ``self.classifier``, after which the classifier
    list, the panels and the controls are refreshed.

    ``event`` is accepted for the handler signature but is not read.
    """
    from gamera import gamera_xml
    from gamera import knn

    xml_path = gui_util.open_file_dialog(self, gamera_xml.extensions)
    if xml_path is not None:
        self.classifier = knn.kNNNonInteractive(xml_path)
        self.SetClassifiers()
        self.UpdatePanels()
        self.EnableControls(True)
        return

    # The dialog produced no file name.
    gui_util.message("Can't open classifier-xml file")
def OnOpenXML(self, event):
    # ---------------------------------------------------------------------------
    """Open a classifier XML file chosen by the user and make it current.

    A file-open dialog is shown using ``gamera_xml.extensions``.  If the
    dialog returns ``None`` a message is displayed and the method returns
    without touching the current classifier.  Otherwise the file is loaded
    into a ``knn.kNNNonInteractive`` instance, stored on ``self.classifier``,
    and the classifier list, panels and controls are updated.

    ``event`` is not used by the body.
    """
    from gamera import gamera_xml
    from gamera import knn

    filename = gui_util.open_file_dialog(self, gamera_xml.extensions)
    # Identity test: ``== None`` would dispatch to a custom ``__eq__`` if the
    # dialog ever returned a non-str object.
    if filename is None:
        gui_util.message("Can't open classifier-xml file")
        return

    classifier = knn.kNNNonInteractive(filename)
    self.classifier = classifier
    self.SetClassifiers()
    self.UpdatePanels()
    self.EnableControls(True)
def test_noninteractive_classifier():
    """Check the kNNNonInteractive wrappers on the ``testline`` fixture.

    XML reading/writing itself is assumed to be fine (it is covered by
    test_xml); this test exercises the wrappers in classify on top of it:
    construction from a glyph list, classification, and a
    serialize / clear / unserialize cycle.
    """
    serialized_path = "tmp/serialized.knn"

    line_image = load_image("data/testline.png")
    components = line_image.cc_analysis()

    training_glyphs = gamera_xml.glyphs_from_xml("data/testline.xml")
    knn_classifier = knn.kNNNonInteractive(
        training_glyphs, features=featureset, normalize=False)
    assert not knn_classifier.is_interactive()
    assert len(knn_classifier.get_glyphs()) == 66

    _test_classification(knn_classifier, components)

    # Write the classifier out, empty it, then read it back.
    knn_classifier.serialize(serialized_path)
    knn_classifier.clear_glyphs()
    assert len(knn_classifier.get_glyphs()) == 0
    knn_classifier.unserialize(serialized_path)
def test_noninteractive_classifier():
    """Test the non-interactive kNN classifier wrappers.

    The XML reading/writing layer is taken as correct (see test_xml); what
    is tested here are the wrappers in classify that sit on top of it.
    """
    page = load_image("data/testline.png")
    page_ccs = page.cc_analysis()

    glyph_db = gamera_xml.glyphs_from_xml("data/testline.xml")
    noninteractive = knn.kNNNonInteractive(
        glyph_db,
        features=featureset,
        normalize=False,
    )
    assert not noninteractive.is_interactive()
    glyph_count = len(noninteractive.get_glyphs())
    assert glyph_count == 66

    _test_classification(noninteractive, page_ccs)

    # Serialize, drop every glyph, then load the serialized state again.
    dump_file = "tmp/serialized.knn"
    noninteractive.serialize(dump_file)
    noninteractive.clear_glyphs()
    remaining = len(noninteractive.get_glyphs())
    assert remaining == 0
    noninteractive.unserialize(dump_file)
def CopyClassifier(self, classifier):
    """Build and return a separate kNNNonInteractive copy of ``self.classifier``.

    Every image in the source database is duplicated with ``image_copy()``
    and gets the original's ``id_name`` and ``classification_state``.  The
    new classifier is created with the source's features, split flag, k and
    normalize setting, and then receives the source's weights and
    selections.

    NOTE(review): the ``classifier`` argument is never read -- the copy is
    always taken from ``self.classifier``.  Confirm that this is intended.
    """
    from gamera import knn

    def _duplicate(glyph):
        # image_copy() result gets the two attributes set explicitly.
        twin = glyph.image_copy()
        twin.id_name = glyph.id_name
        twin.classification_state = glyph.classification_state
        return twin

    source = self.classifier
    cloned_glyphs = [_duplicate(glyph) for glyph in source.get_database()]

    clone = knn.kNNNonInteractive(
        cloned_glyphs,
        source.features,
        source._perform_splits,
        source.num_k,
        source.normalize)
    clone.set_weights(source.get_weights())
    clone.set_selections(source.get_selections())
    return clone
def CopyClassifier(self, classifier):
    """Return a new kNNNonInteractive that mirrors ``self.classifier``.

    The database images are copied one by one (``image_copy()`` plus the
    ``id_name`` and ``classification_state`` attributes).  The copy is
    constructed with the same features, ``_perform_splits``, ``num_k`` and
    ``normalize`` values, then given the same weights and selections.

    NOTE(review): ``classifier`` is part of the signature but is not used;
    the source is always ``self.classifier`` -- verify against callers.
    """
    from gamera import knn

    original = self.classifier

    def _copy_glyph(src_img):
        dst_img = src_img.image_copy()
        dst_img.id_name = src_img.id_name
        dst_img.classification_state = src_img.classification_state
        return dst_img

    database_copy = [_copy_glyph(src_img) for src_img in original.get_database()]

    constructor_args = (
        database_copy,
        original.features,
        original._perform_splits,
        original.num_k,
        original.normalize,
    )
    duplicate = knn.kNNNonInteractive(*constructor_args)
    duplicate.set_weights(original.get_weights())
    duplicate.set_selections(original.get_selections())
    return duplicate
def aomr_remaining_pages(outdir, classifier):
    """Run AOMR on every page folder under *outdir* that has no corrected glyphs.

    *outdir* is walked with ``os.walk``.  The root itself and any path that
    has a ``.git`` component are skipped.  The base name of each remaining
    directory is converted with ``int()`` and used as the page number, so a
    non-numeric directory name raises ``ValueError``.  Pages 41, 87 and 100
    are skipped.

    For a page folder that does not contain ``<NNNN>_corr_page_glyphs.xml``
    (``NNNN`` is the folder name zero-filled to four characters) the function:

    1. builds an ``AomrObject`` from ``<NNNN>_staves_only.tiff``;
    2. finds the staves and removes the staff lines -- a failure in either
       step, or an empty staff result, is logged and the page is skipped;
    3. classifies the connected components of the staff-less image with a
       ``knn.kNNNonInteractive`` built from *classifier*;
    4. writes the glyphs, with features, to
       ``<NNNN>_uncorr_page_glyphs.xml`` in the page folder.

    Parameters:
        outdir: root directory containing one sub-directory per page.
        classifier: first argument handed to ``knn.kNNNonInteractive``.
    """
    aomr_opts = {
        'staff_finder': 0,
        'lines_per_staff': 4,
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 14
    }

    for dirpath, dirnames, filenames in os.walk(outdir):
        if dirpath == outdir:
            continue

        # Normalise the separator first so the ".git" test also works where
        # os.sep is not "/".
        if ".git" in dirpath.replace(os.sep, "/").split("/"):
            continue

        folder_no = os.path.basename(dirpath)
        pnum = int(folder_no)
        page_id = folder_no.zfill(4)

        lg.debug("Processing page {0}".format(pnum))

        # these files give us problems.
        if pnum in (41, 87, 100):
            lg.debug("Skipping page {0}".format(pnum))
            continue

        corrpg = "{0}_corr_page_glyphs.xml".format(page_id)
        if corrpg in filenames:
            # Corrected glyphs already exist; nothing to do for this page.
            continue

        # we need to perform aomr.
        original_image = os.path.join(
            dirpath, "{0}_staves_only.tiff".format(page_id))
        aomr_obj = AomrObject(original_image, **aomr_opts)

        try:
            lg.debug("Finding Staves")
            staves = aomr_obj.find_staves()
        except Exception as e:
            lg.debug("Cannot find staves: {0} because {1}".format(pnum, e))
            continue

        if not staves:
            lg.debug("No staves were found on page {0}".format(pnum))
            continue

        try:
            aomr_obj.remove_stafflines()
        except Exception as e:
            lg.debug("Cannot remove stafflines: {0} because {1}".format(pnum, e))
            continue

        cknn = knn.kNNNonInteractive(classifier, 'all', True, 1)
        ccs = aomr_obj.img_no_st.cc_analysis()
        func = classify.BoundingBoxGroupingFunction(4)
        classified_image = cknn.group_and_update_list_automatic(
            ccs,
            grouping_function=func,
            max_parts_per_group=4,
            max_graph_size=16
        )

        lg.debug("save all the files from page {0}".format(pnum))
        cknn.generate_features_on_glyphs(classified_image)

        # NOTE(review): this symbol table is filled but not used again in
        # this function; kept in case the calls matter -- confirm and remove.
        symbol_table = SymbolTable()
        for split in plugin.methods_flat_category("Segmentation", ONEBIT):
            symbol_table.add("_split." + split[0])
        symbol_table.add("_group")
        symbol_table.add("_group._part")

        uncorr_path = os.path.join(
            dirpath, "{0}_uncorr_page_glyphs.xml".format(page_id))
        gamera_xml.WriteXMLFile(
            glyphs=classified_image, with_features=True
        ).write_filename(uncorr_path)

        # Drop the per-page objects before the next page is processed.
        del aomr_obj
        del classified_image
        del cknn
import haar as h
import sys
import cv2

# Gamera core first; init_gamera() is called once all imports are done.
from gamera.core import *
# kNN classifier module, grouping function and XML helpers.
from gamera import knn
from gamera.classify import ShapedGroupingFunction
from gamera import gamera_xml

init_gamera()

# Non-interactive kNN classifier built from the full-symbol glyph file,
# restricted to the features listed below.
classifier = knn.kNNNonInteractive(
    "./classifiers/knnFullSymbolGlyphs.xml",
    [
        "aspect_ratio",
        "moments",
        "compactness",
        "fourier_broken",
        "volume16regions",
        "nholes",
    ],
    0,
    normalize=True,
)

# Load training data
#classifier.from_xml_filename("knn_glyphs.xml")

# The image path is the last command-line argument; convert it to onebit.
image = load_image(sys.argv[-1])
onebit = image.to_onebit()

STAFF_HEIGHT = 2  #int(sys.argv[2])

# Connected components of the onebit image; report how many were found.
component_list = onebit.cc_analysis()
print(len(component_list))
def process_axz_directory(directory, class_glyphs, class_weights, outputdir):
    """Classify every AXZ file found under *directory*.

    *directory* is walked with ``os.walk``; ``.DS_Store`` entries are
    ignored.  For every other file the page number is the text after the
    last ``_`` in the file name, without a trailing ``.axz``.  A directory
    with that name is created under *outputdir* (``os.mkdir`` raises if it
    already exists) and receives:

    * ``original_image.tiff``      -- ``extract(0)`` of the AxFile's image 0;
    * ``classifier_settings.xml``  -- settings of the kNN classifier;
    * ``page_glyphs.xml``          -- the classified glyphs with features;
    * ``source_image.tiff``        -- the image after staff-line removal.

    If staff finding or staff removal raises, the error is logged and the
    file is skipped.  If no staves are found, the image and the output
    directory are removed again and the file is skipped.

    Parameters:
        directory: root directory that is searched for AXZ files.
        class_glyphs: first argument handed to ``knn.kNNNonInteractive``.
        class_weights: not used at the moment (the ``load_settings`` call
            is commented out).
        outputdir: directory in which the per-page folders are created.
    """
    print("Processing AXZ Folder")

    # The options do not depend on the file, so build them once.
    aomr_opts = {
        'lines_per_staff': 4,
        'staff_finder': 0,
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 12  # GVM, was 6
    }
    axz_suffix = '.axz'

    for dirpath, dirnames, filenames in os.walk(directory):
        # if os.path.abspath(directory) == os.path.abspath(dirpath):
        #     continue

        for f in filenames:
            if f == ".DS_Store":
                continue

            # Remove the extension as a suffix.  str.strip('.axz') strips any
            # of the characters '.', 'a', 'x', 'z' from BOTH ends instead.
            pagenum = f.split("_")[-1]
            if pagenum.endswith(axz_suffix):
                pagenum = pagenum[:-len(axz_suffix)]
            # Two spaces on purpose: same output as the former
            # ``print "Loading page ", str(pagenum)`` statement.
            print("Loading page  {0}".format(pagenum))

            # create an output directory
            outdir = os.path.join(outputdir, pagenum)
            os.mkdir(outdir)

            axzfile = os.path.join(dirpath, f)
            ax = AxFile(axzfile, "")
            staves = ax.get_img0().extract(0)

            sfile = os.path.join(outdir, "original_image.tiff")
            save_image(staves, sfile)

            # grab and remove the staves
            aomr_obj = AomrObject(sfile, **aomr_opts)

            try:
                lg.debug("Finding Staves")
                found_staves = aomr_obj.find_staves()
            except Exception as e:
                lg.debug("Cannot find staves: {0} because {1}".format(pagenum, e))
                continue

            lg.debug("S is: {0}".format(found_staves))
            if not found_staves:
                lg.debug("no staves were found")
                os.remove(sfile)
                os.rmdir(outdir)
                # outdir is gone; everything below writes into it, so stop
                # processing this file here.
                continue

            try:
                aomr_obj.remove_stafflines()
            except Exception as e:
                lg.debug("Cannot remove stafflines: {0} because {1}".format(pagenum, e))
                continue

            cknn = knn.kNNNonInteractive(class_glyphs, 'all', True, 1)
            # cknn.load_settings(class_weights)
            ccs = aomr_obj.img_no_st.cc_analysis()
            func = classify.BoundingBoxGroupingFunction(4)
            classified_image = cknn.group_and_update_list_automatic(
                ccs,
                grouping_function=func,
                max_parts_per_group=4,
                max_graph_size=16
            )

            lg.debug("save all the files into this directory")
            cknn.save_settings(os.path.join(outdir, "classifier_settings.xml"))
            cknn.generate_features_on_glyphs(classified_image)

            # NOTE(review): the symbol table is only written by the
            # commented-out call further down; it is unused otherwise.
            symbol_table = SymbolTable()
            for split in plugin.methods_flat_category("Segmentation", ONEBIT):
                symbol_table.add("_split." + split[0])
            symbol_table.add("_group")
            symbol_table.add("_group._part")

            avg_punctum_col = aomr_obj.average_punctum(cknn.get_glyphs())
            print('average punctum column size =  {0}'.format(avg_punctum_col))
            glyphs_center_of_mass = aomr_obj.x_projection_vector(
                cknn.get_glyphs(),
                avg_punctum_col,
                aomr_opts.get('discard_size'))
            print('center of mass for each glyph =  {0}'.format(glyphs_center_of_mass))

            gamera_xml.WriteXMLFile(
                glyphs=classified_image, with_features=True
            ).write_filename(os.path.join(outdir, "page_glyphs.xml"))
            # gamera_xml.WriteXMLFile(symbol_table=s).write_filename(os.path.join(outdir, "symbol_table.xml"))
            # cknn.to_xml_filename(os.path.join(outdir, "classifier_glyphs.xml"), with_features=True)
            save_image(aomr_obj.img_no_st, os.path.join(outdir, "source_image.tiff"))

            # clean up
            del aomr_obj.img_no_st
            del aomr_obj
            del classified_image
            del ax
            del cknn
def run(self, *args, **kwargs):
    """
    Run the GA optimization on every classifier returned by
    ``Classifier.objects.all()``.

    The optimization can be run with many different settings. For
    convenience, all the settings are included in this method and the ones
    that are not currently used are commented out. Feel free to tweak them.

    For most of the arguments it should be implicitly clear which ones are
    int and which ones are float/double. For example, arg_one=3 means
    arg_one should be an int, while arg_two=3.0 means arg_two should be a
    double/float.

    Later on, a client interface could be built to tweak all these settings.

    For every classifier the produced settings are saved to a temporary XML
    file, a ``ClassifierSetting`` row is created (with ``producer=None``
    when the classifier was deleted in the meantime) and the XML file is
    stored on that row. The temporary directory is removed even when one of
    these steps raises.

    Detailed documentation can be found at
    http://gamera.sourceforge.net/doc/html/ga_optimization.html

    Code template taken from
    http://gamera.sourceforge.net/doc/html/ga_optimization.html#script-usage
    """
    init_gamera()

    classifiers = Classifier.objects.all()

    for classifier in classifiers:
        project = classifier.project
        optimization_start = timezone.now()
        print("Optimizing classifier {0}".format(classifier.name))

        cknn = knn.kNNNonInteractive(classifier.file_path,
                                     features='all',
                                     normalize=False)

        print("Setting base settings")
        baseSettings = knnga.GABaseSetting()
        baseSettings.opMode = knnga.GA_WEIGHTING  # Or knnga.GA_SELECTION
        baseSettings.popSize = 75
        baseSettings.crossRate = 0.95
        baseSettings.mutRate = 0.05

        print("Settings selection options")
        selection = knnga.GASelection()
        selection.setRoulettWheelScaled(2.0)
        #selection.setRoulettWheelScaled(double pressure=2.0)  # Pressure \in [1,2]
        #selection.setRandomSelection()
        #selection.setRankSelection(pressure=2.0, exponent=1.0)
        #selection.setStochUniSampling()
        #selection.setRoulettWheel()
        #selection.setTournamentSelection(tSize=3)

        print("Setting crossover settings")
        crossover = knnga.GACrossover()
        crossover.setUniformCrossover(0.5)
        #crossover.setUniformCrossover(double preference = 0.5)
        #crossover.setNPointCrossover(n=1)
        #crossover.setHypercubeCrossover(int numFeatures, double min, double max, alpha=0.0)
        #crossover.setSBXcrossover(int numFeatures, double min, double max, eta=0.0)
        #crossover.setSegmentCrossover(int numFeatures, double min, double max, alpha=0.0)

        print("Setting Mutation settings")
        mutation = knnga.GAMutation()
        #mutation.setShiftMutation()
        mutation.setSwapMutation()
        #mutation.setBinaryMutation(rate=0.05, normalize=False)
        mutation.setBinaryMutation(0.05, False)
        #mutation.setGaussMutation(int numFeatures, double min, double max, double sigma, double rate)
        #mutation.setInversionMutation()

        print("Setting replacement settings")
        replacement = knnga.GAReplacement()
        replacement.setSSGAdetTournament(3)
        #replacement.setSSGAdetTournament(int tSize=3)
        #replacement.setGenerationalReplacement()
        #replacement.setSSGAworse()

        print("Setting stop criteria")
        stop = knnga.GAStopCriteria()
        #stop.setSteadyStateStop(int minGens=100, int noChangeGens=20)
        stop.setSteadyStateStop(100, 20)
        #stop.setBestFitnessStop(optimum=1.0)
        #stop.setMaxFitnessEvals(n=5000)
        #stop.setMaxGenerations(100)

        print("Setting parallelization settings")
        parallel = knnga.GAParallelization()
        parallel.mode = True
        parallel.thredNum = 4

        # Combine each setting object into one main object
        ga = knnga.GAOptimization(cknn, baseSettings, selection,
                                  crossover, mutation, replacement,
                                  stop, parallel)

        print("Beginning calculation...")
        ga.startCalculation()

        print("Done! Saving the produced settings.")
        optimization_end = timezone.now()

        # Choosing a name for the new classifier_setting
        date_string = datetime.now().strftime("%Y_%m_%d_%I%M%p")
        setting_name = "{0}:{1} Periodic Optimization".format(
            classifier.name[:200], date_string)

        tdir = tempfile.mkdtemp()
        try:
            temp_xml_filepath = os.path.join(tdir, str(classifier.uuid) + '.xml')
            cknn.save_settings(temp_xml_filepath)

            try:
                classifier = refetch_from_db(classifier)
            except ObjectDeletedError:
                print("Sadly classifier {0} was deleted.".format(classifier.name))
                # Now this is a lonely setting file with no classifier to hang out with.
                classifier = None

            classifier_setting_instance = ClassifierSetting.objects.create(
                name=setting_name,
                project=project,
                fitness=ga.bestFitness,
                producer=classifier,
                optimization_started_at=optimization_start,
                optimization_finished_at=optimization_end)

            with open(temp_xml_filepath, 'rb') as f:
                taskutil.save_file_field(
                    classifier_setting_instance.settings_file,
                    'settings_xml',
                    File(f))
        finally:
            # mkdtemp() leaves deletion to the caller; clean up even when
            # saving the settings or creating the row fails.
            shutil.rmtree(tdir)
def run(self, *args, **kwargs):
    """
    Run the GA optimization on every classifier returned by
    ``Classifier.objects.all()``.

    The optimization can be run with many different settings. For
    convenience, all the settings are included in this method and the ones
    that are not currently used are commented out. Feel free to tweak them.

    For most of the arguments it should be implicitly clear which ones are
    int and which ones are float/double. For example, arg_one=3 means
    arg_one should be an int, while arg_two=3.0 means arg_two should be a
    double/float.

    Later on, a client interface could be built to tweak all these settings.

    Per classifier: the settings produced by the optimization are written
    to an XML file inside a fresh temporary directory, a
    ``ClassifierSetting`` row is created (its ``producer`` is ``None`` if
    the classifier has been deleted meanwhile), and the XML file is saved
    on that row. The temporary directory is always removed afterwards,
    including when an exception is raised.

    Detailed documentation can be found at
    http://gamera.sourceforge.net/doc/html/ga_optimization.html

    Code template taken from
    http://gamera.sourceforge.net/doc/html/ga_optimization.html#script-usage
    """
    init_gamera()

    classifiers = Classifier.objects.all()

    for classifier in classifiers:
        project = classifier.project
        optimization_start = timezone.now()
        print("Optimizing classifier {0}".format(classifier.name))

        cknn = knn.kNNNonInteractive(classifier.file_path,
                                     features='all',
                                     normalize=False)

        print("Setting base settings")
        baseSettings = knnga.GABaseSetting()
        baseSettings.opMode = knnga.GA_WEIGHTING  # Or knnga.GA_SELECTION
        baseSettings.popSize = 75
        baseSettings.crossRate = 0.95
        baseSettings.mutRate = 0.05

        print("Settings selection options")
        selection = knnga.GASelection()
        selection.setRoulettWheelScaled(2.0)
        #selection.setRoulettWheelScaled(double pressure=2.0)  # Pressure \in [1,2]
        #selection.setRandomSelection()
        #selection.setRankSelection(pressure=2.0, exponent=1.0)
        #selection.setStochUniSampling()
        #selection.setRoulettWheel()
        #selection.setTournamentSelection(tSize=3)

        print("Setting crossover settings")
        crossover = knnga.GACrossover()
        crossover.setUniformCrossover(0.5)
        #crossover.setUniformCrossover(double preference = 0.5)
        #crossover.setNPointCrossover(n=1)
        #crossover.setHypercubeCrossover(int numFeatures, double min, double max, alpha=0.0)
        #crossover.setSBXcrossover(int numFeatures, double min, double max, eta=0.0)
        #crossover.setSegmentCrossover(int numFeatures, double min, double max, alpha=0.0)

        print("Setting Mutation settings")
        mutation = knnga.GAMutation()
        #mutation.setShiftMutation()
        mutation.setSwapMutation()
        #mutation.setBinaryMutation(rate=0.05, normalize=False)
        mutation.setBinaryMutation(0.05, False)
        #mutation.setGaussMutation(int numFeatures, double min, double max, double sigma, double rate)
        #mutation.setInversionMutation()

        print("Setting replacement settings")
        replacement = knnga.GAReplacement()
        replacement.setSSGAdetTournament(3)
        #replacement.setSSGAdetTournament(int tSize=3)
        #replacement.setGenerationalReplacement()
        #replacement.setSSGAworse()

        print("Setting stop criteria")
        stop = knnga.GAStopCriteria()
        #stop.setSteadyStateStop(int minGens=100, int noChangeGens=20)
        stop.setSteadyStateStop(100, 20)
        #stop.setBestFitnessStop(optimum=1.0)
        #stop.setMaxFitnessEvals(n=5000)
        #stop.setMaxGenerations(100)

        print("Setting parallelization settings")
        parallel = knnga.GAParallelization()
        parallel.mode = True
        parallel.thredNum = 4

        # Combine each setting object into one main object
        ga = knnga.GAOptimization(cknn, baseSettings, selection,
                                  crossover, mutation, replacement,
                                  stop, parallel)

        print("Beginning calculation...")
        ga.startCalculation()

        print("Done! Saving the produced settings.")
        optimization_end = timezone.now()

        # Choosing a name for the new classifier_setting
        date_string = datetime.now().strftime("%Y_%m_%d_%I%M%p")
        setting_name = "{0}:{1} Periodic Optimization".format(
            classifier.name[:200],
            date_string,
        )

        tdir = tempfile.mkdtemp()
        try:
            temp_xml_filepath = os.path.join(tdir, str(classifier.uuid) + '.xml')
            cknn.save_settings(temp_xml_filepath)

            try:
                classifier = refetch_from_db(classifier)
            except ObjectDeletedError:
                print("Sadly classifier {0} was deleted.".format(
                    classifier.name))
                # Now this is a lonely setting file with no classifier to hang out with.
                classifier = None

            classifier_setting_instance = ClassifierSetting.objects.create(
                name=setting_name,
                project=project,
                fitness=ga.bestFitness,
                producer=classifier,
                optimization_started_at=optimization_start,
                optimization_finished_at=optimization_end)

            with open(temp_xml_filepath, 'rb') as f:
                taskutil.save_file_field(
                    classifier_setting_instance.settings_file,
                    'settings_xml',
                    File(f))
        finally:
            # The directory from mkdtemp() is never removed automatically;
            # delete it here so a failure above does not leave it behind.
            shutil.rmtree(tdir)