Example #1
0
    def OnOpenXML(self, event):
    #---------------------------------------------------------------------------
        """Let the user pick a classifier XML file and load it.

        Opens a file dialog restricted to ``gamera_xml.extensions``.  When
        the dialog yields ``None`` a message is shown and the current state
        is left untouched.  Otherwise a ``kNNNonInteractive`` classifier is
        built from the chosen file, stored on ``self.classifier`` and the
        GUI is refreshed and enabled.

        ``event`` is the triggering GUI event; it is not used.
        """
        from gamera import gamera_xml, knn

        chosen_path = gui_util.open_file_dialog(self, gamera_xml.extensions)
        if chosen_path is None:
            gui_util.message("Can't open classifier-xml file")
            return

        # Build the classifier straight from the selected XML file.
        self.classifier = knn.kNNNonInteractive(chosen_path)
        self.SetClassifiers()

        # Bring the panels up to date, then unlock the controls.
        self.UpdatePanels()
        self.EnableControls(True)
Example #2
0
    def OnOpenXML(self, event):
        # ---------------------------------------------------------------------------
        """Load a kNN classifier from an XML file chosen by the user.

        A file dialog limited to ``gamera_xml.extensions`` is shown.  If it
        yields ``None`` a message is displayed and the method returns without
        changing any state.  Otherwise the file is loaded into a
        ``kNNNonInteractive`` classifier, which replaces ``self.classifier``
        before the classifier list, the panels and the controls are
        refreshed.

        ``event`` is the triggering GUI event; it is not used.
        """
        from gamera import gamera_xml
        from gamera import knn

        filename = gui_util.open_file_dialog(self, gamera_xml.extensions)
        # Identity test: ``== None`` would invoke a custom ``__eq__`` if the
        # dialog ever returned an object that defines one.
        if filename is None:
            gui_util.message("Can't open classifier-xml file")
            return

        classifier = knn.kNNNonInteractive(filename)
        self.classifier = classifier
        self.SetClassifiers()

        self.UpdatePanels()
        self.EnableControls(True)
Example #3
0
def test_noninteractive_classifier():
    """Exercise the non-interactive kNN wrapper: load, classify, serialize."""
    # XML reading/writing is covered by test_xml; this test targets the
    # classifier wrappers built on top of it.
    page = load_image("data/testline.png")
    components = page.cc_analysis()

    training_glyphs = gamera_xml.glyphs_from_xml("data/testline.xml")
    classifier = knn.kNNNonInteractive(
        training_glyphs, features=featureset, normalize=False)
    assert not classifier.is_interactive()
    assert len(classifier.get_glyphs()) == 66

    _test_classification(classifier, components)

    # Write the classifier out, empty it, then read it back in.
    classifier.serialize("tmp/serialized.knn")
    classifier.clear_glyphs()
    assert len(classifier.get_glyphs()) == 0
    classifier.unserialize("tmp/serialized.knn")
Example #4
0
def test_noninteractive_classifier():
    """Check the kNNNonInteractive wrappers around glyph loading and storage."""
    # The XML layer itself is assumed correct (see test_xml); only the
    # classifier-level wrappers are under test here.
    line_image = load_image("data/testline.png")
    line_ccs = line_image.cc_analysis()

    glyph_db = gamera_xml.glyphs_from_xml("data/testline.xml")
    knn_classifier = knn.kNNNonInteractive(glyph_db,
                                           features=featureset,
                                           normalize=False)
    assert not knn_classifier.is_interactive()
    loaded_count = len(knn_classifier.get_glyphs())
    assert loaded_count == 66

    _test_classification(knn_classifier, line_ccs)

    # Serialize, drop every glyph, and restore from the serialized file.
    knn_classifier.serialize("tmp/serialized.knn")
    knn_classifier.clear_glyphs()
    remaining_count = len(knn_classifier.get_glyphs())
    assert remaining_count == 0
    knn_classifier.unserialize("tmp/serialized.knn")
Example #5
0
    def CopyClassifier(self, classifier):
        #---------------------------------------------------------------------------
        """Return an independent copy of ``self.classifier``.

        Every glyph in the current database is duplicated with
        ``image_copy()`` and keeps its ``id_name`` and
        ``classification_state``.  A new ``kNNNonInteractive`` is then built
        with the same features, split flag, ``num_k`` and normalization, and
        receives the current weights and selections.

        NOTE(review): the ``classifier`` argument is never read -- the copy
        is always made from ``self.classifier``.  Confirm whether callers
        rely on that.
        """
        from gamera import knn

        source = self.classifier

        def duplicate(glyph):
            # Copy the pixel data, then carry over the classification info.
            clone = glyph.image_copy()
            clone.id_name = glyph.id_name
            clone.classification_state = glyph.classification_state
            return clone

        cloned_glyphs = [duplicate(glyph) for glyph in source.get_database()]

        clone_classifier = knn.kNNNonInteractive(
            cloned_glyphs, source.features, source._perform_splits,
            source.num_k, source.normalize)
        clone_classifier.set_weights(source.get_weights())
        clone_classifier.set_selections(source.get_selections())

        return clone_classifier
Example #6
0
    def CopyClassifier(self, classifier):
        # ---------------------------------------------------------------------------
        """Build a fresh classifier that mirrors ``self.classifier``.

        Glyphs are duplicated one by one (image data via ``image_copy()``,
        plus ``id_name`` and ``classification_state``), the constructor
        settings are taken from the current classifier, and weights and
        selections are transferred afterwards.

        NOTE(review): the ``classifier`` parameter is unused; the source of
        the copy is always ``self.classifier``.  Verify against callers.
        """
        from gamera import knn

        original = self.classifier

        glyph_copies = []
        for glyph in original.get_database():
            glyph_copy = glyph.image_copy()
            # image_copy() is followed by an explicit transfer of the
            # classification attributes.
            glyph_copy.id_name = glyph.id_name
            glyph_copy.classification_state = glyph.classification_state
            glyph_copies.append(glyph_copy)

        constructor_args = (
            glyph_copies,
            original.features,
            original._perform_splits,
            original.num_k,
            original.normalize,
        )
        duplicate_classifier = knn.kNNNonInteractive(*constructor_args)
        duplicate_classifier.set_weights(original.get_weights())
        duplicate_classifier.set_selections(original.get_selections())

        return duplicate_classifier
Example #7
0
def aomr_remaining_pages(outdir, classifier):
    """Classify the glyphs of every page folder that has no corrected glyphs.

    Walks ``outdir``.  Each sub-directory is expected to be named after its
    page number.  For a page folder that does not contain
    ``NNNN_corr_page_glyphs.xml`` the function:

    1. opens ``NNNN_staves_only.tiff`` with ``AomrObject``,
    2. finds and removes the staves,
    3. groups and classifies the remaining connected components with a
       ``kNNNonInteractive`` classifier,
    4. writes the result to ``NNNN_uncorr_page_glyphs.xml`` in that folder.

    Pages on which an individual step fails are logged and skipped.

    Args:
        outdir: Root directory holding one sub-directory per page.
        classifier: Passed unchanged as the first argument of
            ``knn.kNNNonInteractive`` (presumably training glyphs or a path
            to them -- confirm against callers).
    """
    aomr_opts = {
        'staff_finder': 0,
        'lines_per_staff': 4,
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 14
    }
    # these files give us problems.
    problem_pages = (41, 87, 100)

    for dirpath, dirnames, filenames in os.walk(outdir):
        if dirpath == outdir:
            continue

        # Skip anything inside a git checkout, whichever path separator
        # the platform uses.
        if ".git" in dirpath.replace(os.sep, "/").split("/"):
            continue

        folder_no = os.path.basename(dirpath)
        try:
            pnum = int(folder_no)
        except ValueError:
            # A folder that is not named after a page number cannot be a
            # page folder; do not let it abort the whole walk.
            lg.debug("Skipping non-page folder {0}".format(dirpath))
            continue

        lg.debug("Processing page {0}".format(pnum))

        if pnum in problem_pages:
            lg.debug("Skipping page {0}".format(pnum))
            continue

        page_id = folder_no.zfill(4)
        corrpg = "{0}_corr_page_glyphs.xml".format(page_id)
        if corrpg in filenames:
            # Corrected glyphs already exist; nothing to do for this page.
            continue

        # we need to perform aomr.
        original_image = os.path.join(
            dirpath, "{0}_staves_only.tiff".format(page_id))
        aomr_obj = AomrObject(original_image, **aomr_opts)

        try:
            lg.debug("Finding Staves")
            s = aomr_obj.find_staves()
        except Exception as e:
            lg.debug("Cannot find staves: {0} because {1}".format(pnum, e))
            continue

        if not s:
            lg.debug("No staves were found on page {0}".format(pnum))
            continue

        try:
            aomr_obj.remove_stafflines()
        except Exception as e:
            lg.debug("Cannot remove stafflines: {0} because {1}".format(pnum, e))
            continue

        cknn = knn.kNNNonInteractive(classifier, 'all', True, 1)
        ccs = aomr_obj.img_no_st.cc_analysis()
        func = classify.BoundingBoxGroupingFunction(4)
        classified_image = cknn.group_and_update_list_automatic(
            ccs,
            grouping_function=func,
            max_parts_per_group=4,
            max_graph_size=16
        )

        lg.debug("save all the files from page {0}".format(pnum))
        cknn.generate_features_on_glyphs(classified_image)

        # NOTE(review): this symbol table is built but never written or
        # returned here; kept in case constructing it matters -- confirm
        # before removing.
        s = SymbolTable()
        for split in plugin.methods_flat_category("Segmentation", ONEBIT):
            s.add("_split." + split[0])
        s.add("_group")
        s.add("_group._part")

        uncorr_path = os.path.join(
            dirpath, "{0}_uncorr_page_glyphs.xml".format(page_id))
        gamera_xml.WriteXMLFile(
            glyphs=classified_image,
            with_features=True).write_filename(uncorr_path)

        # Drop the per-page objects before moving on to the next page.
        del aomr_obj
        del classified_image
        del cknn
import haar as h
import sys
import cv2
# Import the Gamera core and initialize it
from gamera.core import *
# Import the classifier module
from gamera import knn
from gamera.classify import ShapedGroupingFunction
from gamera import gamera_xml
init_gamera()




# Create a non-interactive kNN classifier from the glyph XML file below,
# restricted to the listed features.
# NOTE(review): the positional 0 is presumably the "perform splits" flag --
# confirm against the kNNNonInteractive signature.
classifier = knn.kNNNonInteractive("./classifiers/knnFullSymbolGlyphs.xml",
        ["aspect_ratio","moments","compactness", "fourier_broken", "volume16regions","nholes"], 0,normalize=True)
# Alternative way of loading training data (currently disabled)
#classifier.from_xml_filename("knn_glyphs.xml")


# Load the image named by the last command-line argument and convert it
# to onebit

image = load_image(sys.argv[-1])
onebit = image.to_onebit()
STAFF_HEIGHT=2 #int(sys.argv[2])

# Get the connected components from the onebit image
component_list = onebit.cc_analysis()

# Report how many connected components were found
print len(component_list)
Example #9
0
def process_axz_directory(directory, class_glyphs, class_weights, outputdir):
    """Extract, de-staff and classify every AXZ page found under ``directory``.

    For each file (except ``.DS_Store``) a sub-directory of ``outputdir``
    named after the page number is created and filled with:

    * ``original_image.tiff``      -- image extracted from the AXZ file,
    * ``classifier_settings.xml``  -- settings of the kNN classifier used,
    * ``page_glyphs.xml``          -- classified glyphs with features,
    * ``source_image.tiff``        -- the image with staff lines removed.

    Pages on which no staves are found have their output directory removed
    again and are skipped.

    Args:
        directory: Directory tree to search for AXZ files.
        class_glyphs: Passed unchanged as the first argument of
            ``knn.kNNNonInteractive``.
        class_weights: Currently unused; the ``load_settings`` call that
            would consume it is commented out.
        outputdir: Directory that receives one sub-directory per page.
    """
    axz_suffix = ".axz"
    print("Processing AXZ Folder")
    for dirpath, dirnames, filenames in os.walk(directory):

        # if os.path.abspath(directory) == os.path.abspath(dirpath):
        #     continue

        for f in filenames:
            if f == ".DS_Store":
                continue

            # The page number is the last "_"-separated token without the
            # extension.  str.strip('.axz') would strip any of the
            # characters '.', 'a', 'x', 'z' from both ends, so remove the
            # suffix explicitly instead.
            pagenum = f.split("_")[-1]
            if pagenum.endswith(axz_suffix):
                pagenum = pagenum[:-len(axz_suffix)]
            print("Loading page  {0}".format(pagenum))

            # create an output directory
            outdir = os.path.join(outputdir, pagenum)
            os.mkdir(outdir)

            axzfile = os.path.join(dirpath, f)

            ax = AxFile(axzfile, "")
            staves = ax.get_img0().extract(0)

            # shutil.move(tfile[1], os.path.join(outdir, "original_image.tiff"))

            sfile = os.path.join(outdir, "original_image.tiff")

            save_image(staves, sfile)

            # lg.debug("Tempfile is: {0}".format(tfile[1]))

            # grab and remove the staves
            aomr_opts = {
                'lines_per_staff': 4,
                'staff_finder': 0,
                'staff_removal': 0,
                'binarization': 0,
                'discard_size': 12 # GVM, was 6
            }

            aomr_obj = AomrObject(sfile, **aomr_opts)

            try:
                lg.debug("Finding Staves")
                s = aomr_obj.find_staves()
            except Exception as e:
                lg.debug("Cannot find staves: {0} because {1}".format(pagenum, e))
                continue

            lg.debug("S is: {0}".format(s))
            if not s:
                lg.debug("no staves were found")
                os.remove(sfile)
                os.rmdir(outdir)
                # The output directory is gone; carrying on would try to
                # write into it further down.
                continue

            try:
                aomr_obj.remove_stafflines()
            except Exception as e:
                lg.debug("Cannot remove stafflines: {0} because {1}".format(pagenum, e))
                continue

            cknn = knn.kNNNonInteractive(class_glyphs, 'all', True, 1)
            # cknn.load_settings(class_weights)
            ccs = aomr_obj.img_no_st.cc_analysis()
            func = classify.BoundingBoxGroupingFunction(4)
            classified_image = cknn.group_and_update_list_automatic(
                ccs,
                grouping_function=func,
                max_parts_per_group=4,
                max_graph_size=16
            )

            lg.debug("save all the files into this directory")
            cknn.save_settings(os.path.join(outdir, "classifier_settings.xml"))

            cknn.generate_features_on_glyphs(classified_image)
            # NOTE(review): the symbol table is only consumed by the
            # commented-out write below; kept in case constructing it
            # matters -- confirm before removing.
            s = SymbolTable()
            for split in plugin.methods_flat_category("Segmentation", ONEBIT):
                s.add("_split." + split[0])
            s.add("_group")
            s.add("_group._part")

            avg_punctum_col = aomr_obj.average_punctum(cknn.get_glyphs())
            print("average punctum column size =  {0}".format(avg_punctum_col))

            glyphs_center_of_mass = aomr_obj.x_projection_vector(
                cknn.get_glyphs(), avg_punctum_col,
                aomr_opts.get('discard_size'))
            print("center of mass for each glyph  =  {0}".format(
                glyphs_center_of_mass))
            gamera_xml.WriteXMLFile(
                glyphs=classified_image,
                with_features=True).write_filename(
                    os.path.join(outdir, "page_glyphs.xml"))
            # gamera_xml.WriteXMLFile(symbol_table=s).write_filename(os.path.join(outdir, "symbol_table.xml"))
            # cknn.to_xml_filename(os.path.join(outdir, "classifier_glyphs.xml"), with_features=True)
            save_image(aomr_obj.img_no_st, os.path.join(outdir, "source_image.tiff"))

            # clean up
            del aomr_obj.img_no_st
            del aomr_obj
            del classified_image
            del ax
            del cknn
    def run(self, *args, **kwargs):
        """
        This runs the optimization on all the classifiers in the project.

        The optimization can be run with many different settings. For
        convenience, I've included all the settings in this script and
        commented out the ones that I'm not currently using. Feel free
        to tweak with them.

        For most of the arguments, I hope it is implicitly clear which
        ones are int and which ones are float/double. For example,
        arg_one=3 means arg_one should be an int, while arg_two=3.0 means
        arg_two should be a double/float.

        Later on, we can build a client interface to tweak all these settings.

        For every classifier the produced settings are written to a
        temporary XML file, stored in a new ClassifierSetting row, and the
        temporary directory is removed again -- also when saving fails.

        Detailed documentation can be found at http://gamera.sourceforge.net/doc/html/ga_optimization.html

        Code template taken from http://gamera.sourceforge.net/doc/html/ga_optimization.html#script-usage
        """
        init_gamera()

        classifiers = Classifier.objects.all()

        for classifier in classifiers:
            # Remember the project now: the classifier may be deleted while
            # the optimization is running.
            project = classifier.project

            optimization_start = timezone.now()
            print("Optimizing classifier {0}".format(classifier.name))

            cknn = knn.kNNNonInteractive(classifier.file_path,
                                               features = 'all',
                                               normalize = False)

            print("Setting base settings")
            baseSettings = knnga.GABaseSetting()
            baseSettings.opMode = knnga.GA_WEIGHTING  # Or knnga.GA_SELECTION
            baseSettings.popSize = 75
            baseSettings.crossRate = 0.95
            baseSettings.mutRate = 0.05

            print("Settings selection options")
            selection = knnga.GASelection()
            selection.setRoulettWheelScaled(2.0)
            #selection.setRoulettWheelScaled(double pressure=2.0)  # Pressure \in [1,2]
            #selection.setRandomSelection()
            #selection.setRankSelection(pressure=2.0, exponent=1.0)
            #selection.setStochUniSampling()
            #selection.setRoulettWheel()
            #selection.setTournamentSelection(tSize=3)

            print("Setting crossover settings")
            crossover = knnga.GACrossover()
            crossover.setUniformCrossover(0.5)
            #crossover.setUniformCrossover(double preference = 0.5)
            #crossover.setNPointCrossover(n=1)
            #crossover.setHypercubeCrossover(int numFeatures, double min, double max, alpha=0.0)
            #crossover.setSBXcrossover(int numFeatures, double min, double max, eta=0.0)
            #crossover.setSegmentCrossover(int numFeatures, double min, double max, alpha=0.0)

            print("Setting Mutation settings")
            mutation = knnga.GAMutation()
            #mutation.setShiftMutation()
            mutation.setSwapMutation()
            #mutation.setBinaryMutation(rate=0.05, normalize=False)
            mutation.setBinaryMutation(0.05, False)
            #mutation.setGaussMutation(int numFeatures, double min, double max, double sigma, double rate)
            #mutation.setInversionMutation()

            print("Setting replacement settings")
            replacement = knnga.GAReplacement()
            replacement.setSSGAdetTournament(3)
            #replacement.setSSGAdetTournament(int tSize=3)
            #replacement.setGenerationalReplacement()
            #replacement.setSSGAworse()

            print("Setting stop criteria")
            stop = knnga.GAStopCriteria()
            #stop.setSteadyStateStop(int minGens=100, int noChangeGens=20)
            stop.setSteadyStateStop(100, 20)
            #stop.setBestFitnessStop(optimum=1.0)
            #stop.setMaxFitnessEvals(n=5000)
            #stop.setMaxGenerations(100)

            print("Setting parallelization settings")
            parallel = knnga.GAParallelization()
            parallel.mode = True
            parallel.thredNum = 4  # attribute name as used by the knnga script template

            # Combine each setting object into one main object
            ga = knnga.GAOptimization(cknn, baseSettings, selection,
                                      crossover, mutation, replacement,
                                      stop, parallel)

            print("Beginning calculation...")
            ga.startCalculation()

            print("Done! Saving the produced settings.")

            optimization_end = timezone.now()

            # Choosing a name for the new classifier_setting
            date_string = datetime.now().strftime("%Y_%m_%d_%I%M%p")
            setting_name = "{0}:{1} Periodic Optimization".format(classifier.name[:200], date_string,)

            tdir = tempfile.mkdtemp()
            try:
                temp_xml_filepath = os.path.join(tdir, str(classifier.uuid) + '.xml')
                cknn.save_settings(temp_xml_filepath)

                try:
                    classifier = refetch_from_db(classifier)
                except ObjectDeletedError:
                    print("Sadly classifier {0} was deleted.".format(classifier.name))
                    # Now this is a lonely setting file with no classifier to hang out with.
                    classifier = None

                classifier_setting_instance = ClassifierSetting.objects.create(name=setting_name,
                                                                               project=project,
                                                                               fitness=ga.bestFitness,
                                                                               producer=classifier,
                                                                               optimization_started_at = optimization_start,
                                                                               optimization_finished_at = optimization_end)

                with open(temp_xml_filepath, 'rb') as f:
                    taskutil.save_file_field(classifier_setting_instance.settings_file, 'settings_xml', File(f))
            finally:
                # Always remove the temporary directory, even if saving the
                # settings or creating the database row raised.
                shutil.rmtree(tdir)
Example #11
0
    def run(self, *args, **kwargs):
        """
        This runs the optimization on all the classifiers in the project.

        The optimization can be run with many different settings. For
        convenience, I've included all the settings in this script and
        commented out the ones that I'm not currently using. Feel free
        to tweak with them.

        For most of the arguments, I hope it is implicitly clear which
        ones are int and which ones are float/double. For example,
        arg_one=3 means arg_one should be an int, while arg_two=3.0 means
        arg_two should be a double/float.

        Later on, we can build a client interface to tweak all these settings.

        For every classifier the produced settings are written to a
        temporary XML file, stored in a new ClassifierSetting row, and the
        temporary directory is removed again -- also when saving fails.

        Detailed documentation can be found at http://gamera.sourceforge.net/doc/html/ga_optimization.html

        Code template taken from http://gamera.sourceforge.net/doc/html/ga_optimization.html#script-usage
        """
        init_gamera()

        classifiers = Classifier.objects.all()

        for classifier in classifiers:
            # Remember the project now: the classifier may be deleted while
            # the optimization is running.
            project = classifier.project

            optimization_start = timezone.now()
            print("Optimizing classifier {0}".format(classifier.name))

            cknn = knn.kNNNonInteractive(classifier.file_path,
                                         features='all',
                                         normalize=False)

            print("Setting base settings")
            baseSettings = knnga.GABaseSetting()
            baseSettings.opMode = knnga.GA_WEIGHTING  # Or knnga.GA_SELECTION
            baseSettings.popSize = 75
            baseSettings.crossRate = 0.95
            baseSettings.mutRate = 0.05

            print("Settings selection options")
            selection = knnga.GASelection()
            selection.setRoulettWheelScaled(2.0)
            #selection.setRoulettWheelScaled(double pressure=2.0)  # Pressure \in [1,2]
            #selection.setRandomSelection()
            #selection.setRankSelection(pressure=2.0, exponent=1.0)
            #selection.setStochUniSampling()
            #selection.setRoulettWheel()
            #selection.setTournamentSelection(tSize=3)

            print("Setting crossover settings")
            crossover = knnga.GACrossover()
            crossover.setUniformCrossover(0.5)
            #crossover.setUniformCrossover(double preference = 0.5)
            #crossover.setNPointCrossover(n=1)
            #crossover.setHypercubeCrossover(int numFeatures, double min, double max, alpha=0.0)
            #crossover.setSBXcrossover(int numFeatures, double min, double max, eta=0.0)
            #crossover.setSegmentCrossover(int numFeatures, double min, double max, alpha=0.0)

            print("Setting Mutation settings")
            mutation = knnga.GAMutation()
            #mutation.setShiftMutation()
            mutation.setSwapMutation()
            #mutation.setBinaryMutation(rate=0.05, normalize=False)
            mutation.setBinaryMutation(0.05, False)
            #mutation.setGaussMutation(int numFeatures, double min, double max, double sigma, double rate)
            #mutation.setInversionMutation()

            print("Setting replacement settings")
            replacement = knnga.GAReplacement()
            replacement.setSSGAdetTournament(3)
            #replacement.setSSGAdetTournament(int tSize=3)
            #replacement.setGenerationalReplacement()
            #replacement.setSSGAworse()

            print("Setting stop criteria")
            stop = knnga.GAStopCriteria()
            #stop.setSteadyStateStop(int minGens=100, int noChangeGens=20)
            stop.setSteadyStateStop(100, 20)
            #stop.setBestFitnessStop(optimum=1.0)
            #stop.setMaxFitnessEvals(n=5000)
            #stop.setMaxGenerations(100)

            print("Setting parallelization settings")
            parallel = knnga.GAParallelization()
            parallel.mode = True
            parallel.thredNum = 4  # attribute name as used by the knnga script template

            # Combine each setting object into one main object
            ga = knnga.GAOptimization(cknn, baseSettings, selection, crossover,
                                      mutation, replacement, stop, parallel)

            print("Beginning calculation...")
            ga.startCalculation()

            print("Done! Saving the produced settings.")

            optimization_end = timezone.now()

            # Choosing a name for the new classifier_setting
            date_string = datetime.now().strftime("%Y_%m_%d_%I%M%p")
            setting_name = "{0}:{1} Periodic Optimization".format(
                classifier.name[:200],
                date_string,
            )

            tdir = tempfile.mkdtemp()
            try:
                temp_xml_filepath = os.path.join(tdir,
                                                 str(classifier.uuid) + '.xml')
                cknn.save_settings(temp_xml_filepath)

                try:
                    classifier = refetch_from_db(classifier)
                except ObjectDeletedError:
                    print("Sadly classifier {0} was deleted.".format(
                        classifier.name))
                    # Now this is a lonely setting file with no classifier to hang out with.
                    classifier = None

                classifier_setting_instance = ClassifierSetting.objects.create(
                    name=setting_name,
                    project=project,
                    fitness=ga.bestFitness,
                    producer=classifier,
                    optimization_started_at=optimization_start,
                    optimization_finished_at=optimization_end)

                with open(temp_xml_filepath, 'rb') as f:
                    taskutil.save_file_field(
                        classifier_setting_instance.settings_file,
                        'settings_xml', File(f))
            finally:
                # Always remove the temporary directory, even if saving the
                # settings or creating the database row raised.
                shutil.rmtree(tdir)