def create_mei_files(outdir):
    """Walk `outdir` and write an uncorrected MEI file for each page folder.

    Each immediate subdirectory of `outdir` is assumed to be named with the
    page number (zero-padded filenames inside). Manually corrected page-glyph
    XML is preferred over uncorrected; folders with neither are skipped, as
    are pages where pitch detection raises OverflowError or returns no data.
    """
    aomr_opts = {
        'staff_finder': 0,
        'lines_per_staff': 4,
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 12
    }
    for dirpath, dirnames, filenames in os.walk(outdir):
        if dirpath == outdir:
            continue
        if ".git" in dirpath.split("/"):
            continue
        folder_no = os.path.basename(dirpath)
        pnum = int(folder_no)
        lg.debug("Generating MEI file for {0}".format(pnum))
        # Prefer the manually corrected glyphs when available.
        if "{0}_corr_page_glyphs.xml".format(folder_no.zfill(4)) in filenames:
            glyphs = gamera_xml.glyphs_from_xml(os.path.join(dirpath, "{0}_corr_page_glyphs.xml".format(folder_no.zfill(4))))
        elif "{0}_uncorr_page_glyphs.xml".format(folder_no.zfill(4)) in filenames:
            glyphs = gamera_xml.glyphs_from_xml(os.path.join(dirpath, "{0}_uncorr_page_glyphs.xml".format(folder_no.zfill(4))))
        else:
            lg.debug("There was no page glyphs file for page {0}".format(pnum))
            continue
        original_image = os.path.join(dirpath, "{0}_staves_only.tiff".format(folder_no.zfill(4)))
        mei_file_write = os.path.join(dirpath, '{0}_uncorr.mei'.format(folder_no.zfill(4)))
        aomr_obj = AomrObject(original_image, **aomr_opts)
        try:
            data = aomr_obj.run(glyphs)
        except OverflowError as e:  # `as` form replaces Py2-only `except X, e`; valid on Py2.6+
            lg.debug("Could not do detection on {0} because {1}".format(pnum, e))
            continue
        if not data:
            # no data was returned.
            lg.debug("No data was found for {0}".format(pnum))
            continue
        # NOTE(review): the image name here uses the unpadded page number while
        # every other filename in this function is zero-padded -- confirm intended.
        mei_file = AomrMeiOutput.AomrMeiOutput(data, "{0}_original_image.tiff".format(pnum), page_number = pnum)
        meitoxml.meitoxml(mei_file.md, mei_file_write)
def main(options): init_gamera() all_glyphs = [] for dirpath, dirnames, filenames in os.walk(options['gam']): for f in filenames: if f != "page_glyphs.xml": continue # lg.debug("Processing: {0}".format(f)) glyphs = gamera_xml.glyphs_from_xml(os.path.join(dirpath, f)) def __glyphchecker(g): if "_group" in g.get_main_id().split("."): return False elif "_split" in g.get_main_id().split("."): return False else: return True all_glyphs.extend([g.get_main_id() for g in glyphs if __glyphchecker(g)]) all_glyphs.sort() all_glyph_set = set(all_glyphs) all_glyph_list = list(all_glyph_set) all_glyph_list.sort() for n in all_glyph_list: print n
def pitch_find(original_file, page_file, outdir, pitch_find_algorithm, exceptions):
    """Run staff finding and pitch finding on one page image.

    Parameters:
        original_file: path to the page image.
        page_file: GameraXML file containing the page's classified glyphs.
        outdir: unused here; kept for interface compatibility with callers.
        pitch_find_algorithm: 'Miyao' or 'AvLines'.
        exceptions: forwarded to AomrObject as its 'exceptions' option.

    Returns (sorted_glyphs, st_position, aomr_obj, las).

    Raises ValueError on an unknown algorithm name; previously this fell
    through and crashed later with an unbound `sorted_glyphs` (NameError).
    """
    aomr_opts = {
        'lines_per_staff': 4,
        'staff_finder': 0,  # 0: Miyao
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 12,
        'exceptions': exceptions
    }
    # FILES TO PROCESS
    glyphs = gamera_xml.glyphs_from_xml(page_file)
    # CREATING AOMR OBJECT, FINDING STAVES, AND RETRIEVING STAFF COORDINATES
    aomr_obj = AomrObject(original_file, **aomr_opts)
    st_position = aomr_obj.find_staves()  # staves position
    # Kept even though the binding is unused: the call may populate state on
    # aomr_obj that the pitch finders rely on -- TODO confirm.
    staff_coords = aomr_obj.staff_coords()
    las = proc_st_pos(st_position)
    if pitch_find_algorithm == 'Miyao':
        sorted_glyphs = aomr_obj.miyao_pitch_find(glyphs, aomr_opts['discard_size'])
    elif pitch_find_algorithm == 'AvLines':
        sorted_glyphs = aomr_obj.pitch_find(glyphs, st_position, aomr_opts['discard_size'])
    else:
        raise ValueError("Unknown pitch finding algorithm: {0}".format(pitch_find_algorithm))
    return sorted_glyphs, st_position, aomr_obj, las
def run_my_task(self, inputs, settings, outputs):
    """Group connected components with a kNN classifier and write GameraXML output."""
    training_xml = inputs['GameraXML - Training Data'][0]['resource_path']
    with self.tempdir() as scratch:
        # Work on a copy of the training data inside the scratch directory.
        local_copy = os.path.join(scratch, training_xml + '.xml')
        copyfile(training_xml, local_copy)
        cknn = gamera.knn.kNNNonInteractive(local_copy)
        if 'GameraXML - Feature Selection' in inputs:
            feature_xml = inputs['GameraXML - Feature Selection'][0]['resource_path']
            cknn.load_settings(feature_xml)
        grouping = gamera.classify.BoundingBoxGroupingFunction(
            settings['Bounding box size'])
        # Read the connected components, then group them automatically.
        components = glyphs_from_xml(
            inputs['GameraXML - Connected Components'][0]['resource_path'])
        grouped = cknn.group_and_update_list_automatic(components,
                                                       grouping_function=grouping,
                                                       max_parts_per_group=4,
                                                       max_graph_size=16)
        # Recompute the Gamera features on the grouped glyphs.
        cknn.generate_features_on_glyphs(grouped)
        # Write the glyphs to every 'Classified Glyphs' output port.
        writer = gamera.gamera_xml.WriteXMLFile(glyphs=grouped, with_features=True)
        for port in outputs['GameraXML - Classified Glyphs']:
            writer.write_filename(port['resource_path'])
def process_glyphs_directory(glyphs_directory, output_dir): aomr_opts = { 'staff_finder': 0, 'lines_per_staff': 4, 'staff_removal': 0, 'binarization': 0, 'discard_size': 12 } print "Processing glyphs directory" for dirpath, dirnames, filenames in os.walk(glyphs_directory): for f in filenames: if f == 'page_glyphs.xml': folder_no = os.path.basename(dirpath) pnum = int(folder_no) input_filename = os.path.join(dirpath, f) lg.debug("Input filename is {0}".format(input_filename)) output_filename = os.path.join(output_dir, folder_no.zfill(4), f) lg.debug("Output filename is {0}".format(output_filename)) shutil.copy(input_filename, output_filename) original_image = os.path.join(output_dir, folder_no.zfill(4), 'original_image.tiff') mei_file_write = os.path.join(output_dir, folder_no.zfill(4), 'liber-usualis-{0}.mei'.format(folder_no.zfill(4))) glyphs = gamera_xml.glyphs_from_xml(output_filename) aomr_obj = AomrObject(original_image, **aomr_opts) data = aomr_obj.run(glyphs) mei_file = AomrMeiOutput.AomrMeiOutput(data, original_image.split('/')[-2], page_number = pnum) meitoxml.meitoxml(mei_file.md, mei_file_write)
def run_my_task(self, inputs, settings, outputs):
    """Group connected components using a kNN classifier and write GameraXML.

    Copies the training data into a temp dir, builds a non-interactive kNN
    classifier from it (optionally loading a feature-selection file), groups
    the input connected components with a bounding-box grouping function,
    regenerates features, and writes the result to each output port.
    """
    classifier_path = inputs['GameraXML - Training Data'][0]['resource_path']
    with self.tempdir() as tdir:
        tempPath = os.path.join(tdir, classifier_path + '.xml')
        copyfile(classifier_path, tempPath)
        cknn = gamera.knn.kNNNonInteractive(tempPath)
        if 'GameraXML - Feature Selection' in inputs:
            cknn.load_settings(inputs['GameraXML - Feature Selection'][0]['resource_path'])
        func = gamera.classify.BoundingBoxGroupingFunction(
            settings['Bounding box size'])
        # Load the connected components
        ccs = glyphs_from_xml(
            inputs['GameraXML - Connected Components'][0]['resource_path'])
        # Do grouping
        cs_image = cknn.group_and_update_list_automatic(ccs,
                                                        grouping_function=func,
                                                        max_parts_per_group=4,
                                                        max_graph_size=16)
        # Generate the Gamera features
        cknn.generate_features_on_glyphs(cs_image)
        # Write the glyphs to GameraXML
        output_xml = gamera.gamera_xml.WriteXMLFile(glyphs=cs_image,
                                                    with_features=True)
        for i in range(len(outputs['GameraXML - Classified Glyphs'])):
            output_xml.write_filename(
                outputs['GameraXML - Classified Glyphs'][i]['resource_path'])
def main(original_file, page_file, outdir):
    """Pitch-find one page and write its MEI file into `outdir`.

    The page number is taken from the parent directory name of
    `original_file`, and the output filename is "<page>_<basename>.mei".
    """
    aomr_opts = {
        'lines_per_staff': 4,
        'staff_finder': 0,  # 0: Miyao
        'staff_removal': 0,
        'binarization': 0,
        'discard_size': 12
    }
    #FILES TO PROCESS
    glyphs = gamera_xml.glyphs_from_xml(page_file)
    lg.debug("Original file: {0}, page file: {1}".format(original_file, page_file))
    page_number = int(original_file.split('/')[-2])
    fname = os.path.splitext(original_file.split('/')[-1])[0] + ".mei"
    file_name = "{0}_{1}".format(page_number, fname)
    # CREATING AOMR OBJECT, FINDING STAVES, AND RETRIEVING STAFF COORDINATES
    aomr_obj = AomrObject(original_file, **aomr_opts)
    st_position = aomr_obj.find_staves()  # staves position
    staff_coords = aomr_obj.staff_coords()
    # Each entry is presumably (glyph, staff_no, offset, start_pos, note,
    # octave, clef_pos, clef) -- inferred from the unpacking below; confirm
    # against miyao_pitch_find.
    sorted_glyphs = aomr_obj.miyao_pitch_find(glyphs)
    # PITCH FINDING
    # pitch_find = aomr_obj.pitch_find(glyphs, st_position, aomr_opts.get('discard_size'))
    # print len(pitch_find)
    # sorted_glyphs = sorted(proc_glyphs, key=itemgetter(1, 2))
    # STRUCTURING THE DATA IN JSON
    data = {}
    for s, stave in enumerate(staff_coords):
        contents = []
        # Note: staff numbers appear to be 1-based (compared to s+1 below).
        for glyph, staff, offset, strt_pos, note, octave, clef_pos, clef in sorted_glyphs:
            glyph_id = glyph.get_main_id()
            # lg.debug("Glyph ID: {0}".format(glyph_id))
            glyph_type = glyph_id.split(".")[0]
            glyph_form = glyph_id.split(".")[1:]
            # lg.debug("sg[1]:{0} s:{1} sg{2}".format(sg[1], s+1, sg))
            # structure: g, stave, g.offset_x, note, strt_pos
            if staff == s+1:
                j_glyph = {'type': glyph_type,
                           'form': glyph_form,
                           'coord': [glyph.offset_x, glyph.offset_y,
                                     glyph.offset_x + glyph.ncols,
                                     glyph.offset_y + glyph.nrows],
                           'strt_pitch': note,
                           'octv': octave,
                           'strt_pos': strt_pos,
                           'clef_pos': clef_pos,
                           'clef': clef}
                contents.append(j_glyph)
        data[s] = {'coord':stave, 'content':contents}
    #print data
    # CREATING THE MEI FILE
    mei_file = AomrMeiOutput.AomrMeiOutput(data, file_name, page_number)
    meitoxml.meitoxml(mei_file.md, os.path.join(outdir, file_name))
def test_noninteractive_classifier():
    """Exercise the non-interactive kNN wrappers: classify, serialize, reload."""
    # We assume the XML reading/writing itself is fine (given test_xml),
    # but we should test the wrappers in classify anyway.
    line_image = load_image("data/testline.png")
    components = line_image.cc_analysis()
    training_glyphs = gamera_xml.glyphs_from_xml("data/testline.xml")
    clf = knn.kNNNonInteractive(training_glyphs,
                                features=featureset,
                                normalize=False)
    assert not clf.is_interactive()
    assert len(clf.get_glyphs()) == 66
    _test_classification(clf, components)
    # Round-trip the classifier state through a serialized file.
    clf.serialize("tmp/serialized.knn")
    clf.clear_glyphs()
    assert len(clf.get_glyphs()) == 0
    clf.unserialize("tmp/serialized.knn")
def home(request): init_gamera() encoded_glyphs = [] gamera_glyphs = gamera_xml.glyphs_from_xml( '/Volumes/Shared/LU-OMR/Liber_Usualis_NO_ST/Processed_Pages/1234/classifier_glyphs_1234_654.xml' ) for gamera_glyph in gamera_glyphs: glyph = gamera_glyph.to_rgb().to_pil() buf = StringIO.StringIO() glyph.save(buf, format='PNG') png = buf.getvalue() encoded_png = base64.b64encode(png) encoded_glyphs.append(encoded_png) for glyph in encoded_glyphs: print glyph print "" return render(request, 'index.html', {'encoded_glyphs': encoded_glyphs})
def home(request):
    """Render index.html with every glyph from a hard-coded GameraXML file,
    each converted to RGB, saved as PNG in memory, and base64-encoded.

    The glyphs are also printed to stdout, presumably as a debugging aid.
    """
    init_gamera()
    encoded_glyphs = []
    gamera_glyphs = gamera_xml.glyphs_from_xml('/Volumes/Shared/LU-OMR/Liber_Usualis_NO_ST/Processed_Pages/1234/classifier_glyphs_1234_654.xml')
    for gamera_glyph in gamera_glyphs:
        # Convert the gamera image to a PIL image and encode it as PNG bytes.
        glyph = gamera_glyph.to_rgb().to_pil()
        buf = StringIO.StringIO()
        glyph.save(buf, format='PNG')
        png = buf.getvalue()
        encoded_png = base64.b64encode(png)
        encoded_glyphs.append(encoded_png)
    for glyph in encoded_glyphs:
        print glyph
        print ""
    return render(request, 'index.html', {
        'encoded_glyphs': encoded_glyphs
    })
def test_noninteractive_classifier():
    """Test the non-interactive kNN classifier wrappers, including
    serialization round-tripping."""
    # We assume the XML reading/writing itself is fine (given
    # test_xml), but we should test the wrappers in classify anyway
    image = load_image("data/testline.png")
    ccs = image.cc_analysis()
    database = gamera_xml.glyphs_from_xml("data/testline.xml")
    classifier = knn.kNNNonInteractive(database, features=featureset, normalize=False)
    assert not classifier.is_interactive()
    assert len(classifier.get_glyphs()) == 66
    _test_classification(classifier, ccs)
    # Serialize, clear, and reload to verify persisted state.
    classifier.serialize("tmp/serialized.knn")
    classifier.clear_glyphs()
    assert len(classifier.get_glyphs()) == 0
    classifier.unserialize("tmp/serialized.knn")
def run_my_task(self, inputs, settings, outputs):
    """Split each connected component with ProjectionSplitter and write results.

    Reads glyphs from the 'GameraXML - Connected Components' input, runs the
    projection-based splitter on each, and writes all resulting glyphs (with
    features) to the output port of the same name. Returns True on success.
    """
    glyphs = gamera_xml.glyphs_from_xml(
        inputs['GameraXML - Connected Components'][0]['resource_path'])
    # (leftover debug `print settings` removed -- it dumped the job settings
    # to stdout on every run)
    kwargs = {
        'smoothing': settings['Smoothing'],
        'extrema_threshold': 0,
        'min_glyph_size': settings['Minimum Glyph Size'],
        'max_recursive_cuts': settings['Maximum Recursive Cuts'],
        'rotation': settings['Angle'],
        # will it cut?
        'min_slice_spread_rel': settings['Minimum Slice Spread'],  # minimum spread for a cut
        'low_projection_threshold': settings['Low Valley Threshold'],  # FORCE a cut if valley under a certain value
        'min_projection_segments': settings['Minimum Segment Length'],  # ++ less likely to cut, -- more slice points
        'prefer_multi_cuts': False,
        'prefer_x': settings['Prioritize X-slices'],
        # Debug Options
        'print_projection_array': False,
        'plot_projection_array': False,  # script only
        'save_cuts': False,
    }
    ps = ProjectionSplitter(**kwargs)
    output_glyphs = []
    for g in glyphs:
        output_glyphs += ps.run(g)
    outfile_path = outputs['GameraXML - Connected Components'][0]['resource_path']
    output_xml = gamera_xml.WriteXMLFile(glyphs=output_glyphs, with_features=True)
    output_xml.write_filename(outfile_path)
    return True
def main(options): init_gamera() glyph_corrections = dict(all_glyphs.g) errno = 0. numglyphs = 0. numpages = 0 for dirpath, dirnames, filenames in os.walk(options['gam']): numpages += 1 for f in filenames: if f == "page_glyphs_corr.xml": os.unlink(os.path.join(dirpath, f)) if f != "page_glyphs.xml": continue # lg.debug("Processing: {0}".format(f)) glyphs = gamera_xml.glyphs_from_xml(os.path.join(dirpath, f)) for i,g in enumerate(glyphs): numglyphs += 1 if g.get_main_id() in glyph_corrections.keys(): errno += 1 # this glyph needs correcting. action = glyph_corrections[g.get_main_id()] if action == "warn": lg.warn("=====> Problem detected with {0} on page {1} <=====".format(g.get_main_id(), os.path.basename(dirpath))) elif action == "": # lg.warn("Deleting {0} from page {1}".format(g.get_main_id(), dirpath)) glyphs.pop(i) else: # lg.warn("Replacing {0} with {1} on page {2}".format(g.get_main_id(), action, dirpath)) g.classify_manual(action) os.rename(os.path.join(dirpath, f), os.path.join(dirpath, "page_glyphs_uncorr.xml")) gamera_xml.WriteXMLFile(glyphs=glyphs, with_features=True).write_filename(os.path.join(dirpath, "page_glyphs.xml")) # save out glyphs. # move on. print "Totals: {0} errors out of {1} glyphs; {2} total error rate".format(errno, numglyphs, (errno / numglyphs) * 100) print "Number of pages: {0}".format(numpages) print "Average glyphs per page: {0}".format(numglyphs / numpages) print "Average errors per page: {0}".format(errno / numpages)
def run_my_task(self, inputs, settings, outputs):
    """Interactive classification task driven by a state machine in `settings`.

    States (stored under settings['@state']):
      IMPORT_XML  -- first run: import the glyphs, move to CORRECTION, and
                     wait for user input.
      CORRECTION  -- apply the user's glyph edits, re-run classification, and
                     wait for more input.
      (other)     -- final pass: apply edits, classify once more, write the
                     outputs, and purge the serialized JSON from settings.
    """
    # Initialize a gamera classifier
    classifier_path = inputs['GameraXML - Connected Components'][0][
        'resource_path']
    if 'GameraXML - Feature Selection' in inputs:
        features = inputs['GameraXML - Feature Selection'][0][
            'resource_path']
    else:
        features = None
    # Handle importing the optional training classifier
    if 'GameraXML - Training Data' in inputs:
        training_database = glyphs_from_xml(
            inputs['GameraXML - Training Data'][0]['resource_path'])
    else:
        training_database = []
    # Set the initial state
    if '@state' not in settings:
        settings['@state'] = ClassifierStateEnum.IMPORT_XML
        settings['glyphs'] = []
    if settings['@state'] == ClassifierStateEnum.IMPORT_XML:
        run_import_stage(settings, classifier_path)
        settings['@state'] = ClassifierStateEnum.CORRECTION
        settings['glyphs_json'] = serialize_glyphs_to_json(settings)
        return self.WAITING_FOR_INPUT()
    elif settings['@state'] == ClassifierStateEnum.CORRECTION:
        # Update any changed glyphs
        update_changed_glyphs(settings)
        run_correction_stage(settings, training_database, features)
        settings['glyphs_json'] = serialize_glyphs_to_json(settings)
        return self.WAITING_FOR_INPUT()
    else:
        # Update changed glyphs
        update_changed_glyphs(settings)
        # Do one final classification before quitting
        cknn = run_correction_stage(settings, training_database, features)
        # No more corrections are required. We can now output the data
        run_output_stage(cknn, settings['glyphs'], outputs)
        # Remove the JSON string from the database
        purge_serialized_json(settings)
def run_my_task(self, inputs, settings, outputs):
    """Find pitches for classified glyphs and emit a combined JSOMR file."""
    # Inputs
    jsomr_path = inputs['JSOMR of staves and page properties'][0]['resource_path']
    with open(jsomr_path, 'r') as infile:
        jsomr = json.loads(infile.read())
    glyphs = gamera_xml.glyphs_from_xml(
        inputs['GameraXML - Classified Connected Components'][0]['resource_path'])

    pf = PitchFinder(discard_size=settings['Discard Size'])
    page = jsomr['page']
    staves = jsomr['staves']
    pitches = pf.get_pitches(glyphs, staves)

    # Outputs: page and staves pass through unchanged; glyphs carry pitches.
    result = {
        'page': page,
        'staves': staves,
        'glyphs': pitches,
    }
    out_path = outputs['JSOMR of glyphs, staves, and page properties'][0]['resource_path']
    with open(out_path, 'w') as outfile:
        outfile.write(json.dumps(result))
    return True
def fix_spelling_mistakes(page_glyphs_file, glyph_corrections):
    """Apply spelling corrections to the glyph ids in one page-glyphs file.

    `glyph_corrections` maps a (misspelled) glyph id to an action: "warn"
    logs a warning and keeps the glyph, "" removes the glyph, and any other
    string reclassifies the glyph to that id.

    Returns (glyphs, numglyphs, errno): the corrected glyph list, the number
    of glyphs examined, and the number that needed correcting.
    """
    errno = 0.
    numglyphs = 0.
    glyphs = gamera_xml.glyphs_from_xml(page_glyphs_file)
    k = glyph_corrections.keys()
    # Build a new list instead of popping from `glyphs` during iteration:
    # the original `glyphs.pop(i)` inside `enumerate(glyphs)` skipped the
    # next glyph and, after earlier pops shifted indices, removed the wrong
    # elements entirely.
    corrected = []
    for g in glyphs:
        numglyphs += 1
        gid = g.get_main_id()
        if gid not in k:
            corrected.append(g)
            continue
        errno += 1  # this glyph needs correcting.
        action = glyph_corrections[gid]
        if action == "warn":
            lg.warn("=====> Problem detected with {0} on page {1} <=====".format(gid, os.path.basename(page_glyphs_file)))
            corrected.append(g)
        elif action == "":
            pass  # drop the glyph
        else:
            g.classify_manual(action)
            corrected.append(g)
    return (corrected, numglyphs, errno)
def test():
    """Ad-hoc evaluation of four kNN confidence measures.

    Classifies every glyph of a fixed training set against the set itself,
    writes one CSV row per glyph to results.txt (answer id, correct flag, and
    the four candidate measures), then prints summary statistics and simple
    mean-threshold error rates for each measure.
    """
    glyphs = gamera_xml.glyphs_from_xml(
        r"C:\Documents and Settings\Karl MacMillan\Desktop\test\prod.xml")
    glyphs = strip_small_categories(glyphs)
    from gamera.plugins import features
    k = knn.kNN()
    print(k.features)
    features.generate_features_list(glyphs, k.feature_functions)
    print("Getting gstats")
    graph_stats = get_graph_stats(glyphs, k)
    gstats = knn.get_glyphs_stats(glyphs)
    max_dist = max_distance(glyphs, k)
    print(max_dist)
    file = open("results.txt", "w")
    # Index 0 collects measures for misclassified glyphs, index 1 for correct.
    global_max = [[], []]
    local_max = [[], []]
    all = [[], []]
    graph = [[], []]
    gr_ccorrect = 0
    gr_icorrect = 0
    for x in glyphs:
        local_max_dist = local_max_distance(glyphs, x, k)
        ans = k.classify_with_images(glyphs, x, 1)
        file.write(ans[0][1] + ",")  # + str(ans[0][0]) + ",")
        correct = 0
        if x.get_main_id() == ans[0][1]:
            file.write("1,")
            correct = 1
        else:
            file.write("0,")
        # Measure 1: distance normalized by the global maximum distance.
        g = 1.0 - (ans[0][0] / max_dist)
        global_max[correct].append(g)
        file.write(str(g) + ",")
        # Measure 2: distance normalized by this glyph's local max distance.
        l = 1.0 - (ans[0][0] / local_max_dist)
        local_max[correct].append(l)
        file.write(str(l) + ",")
        # Measure 3: stdev of answer distance vs. the class statistics.
        a = stats.samplestdev([ans[0][0], gstats[ans[0][1]][1]])
        all[correct].append(a)
        file.write(str(a) + ",")
        # Measure 4: stdev of answer distance vs. the graph statistics.
        gr = stats.samplestdev([ans[0][0], graph_stats[ans[0][1]]])
        if (gr <= 1 and correct):
            gr_ccorrect += 1
        if (gr > 1 and not correct):
            gr_icorrect += 1
        graph[correct].append(gr)
        file.write(str(gr))
        file.write("\n")
    print("num correct: %d num incorrect: %d" % (len(global_max[1]), len(global_max[0])))
    print("confidence %f %f %f" % (((gr_ccorrect + gr_icorrect) / float(len(glyphs))),
                                   gr_ccorrect / float(len(glyphs) - len(global_max[0])),
                                   gr_icorrect / float(len(glyphs) - len(global_max[1]))))
    # Per-measure mean/stdev for the correct and incorrect populations;
    # -1 marks an empty population.
    cgm = -1
    igm = -1
    cgs = -1
    igs = -1
    if (len(global_max[0])):
        igm = stats.mean(global_max[0])
        igs = stats.samplestdev(global_max[0])
    if (len(global_max[1])):
        cgm = stats.mean(global_max[1])
        cgs = stats.samplestdev(global_max[1])
    clm = -1
    ilm = -1
    cls = -1
    ils = -1
    if (len(local_max[0])):
        ilm = stats.mean(local_max[0])
        ils = stats.samplestdev(local_max[0])
    if (len(local_max[1])):
        clm = stats.mean(local_max[1])
        cls = stats.samplestdev(local_max[1])
    cam = -1
    iam = -1
    cas = -1
    ias = -1
    if (len(all[0])):
        iam = stats.mean(all[0])
        ias = stats.samplestdev(all[0])
    if (len(all[1])):
        cam = stats.mean(all[1])
        cas = stats.samplestdev(all[1])
    cgraphm = -1
    igraphm = -1
    cgraphs = -1
    igraphs = -1
    if (len(graph[0])):
        igraphm = stats.mean(graph[0])
        igraphs = stats.samplestdev(graph[0])
    if (len(graph[1])):
        cgraphm = stats.mean(graph[1])
        cgraphs = stats.samplestdev(graph[1])
    print("global correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cgm, cgs, igm, igs))
    print("local correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (clm, cls, ilm, ils))
    print("all correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cam, cas, iam, ias))
    print("graph correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cgraphm, cgraphs, igraphm, igraphs))

    def otsu_threshold(p):
        # Otsu's method over histogram `p`: return the bin index that
        # maximizes the between-class variance. (Defined but not called here.)
        l = len(p)
        mu_T = 0.0
        for i in range(l):
            mu_T += i * p[i]
        sigma_T = 0.0
        for i in range(l):
            sigma_T += (i - mu_T) * (i - mu_T) * p[i]
        k_low = 0
        while (p[k_low] == 0) and (k_low < (l - 1)):
            k_low += 1
        k_high = l - 1
        while (p[k_high] == 0) and (k_high > 0):
            # NOTE(review): incrementing k_low in this loop looks like a
            # copy-paste slip from the loop above -- confirm intent.
            k_low += 1
            k_high -= 1
        criterion = 0.0
        thresh = 127
        omega_k = 0.0
        mu_k = 0.0
        k = k_low
        while k <= k_high:
            omega_k += p[k]
            mu_k += k * p[k]
            expr_1 = (mu_T * omega_k - mu_k)
            sigma_b_k = expr_1 * expr_1 / (omega_k * (1 - omega_k))
            if (criterion < sigma_b_k / sigma_T):
                criterion = sigma_b_k / sigma_T
                thresh = k
            k += 1
        return thresh

    # Threshold analysis: for each measure, use the mean of all values as a
    # cutoff and count how many incorrect (graph[0]) / correct (graph[1])
    # classifications fall on the wrong side of it.
    graph_l = graph[0][:]
    graph_l.extend(graph[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print("threshold: " + str(threshold))
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[0])) * 100)
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[1])) * 100)
    graph_l = all[0][:]
    graph_l.extend(all[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print("threshold: " + str(threshold))
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[0])) * 100)
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[1])) * 100)
    graph_l = local_max[0][:]
    graph_l.extend(local_max[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print("threshold: " + str(threshold))
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[0])) * 100)
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print(num_wrong, num_wrong / float(len(graph[1])) * 100)
def test_glyphs_with_features_from_xml():
    """Loading with an explicit feature list computes exactly those features."""
    requested_features = ["area", "aspect_ratio"]
    glyphs = gamera_xml.glyphs_from_xml("data/testline.xml", requested_features)
    assert len(glyphs) == 66
    assert len(glyphs[0].features) == 2
from gamera.toolkits.aruspix.ax_file import *

axfile = ""
group = 0

# start options dialog
dialog = Args(
    [FileOpen("Aruspix file", axfile, "*.axz"),
     Choice("group", ["Group", "No group"])],
    name="Select the file")
params = dialog.show()

if params is not None and params[0]:
    # map parameters
    axfile = params[0]
    group = params[1]
    f = AxFile(axfile, "")
    # Choice index 1 is "No group": load the un-grouped page glyphs.
    gl = []
    if group == 1:
        page_xml = "gamera_page_no_group.xml"
    else:
        page_xml = "gamera_page_group.xml"
    gl = gamera_xml.glyphs_from_xml(f.tmpdirname + page_xml)
    image = load_image(f.tmpdirname + "img2.tif")
    classifier = knn.kNNInteractive()
    classifier.display(gl, image)
def test_glyphs_to_xml_with_features():
    """Round-trip glyphs (with features) to XML and compare with the reference file."""
    loaded = gamera_xml.glyphs_from_xml("data/testline.xml",
                                        feature_functions=features)
    gamera_xml.glyphs_to_xml("tmp/testline_test2.xml", loaded, True)
    assert equal_files("tmp/testline_test2.xml", "data/testline_test2.xml")
def test_glyphs_to_xml_gz():
    """Writing glyphs to a .gz path produces the expected gzipped XML."""
    loaded = gamera_xml.glyphs_from_xml("data/testline.xml")
    gamera_xml.glyphs_to_xml("tmp/testline_test1.xml.gz", loaded, False)
    assert equal_files("tmp/testline_test1.xml.gz",
                       "data/testline_test1.xml.gz",
                       gz=True)
l, h = image.ncols, image.nrows p = int(0.5 + l / 2.0), int(0.5 + h / 2.0) return p if __name__ == "__main__": inImage, inXML = None, None (in0) = sys.argv[1] if '.png' in in0: inImage = in0 image = load_image(inImage) elif '.xml' in in0: inXML = in0 glyphs = gamera_xml.glyphs_from_xml(inXML) # remove files already there so they dont get stacked up filesPNG = glob.glob('./output/*.png') filesXML = glob.glob('./output/*.xml') for f in filesPNG + filesXML: os.remove(f) kwargs = { 'smoothing': 1, 'min_glyph_size': 20, 'max_recursive_cuts': 50, 'rotation': 45, # will it cut? 'min_slice_spread_rel': 0.2, # minimum spread for a cut
def test_glyphs_from_xml_gz():
    """A gzipped GameraXML file loads transparently."""
    loaded = gamera_xml.glyphs_from_xml("data/testline.xml.gz")
    assert len(loaded) == 66
def _test_malformed():
    """Attempt to load a malformed XML file (expected to raise)."""
    loaded = gamera_xml.glyphs_from_xml("data/malformed.xml")
def _test_missing_attributes():
    """Attempt to load XML with missing attributes (expected to raise)."""
    loaded = gamera_xml.glyphs_from_xml("data/missing_attributes.xml")
def test_write_xml():
    """WriteXML emits a document matching the reference file."""
    loaded = gamera_xml.glyphs_from_xml("data/testline.xml")
    xml_writer = gamera_xml.WriteXML(loaded)
    # Exercise the in-memory serialization path as well as the file path.
    result_string = xml_writer.string()
    xml_writer.write_filename("tmp/testline_test3.xml")
    assert equal_files("tmp/testline_test3.xml", "data/testline_test3.xml")
def __init__(self, gamera_file_path):
    """Load a GameraXML file and wrap each of its images in a glyph object."""
    self.gamera_images = glyphs_from_xml(gamera_file_path)
    # Construct the glyph objects
    glyph_list = []
    for image in self.gamera_images:
        glyph_list.append(gamera_image_to_glyph(image))
    self.glyphs = glyph_list
def test():
    """Ad-hoc evaluation of four kNN confidence measures (Python 2 variant).

    Classifies every glyph of a fixed training set against the set itself,
    writes one CSV row per glyph to results.txt (answer id, correct flag, and
    the four candidate measures), then prints summary statistics and simple
    mean-threshold error rates for each measure.
    """
    glyphs = gamera_xml.glyphs_from_xml(r"C:\Documents and Settings\Karl MacMillan\Desktop\test\prod.xml")
    glyphs = strip_small_categories(glyphs)
    from gamera.plugins import features
    k = knn.kNN()
    print k.features
    features.generate_features_list(glyphs, k.feature_functions)
    print "Getting gstats"
    graph_stats = get_graph_stats(glyphs, k)
    gstats = knn.get_glyphs_stats(glyphs)
    max_dist = max_distance(glyphs, k)
    print max_dist
    file = open("results.txt", "w")
    # Index 0 collects measures for misclassified glyphs, index 1 for correct.
    global_max = [[],[]]
    local_max = [[],[]]
    all = [[],[]]
    graph = [[],[]]
    gr_ccorrect = 0
    gr_icorrect = 0
    for x in glyphs:
        local_max_dist = local_max_distance(glyphs, x, k)
        ans = k.classify_with_images(glyphs, x, 1)
        file.write(ans[0][1] + ",")  # + str(ans[0][0]) + ",")
        correct = 0
        if x.get_main_id() == ans[0][1]:
            file.write("1,")
            correct = 1
        else:
            file.write("0,")
        # Measure 1: distance normalized by the global maximum distance.
        g = 1.0 - (ans[0][0] / max_dist)
        global_max[correct].append(g)
        file.write(str(g) + ",")
        # Measure 2: distance normalized by this glyph's local max distance.
        l = 1.0 - (ans[0][0] / local_max_dist)
        local_max[correct].append(l)
        file.write(str(l) + ",")
        # Measure 3: stdev of answer distance vs. the class statistics.
        a = stats.samplestdev([ans[0][0],gstats[ans[0][1]][1]])
        all[correct].append(a)
        file.write(str(a) + ",")
        # Measure 4: stdev of answer distance vs. the graph statistics.
        gr = stats.samplestdev([ans[0][0],graph_stats[ans[0][1]]])
        if (gr <= 1 and correct):
            gr_ccorrect += 1
        if (gr > 1 and not correct):
            gr_icorrect += 1
        graph[correct].append(gr)
        file.write(str(gr))
        file.write("\n")
    print "num correct: %d num incorrect: %d" % (len(global_max[1]), len(global_max[0]))
    print "confidence %f %f %f" % (((gr_ccorrect + gr_icorrect) / float(len(glyphs))),
                                   gr_ccorrect / float(len(glyphs) - len(global_max[0])),
                                   gr_icorrect / float(len(glyphs) - len(global_max[1])))
    # Per-measure mean/stdev for the correct and incorrect populations;
    # -1 marks an empty population.
    cgm = -1
    igm = -1
    cgs = -1
    igs = -1
    if (len(global_max[0])):
        igm = stats.mean(global_max[0])
        igs = stats.samplestdev(global_max[0])
    if (len(global_max[1])):
        cgm = stats.mean(global_max[1])
        cgs = stats.samplestdev(global_max[1])
    clm = -1
    ilm = -1
    cls = -1
    ils = -1
    if (len(local_max[0])):
        ilm = stats.mean(local_max[0])
        ils = stats.samplestdev(local_max[0])
    if (len(local_max[1])):
        clm = stats.mean(local_max[1])
        cls = stats.samplestdev(local_max[1])
    cam = -1
    iam = -1
    cas = -1
    ias = -1
    if (len(all[0])):
        iam = stats.mean(all[0])
        ias = stats.samplestdev(all[0])
    if (len(all[1])):
        cam = stats.mean(all[1])
        cas = stats.samplestdev(all[1])
    cgraphm = -1
    igraphm = -1
    cgraphs = -1
    igraphs = -1
    if (len(graph[0])):
        igraphm = stats.mean(graph[0])
        igraphs = stats.samplestdev(graph[0])
    if (len(graph[1])):
        cgraphm = stats.mean(graph[1])
        cgraphs = stats.samplestdev(graph[1])
    print "global correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cgm, cgs, igm, igs)
    print "local correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (clm, cls, ilm, ils)
    print "all correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cam, cas, iam, ias)
    print "graph correct avg: %f stdev: %f incorrect avg: %f stddev: %f" % (cgraphm, cgraphs, igraphm, igraphs)

    def otsu_threshold(p):
        # Otsu's method over histogram `p`: return the bin index that
        # maximizes the between-class variance. (Defined but not called here.)
        l = len(p)
        mu_T = 0.0
        for i in range(l):
            mu_T += i * p[i]
        sigma_T = 0.0
        for i in range(l):
            sigma_T += (i-mu_T)*(i-mu_T)*p[i]
        k_low = 0
        while (p[k_low] == 0) and (k_low < (l - 1)):
            k_low += 1
        k_high = l - 1
        while (p[k_high] == 0) and (k_high > 0):
            # NOTE(review): incrementing k_low in this loop looks like a
            # copy-paste slip from the loop above -- confirm intent.
            k_low += 1
            k_high -= 1
        criterion = 0.0
        thresh = 127
        omega_k = 0.0
        mu_k = 0.0
        k = k_low
        while k <= k_high:
            omega_k += p[k]
            mu_k += k*p[k]
            expr_1 = (mu_T*omega_k - mu_k)
            sigma_b_k = expr_1 * expr_1 / (omega_k*(1-omega_k))
            if (criterion < sigma_b_k/sigma_T):
                criterion = sigma_b_k/sigma_T
                thresh = k;
            k += 1
        return thresh

    # Threshold analysis: for each measure, use the mean of all values as a
    # cutoff and count how many incorrect (graph[0]) / correct (graph[1])
    # classifications fall on the wrong side of it.
    graph_l = graph[0][:]
    graph_l.extend(graph[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print "threshold: " + str(threshold)
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[0])) * 100
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[1])) * 100
    graph_l = all[0][:]
    graph_l.extend(all[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print "threshold: " + str(threshold)
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[0])) * 100
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[1])) * 100
    graph_l = local_max[0][:]
    graph_l.extend(local_max[1])
    graph_l.sort()
    threshold = stats.mean(graph_l)
    print "threshold: " + str(threshold)
    num_wrong = 0
    for x in graph[0]:
        if x < threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[0])) * 100
    num_wrong = 0
    for x in graph[1]:
        if x >= threshold:
            num_wrong += 1
    print num_wrong, num_wrong / float(len(graph[1])) * 100