def run_all_uwiii():
    """Process every UW-III image named on the command line.

    Image paths are taken from ``sys.argv[1:]``.  For each image the
    ground-truth zone file is looked up under ``uwiii_xmldir``, the image is
    handed to ``run_image_with_gtruth`` and, unless the run failed or the
    picture already exists, the resulting zones are drawn into ``fullpage/``.

    Side effects:
        * ``uwiii.pkl`` -- pickled list of all results so far, rewritten
          after every image.
        * ``uwiii.dat`` -- one "image xml metric" line per result, written
          once after the last image.
    """
    output_dir = "fullpage/"
    result_list = []

    for imgfilename in sys.argv[1:]:
        basename = get_basename(imgfilename)
        document_id = get_document_id_from_basename(basename)

        # zone box files use "ZONE" instead of "BIN"
        # e.g.,
        # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
        gtruth_xml_filename = uwiii_xmldir + "{0}.xml".format(
            basename.replace("BIN", "ZONE"))

        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename)
        result_list.append(result)

        # Save the pickled results after every image so they can be examined
        # later (especially if we crash).  The with-block closes the file
        # even when pickling raises.
        with open("uwiii.pkl", "wb") as output:
            pickle.dump(result_list, output)

        xmlfilename = result["output_xml_file"]

        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print("{0} failed!".format(imgfilename))
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = output_dir + "{0}_zones.png".format(document_id)
        if os.path.exists(out_imgfilename):
            print("{0} already exists; not redrawing".format(out_imgfilename))
            continue

        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print("wrote {0}".format(out_imgfilename))

    # Plain-text summary of every result, failed runs included.
    with open("uwiii.dat", "w") as outfile:
        for result in result_list:
            outfile.write("{0} {1} {2}\n".format(
                result["output_image_file"],
                result["output_xml_file"],
                result["metric"]))
def run_file_list(img_filelist, xml_filelist, output_dir, output_basename):
    """Process paired image / ground-truth files and record the results.

    Args:
        img_filelist: image file paths to process.
        xml_filelist: ground-truth XML paths, matched to ``img_filelist`` by
            position.  The lists are combined with ``zip``, so surplus
            entries in the longer list are ignored.
        output_dir: directory that receives the zone pictures and the
            result files.
        output_basename: stem of the result files;
            ``<output_basename>.pkl`` and ``<output_basename>.dat`` are
            written into ``output_dir``.

    Side effects:
        The pickle file (list of all results so far) is rewritten after
        every image; the ``.dat`` file (one "image xml metric" line per
        result) is written once after the last image.
    """
    result_list = []
    pickle_filename = os.path.join(output_dir, output_basename + ".pkl")
    dat_filename = os.path.join(output_dir, output_basename + ".dat")

    for imgfilename, gtruth_xml_filename in zip(img_filelist, xml_filelist):
        basename = get_basename(imgfilename)
        # NOTE(review): document_id is not used below; the lookup is kept in
        # case the helper validates the file name -- confirm and drop.
        document_id = get_document_id_from_basename(basename)

        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename,
                                       output_dir)
        result_list.append(result)

        # Save the pickled results after every image so they can be examined
        # later (especially if we crash).  The with-block closes the file
        # even when pickling raises.
        with open(pickle_filename, "wb") as output:
            pickle.dump(result_list, output)

        xmlfilename = result["output_xml_file"]

        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print("{0} failed!".format(imgfilename))
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = os.path.join(output_dir,
                                       "{0}_zones.png".format(basename))
        if os.path.exists(out_imgfilename):
            print("{0} already exists; not redrawing".format(out_imgfilename))
            continue

        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print("wrote {0}".format(out_imgfilename))

    # Plain-text summary of every result, failed runs included.
    with open(dat_filename, "w") as outfile:
        for result in result_list:
            outfile.write("{0} {1} {2}\n".format(
                result["output_image_file"],
                result["output_xml_file"],
                result["metric"]))
def run_all_uwiii():
    """Process every UW-III image named on the command line.

    Image paths are taken from ``sys.argv[1:]``.  For each image the
    ground-truth zone file is looked up under ``uwiii_xmldir``, the image is
    handed to ``run_image_with_gtruth`` and, unless the run failed or the
    picture already exists, the resulting zones are drawn into ``fullpage/``.

    Side effects:
        * ``uwiii.pkl`` -- pickled list of all results so far, rewritten
          after every image.
        * ``uwiii.dat`` -- one "image xml metric" line per result, written
          once after the last image.
    """
    output_dir = "fullpage/"
    result_list = []

    for imgfilename in sys.argv[1:]:
        basename = get_basename(imgfilename)
        document_id = get_document_id_from_basename(basename)

        # zone box files use "ZONE" instead of "BIN"
        # e.g.,
        # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
        gtruth_xml_filename = uwiii_xmldir + "{0}.xml".format(
            basename.replace("BIN", "ZONE"))

        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename)
        result_list.append(result)

        # Save the pickled results after every image so they can be examined
        # later (especially if we crash).  The with-block closes the file
        # even when pickling raises.
        with open("uwiii.pkl", "wb") as output:
            pickle.dump(result_list, output)

        xmlfilename = result["output_xml_file"]

        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print("{0} failed!".format(imgfilename))
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = output_dir + "{0}_zones.png".format(document_id)
        if os.path.exists(out_imgfilename):
            print("{0} already exists; not redrawing".format(out_imgfilename))
            continue

        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print("wrote {0}".format(out_imgfilename))

    # Plain-text summary of every result, failed runs included.
    with open("uwiii.dat", "w") as outfile:
        for result in result_list:
            outfile.write("{0} {1} {2}\n".format(
                result["output_image_file"],
                result["output_xml_file"],
                result["metric"]))
def run(imgfilename):
    """Run rast-ocropus on one image and score the result with runZoneComp.

    The XML printed by ``./rast-ocropus`` is saved as ``<basename>_rast.xml``
    in the module-level ``output_dir``, compared against the ground-truth
    zone file found under ``<num_rows_in_strip>/<document_id>/``, and drawn
    onto the input image as ``<basename>_rast_zone.png``.

    Args:
        imgfilename: path of the image to segment.

    Returns:
        ``(imgfilename, metric)`` on success, or ``(imgfilename, "failed")``
        when ``./rast-ocropus`` cannot be run or exits with an error.

    Raises:
        subprocess.CalledProcessError: if ``runZoneComp`` cannot be run or
            exits with an error.
    """
    def run_command(argv):
        # Echo the command, then run it without a shell so that file names
        # containing spaces or shell metacharacters reach the program intact.
        print(" ".join(argv))
        try:
            return subprocess.check_output(argv)
        except OSError:
            # A shell reports a command it cannot find as exit status 127;
            # keep surfacing a launch failure as CalledProcessError so the
            # error type seen by this function and its callers is unchanged.
            raise subprocess.CalledProcessError(127, argv)

    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)

    # destination for the output files
    make_output_dir(document_id)

    out_imgfilename = output_dir + "{0}_rast.png".format(basename)
    xml_filename = output_dir + "{0}_rast.xml".format(basename)

    input_dir = "{0}/{1}/".format(num_rows_in_strip, document_id)

    # zone box files use "ZONE" instead of "BIN"
    # e.g.,
    # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
    gtruth_xml_filename = input_dir + "{0}.xml".format(
        basename.replace("BIN", "ZONE"))

    print("imgfilename= {0}".format(imgfilename))
    print("out_imgfilename= {0}".format(out_imgfilename))
    print("xml_filename= {0}".format(xml_filename))
    print("gtruth_xml_filename= {0}".format(gtruth_xml_filename))

    # segment the image
    try:
        segmentation_xml = run_command(
            ["./rast-ocropus", imgfilename, out_imgfilename])
    except subprocess.CalledProcessError:
        return (imgfilename, "failed")

    # remove some clutter: only the captured XML is kept, not the picture
    # written by rast-ocropus
    os.unlink(out_imgfilename)

    # write the XML results
    with open(xml_filename, "w") as outfile:
        outfile.write(segmentation_xml)
        outfile.write("\n")
    print("wrote {0}".format(xml_filename))

    # run the compare
    compare_output = run_command(
        ["runZoneComp", "-g", gtruth_xml_filename, "-d", xml_filename])

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(compare_output)
    print("metric={0}".format(metric))

    # draw the experimental result onto the input image
    out_imgfilename = output_dir + "{0}_rast_zone.png".format(basename)
    fname = drawxml.draw_zones(xml_filename, imgfilename, out_imgfilename)
    print("wrote {0}".format(fname))

    return (imgfilename, metric)
def run(imgfilename):
    """Run rast-ocropus on one image and score the result with runZoneComp.

    The XML printed by ``./rast-ocropus`` is saved as ``<basename>_rast.xml``
    in the module-level ``output_dir``, compared against the ground-truth
    zone file found under ``<num_rows_in_strip>/<document_id>/``, and drawn
    onto the input image as ``<basename>_rast_zone.png``.

    Args:
        imgfilename: path of the image to segment.

    Returns:
        ``(imgfilename, metric)`` on success, or ``(imgfilename, "failed")``
        when ``./rast-ocropus`` cannot be run or exits with an error.

    Raises:
        subprocess.CalledProcessError: if ``runZoneComp`` cannot be run or
            exits with an error.
    """
    def run_command(argv):
        # Echo the command, then run it without a shell so that file names
        # containing spaces or shell metacharacters reach the program intact.
        print(" ".join(argv))
        try:
            return subprocess.check_output(argv)
        except OSError:
            # A shell reports a command it cannot find as exit status 127;
            # keep surfacing a launch failure as CalledProcessError so the
            # error type seen by this function and its callers is unchanged.
            raise subprocess.CalledProcessError(127, argv)

    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)

    # destination for the output files
    make_output_dir(document_id)

    out_imgfilename = output_dir + "{0}_rast.png".format(basename)
    xml_filename = output_dir + "{0}_rast.xml".format(basename)

    input_dir = "{0}/{1}/".format(num_rows_in_strip, document_id)

    # zone box files use "ZONE" instead of "BIN"
    # e.g.,
    # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
    gtruth_xml_filename = input_dir + "{0}.xml".format(
        basename.replace("BIN", "ZONE"))

    print("imgfilename= {0}".format(imgfilename))
    print("out_imgfilename= {0}".format(out_imgfilename))
    print("xml_filename= {0}".format(xml_filename))
    print("gtruth_xml_filename= {0}".format(gtruth_xml_filename))

    # segment the image
    try:
        segmentation_xml = run_command(
            ["./rast-ocropus", imgfilename, out_imgfilename])
    except subprocess.CalledProcessError:
        return (imgfilename, "failed")

    # remove some clutter: only the captured XML is kept, not the picture
    # written by rast-ocropus
    os.unlink(out_imgfilename)

    # write the XML results
    with open(xml_filename, "w") as outfile:
        outfile.write(segmentation_xml)
        outfile.write("\n")
    print("wrote {0}".format(xml_filename))

    # run the compare
    compare_output = run_command(
        ["runZoneComp", "-g", gtruth_xml_filename, "-d", xml_filename])

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(compare_output)
    print("metric={0}".format(metric))

    # draw the experimental result onto the input image
    out_imgfilename = output_dir + "{0}_rast_zone.png".format(basename)
    fname = drawxml.draw_zones(xml_filename, imgfilename, out_imgfilename)
    print("wrote {0}".format(fname))

    return (imgfilename, metric)