def draw_boxes( boxfilename, outdir=None) : box_list = zonebox.load_boxes(boxfilename) print box_list # get the document id from the box file # imgfilename = "IMAGEBIN/{0}BIN.TIF".format( box_list[0].document_id ) imgfilename = "IMAGEBIN/{0}BIN.png".format( box_list[0].document_id ) # write the output image to this file and perhaps to another directory if not outdir is None : outfilename = outdir + "/" + get_basename(imgfilename) + "_zoneboxes.png" else : outfilename = get_basename(imgfilename) + "_zoneboxes.png" img = load_image(imgfilename) draw = ImageDraw.Draw(img) # draw the zone boxes onto the image for box in box_list : box.sanity() # print box.document_id # print box.corner_one # print box.corner_two upper_left = (box.corner_one["col"],box.corner_one["row"]) lower_right = (box.corner_two["col"],box.corner_two["row"]) draw.rectangle( (upper_left,lower_right), outline="red" ) img.save(outfilename) print "wrote",outfilename
def draw_boxes(boxfilename, outdir=None): box_list = zonebox.load_boxes(boxfilename) print box_list # get the document id from the box file # imgfilename = "IMAGEBIN/{0}BIN.TIF".format( box_list[0].document_id ) imgfilename = "IMAGEBIN/{0}BIN.png".format(box_list[0].document_id) # write the output image to this file and perhaps to another directory if not outdir is None: outfilename = outdir + "/" + get_basename( imgfilename) + "_zoneboxes.png" else: outfilename = get_basename(imgfilename) + "_zoneboxes.png" img = load_image(imgfilename) draw = ImageDraw.Draw(img) # draw the zone boxes onto the image for box in box_list: box.sanity() # print box.document_id # print box.corner_one # print box.corner_two upper_left = (box.corner_one["col"], box.corner_one["row"]) lower_right = (box.corner_two["col"], box.corner_two["row"]) draw.rectangle((upper_left, lower_right), outline="red") img.save(outfilename) print "wrote", outfilename
def main() : from basename import get_basename infilename = sys.argv[1] ndata = imtools.load_image( infilename, mode="L", dtype="uint8" ) print ndata.shape # aggressive median filter to smooth out as much noise as possible if 1 : print "filtering..." fdata = scipy.ndimage.filters.median_filter( ndata, size=(5,5) ) else : fdata = np.copy(ndata) # no smoothing basename = get_basename(infilename) global mkoutfilename mkoutfilename = lambda s : "{0}_{1}.tif".format(basename,s) imtools.clip_and_save( fdata, mkoutfilename("gray")) np.save("gray.npy",fdata) peaks_list, pixel_counts = find_histogram_peaks( fdata ) print "peaks=",peaks_list print "counts=",pixel_counts
def make_sliding_strips_from_box(boxfilename):
    """Slice a zone-box file's document image into strips and emit one
    ground-truth XML file per strip."""
    base = get_basename(boxfilename)

    # every generated file goes into one directory per input file
    out_dir = make_output_dir(base)

    boxes = zonebox.load_boxes(boxfilename)

    # all boxes carry the same document id, so the first one names the image
    imgname = "IMAGEBIN/{0}BIN.png".format(boxes[0].document_id)

    # the page image as a numpy array
    pixels = mkslices.load_image(imgname)

    strips = make_all_strips_images(pixels, base, out_dir)

    # ground truth: one Strip per zone box, then per-strip XML files
    gtruth_strips = [rects.Strip(box=b) for b in boxes]
    make_all_gtruth_xml(gtruth_strips, pixels, out_dir, base)
def main(infilename):
    """Gamma- and contrast-enhance *infilename* by way of a cached YCC array.

    On a cache miss the image is converted, a BGR preview is written, and
    the function returns early (deliberate: see the dated XXX note).
    """
    basename = get_basename(infilename)

    # reuse a previously saved YCC conversion when one exists
    try:
        ycc = np.load(basename + "_ycc.npy")
    except IOError as err:
        # no cached array: convert now and save a preview
        ycc = convert_image_to_ycc(infilename)
        clip_and_save(convert_ycc_to_bgr(ycc), "out.tif")
        # davep 09-Jan-2013 ; XXX stop after the conversion
        return

    gamma = 2.2
    run_gamma(ycc, basename, gamma)

    ycc_gamma = gamma_enhance(ycc, gamma)
    ycc_contrast = contrast_enhance(ycc_gamma, 1.4)

    outfilename = "{0}_gamma_contrast.tif".format(basename)
    clip_and_save(convert_ycc_to_rgb(ycc_contrast), outfilename)

    # run_contrast( ycc, basename, 2.2 )
    # for contrast in np.linspace( 0, 2, num=30 ) :
    #     run_contrast( ycc, basename, contrast )

    # NOTE(review): this loop computes contrast/brightness but never uses
    # them -- looks like leftover experiment scaffolding; confirm before
    # removing
    for b in np.linspace(-1, 1):
        contrast = 1.0
        brightness = int(b * 127)
def make_sliding_strips_from_box(boxfilename):
    """Cut the document image named by a zone-box file into sliding strips
    plus matching per-strip ground-truth XML files."""
    basename = get_basename(boxfilename)

    # one output directory per input holds everything created below
    output_dir = make_output_dir(basename)

    box_list = zonebox.load_boxes(boxfilename)

    # the boxes share a single document id, so the first box names the image
    img_data = mkslices.load_image(
        "IMAGEBIN/{0}BIN.png".format(box_list[0].document_id))

    strip_list = make_all_strips_images(img_data, basename, output_dir)

    # ground truth: wrap each box in a Strip, then write per-strip XML
    make_all_gtruth_xml([rects.Strip(box=box) for box in box_list],
                        img_data, output_dir, basename)
def run_image_with_gtruth(imgfilename, gtruth_xml_filename, output_dir):
    # Segment one image, score the result against its ground-truth XML with
    # runZoneComp, and return a dict:
    #   {"output_image_file", "output_xml_file", "metric"}
    # where output_xml_file is "failed" (metric 0) when either step crashes.
    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)
    out_imgfilename = os.path.join(output_dir, "{0}_{1}.png".format(basename, segmentation_algorithm))
    xml_filename = os.path.join(output_dir, "{0}_{1}.xml".format(basename, segmentation_algorithm))

    # inputs
    # print "imgfilename=",imgfilename
    # print "gtruth_xml_filename=",gtruth_xml_filename
    # outputs
    # print "xml_filename=",xml_filename
    # print "out_imgfilename=",out_imgfilename
    # sys.exit(0)

    # segment the image
    cmd = "{0} {1} {2}".format(segmentation_cmd, imgfilename, out_imgfilename)
    # cmd = "./rast-ocropus {0} {1}".format( imgfilename, out_imgfilename )

    # handler is a module-level SIGALRM handler (defined elsewhere in the
    # file); it interrupts a segmenter that runs too long
    signal.signal(signal.SIGALRM, handler)

    # small optimization; if the result already exists, don't run it again
    # (crash recovery)
    print cmd
    if os.path.exists(xml_filename):
        print "{0} already exists so assume seg already run".format(xml_filename)
    else:
        global running_filename
        running_filename = imgfilename
        # arm a 5-second watchdog around the segmenter run
        signal.alarm(5)
        try:
            result = subprocess.check_output(cmd.split())
            signal.alarm(0)
        except subprocess.CalledProcessError:
            # segmenter failed: disarm the watchdog and report failure
            signal.alarm(0)
            return {"output_image_file": out_imgfilename, "output_xml_file": "failed", "metric": 0}
        # write the XML results (result only exists on this branch)
        with open(xml_filename, "w") as outfile:
            print >> outfile, result
        print "wrote", xml_filename

    # run the compare
    cmd = "./runZoneComp -g {0} -d {1}".format(gtruth_xml_filename, xml_filename)
    print cmd
    try:
        result = subprocess.check_output(cmd.split())
    except subprocess.CalledProcessError:
        return {"output_image_file": out_imgfilename, "output_xml_file": "failed", "metric": 0}

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(result)
    print "metric={0}".format(metric)
    return {"output_image_file": out_imgfilename, "output_xml_file": xml_filename, "metric": metric}
def main_2():
    # Group image/xml pairs by source subdirectory and run the segmenter
    # over each group, mirroring the input tree under
    # "600_winder_<algorithm>".

    def make_outdir(img_filename):
        # split a png filename into an appropriate output path
        pathname, filename = os.path.split(img_filename)
        # print pathname
        # print filename
        dirlist = pathname.split(os.sep)
        # print dirlist, dirlist[1:]
        # drop the leading path component (the input dir itself)
        outdir = os.path.join(*dirlist[1:])
        # print outdir
        return outdir

    output_dir_base = "600_winder_{0}".format(runseg.segmentation_algorithm)
    input_dirname = "600_winder"

    img_filelist, xml_filelist = load_image_and_xml_list(input_dirname)

    # map: output subdir -> list of file basenames living in that subdir
    output_dir_hash = {}
    for i, x in zip(img_filelist, xml_filelist):
        od = make_outdir(i)
        if not od in output_dir_hash:
            output_dir_hash[od] = []
        output_dir_hash[od].append(get_basename(i))

    # checkpoint the grouping to disk
    pklname = "{0}_output_dir_hash.pkl".format(input_dirname)
    f = open(pklname, "wb")
    pickle.dump(output_dir_hash, f)
    f.close()
    # print "wrote",pklname

    # fname_fmt = "{0}_{1}".format( input_dirname

    for dirname in output_dir_hash.keys():
        filelist = [
            os.path.join(input_dirname, dirname, f)
            for f in output_dir_hash[dirname]
        ]
        img_filelist = [s + ".png" for s in filelist]
        xml_filelist = [s + ".xml" for s in filelist]

        output_basename = os.path.split(dirname)[-1]

        print img_filelist[0]
        print xml_filelist[0]

        output_dir = os.path.join(output_dir_base, dirname)
        print "outputdir=", output_dir
        print output_basename
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        runseg.run_file_list(img_filelist, xml_filelist, output_dir, output_basename)

    sys.exit(0)
def main_2():
    # Bucket image/xml pairs by their source subdirectory, then feed each
    # bucket to runseg.run_file_list with an output tree that mirrors the
    # input under "600_winder_<algorithm>".  Exits the process when done.

    def make_outdir(img_filename):
        # split a png filename into an appropriate output path
        pathname, filename = os.path.split(img_filename)
        # print pathname
        # print filename
        dirlist = pathname.split(os.sep)
        # print dirlist, dirlist[1:]
        # strip the first component (the input directory name)
        outdir = os.path.join(*dirlist[1:])
        # print outdir
        return outdir

    output_dir_base = "600_winder_{0}".format(runseg.segmentation_algorithm)
    input_dirname = "600_winder"

    img_filelist, xml_filelist = load_image_and_xml_list(input_dirname)

    # output subdir -> list of basenames belonging to it
    output_dir_hash = {}
    for i, x in zip(img_filelist, xml_filelist):
        od = make_outdir(i)
        if not od in output_dir_hash:
            output_dir_hash[od] = []
        output_dir_hash[od].append(get_basename(i))

    # save the grouping so it can be inspected/reused later
    pklname = "{0}_output_dir_hash.pkl".format(input_dirname)
    f = open(pklname, "wb")
    pickle.dump(output_dir_hash, f)
    f.close()
    # print "wrote",pklname

    # fname_fmt = "{0}_{1}".format( input_dirname

    for dirname in output_dir_hash.keys():
        filelist = [
            os.path.join(input_dirname, dirname, f)
            for f in output_dir_hash[dirname]
        ]
        img_filelist = [s + ".png" for s in filelist]
        xml_filelist = [s + ".xml" for s in filelist]

        output_basename = os.path.split(dirname)[-1]

        print img_filelist[0]
        print xml_filelist[0]

        output_dir = os.path.join(output_dir_base, dirname)
        print "outputdir=", output_dir
        print output_basename
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        runseg.run_file_list(img_filelist, xml_filelist, output_dir, output_basename)

    sys.exit(0)
def main():
    """Convert the image named on the command line to grayscale and save it
    as <basename>_gray.tif."""
    import imtools
    from basename import get_basename

    infilename = sys.argv[1]
    basename = get_basename(infilename)
    outfilename = "{0}_gray.tif".format(basename)

    pixels = imtools.load_image(infilename, dtype="uint8")
    gray_pixels = togray(pixels)
    imtools.clip_and_save(gray_pixels, outfilename)
def sanity_check_images(document_id): img_file_list, xml_file_list = get_file_lists(document_id) for imgfilename, xmlfilename in zip(img_file_list, xml_file_list): basename = get_basename(imgfilename) document_id = get_document_id_from_basename(basename) print imgfilename, xmlfilename, document_id img = Image.open(imgfilename) img.load() xmlfile = open(xmlfilename, "r") xmlfile.close() del img
def save_results(data): # get an output filename by parsing the first filename in the list basename = get_basename(data[0][0]) document_id = get_document_id_from_basename(basename) print document_id output_filename = output_dir + document_id + ".dat" # get just the valid metrics metric_list = [] num_failures = 0 for d in data: try: metric = float(d[1]) except ValueError: # skip metric = 0 num_failures += 1 metric_list.append(metric) print metric_list metric_data = np.asarray(metric_list, dtype="float") outfile = open(output_filename, "w") # write some overall statistics print >> outfile, "# id={0}".format(document_id) print >> outfile, "# mean={0}".format(np.mean(metric_data)) print >> outfile, "# median={0}".format(np.median(metric_data)) print >> outfile, "# stddev={0}".format(np.std(metric_data)) print >> outfile, "# num_failures={0}".format(num_failures) for d in data: basename = get_basename(d[0]) print >> outfile, basename, d[1] outfile.close()
def save_results( data ) : # get an output filename by parsing the first filename in the list basename = get_basename( data[0][0] ) document_id = get_document_id_from_basename( basename ) print document_id output_filename = output_dir + document_id + ".dat" # get just the valid metrics metric_list = [] num_failures = 0 for d in data : try : metric = float(d[1]) except ValueError : # skip metric = 0 num_failures += 1 metric_list.append( metric ) print metric_list metric_data = np.asarray( metric_list, dtype="float") outfile = open(output_filename,"w") # write some overall statistics print >>outfile, "# id={0}".format( document_id ) print >>outfile, "# mean={0}".format( np.mean(metric_data) ) print >>outfile, "# median={0}".format( np.median(metric_data) ) print >>outfile, "# stddev={0}".format( np.std(metric_data) ) print >>outfile, "# num_failures={0}".format( num_failures) for d in data : basename = get_basename(d[0]) print >>outfile, basename,d[1] outfile.close()
def run_all_uwiii():
    # run all UW-III images
    # For every image on the command line: segment, score against its
    # ground-truth XML, checkpoint results to uwiii.pkl after each file
    # (crash recovery), draw the resulting zones, and finally write all
    # metrics to uwiii.dat.
    output_dir = "fullpage/"
    result_list = []
    for imgfilename in sys.argv[1:]:
        basename = get_basename(imgfilename)
        document_id = get_document_id_from_basename(basename)

        # zone box files use "ZONE" instead of "BIN"
        # e.g.,
        # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
        gtruth_xml_filename = uwiii_xmldir + "{0}.xml".format(
            basename.replace("BIN", "ZONE"))
        # print basename, document_id, gtruth_xml_filename

        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename)
        result_list.append(result)

        # save pickled file so can do interesting things with the results later
        # (especially if we crash)
        output = open("uwiii.pkl", "wb")
        pickle.dump(result_list, output)
        output.close()

        xmlfilename = result["output_xml_file"]
        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print "{0} failed!".format(imgfilename)
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = output_dir + "{0}_zones.png".format(document_id)
        if os.path.exists(out_imgfilename):
            print "{0} already exists; not redrawing".format(out_imgfilename)
            continue
        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print "wrote", out_imgfilename

    # final flat summary of every result
    outfile = open("uwiii.dat", "w")
    for result in result_list:
        print >> outfile, "{0} {1} {2}".format(result["output_image_file"],
            result["output_xml_file"], result["metric"])
    outfile.close()
def get_winder_class_results(class_dir):
    """Collect per-class results from the Winder data set.

    Loads every .dat file under *class_dir* and returns a pair of parallel
    lists: (mean metric per file, human-readable class name per file).
    """
    # gather all the datafiles from the fullpage winder
    datfiles = datfile.find_all(class_dir)

    means = []
    names = []
    for fname in datfiles:
        values = datfile.load(fname)
        means.append(np.mean(values))
        # underscores in the basename become spaces for display
        names.append(get_basename(fname).replace("_", " "))

    return (means, names)
def main() : infilename = sys.argv[1] ndata = imtools.load_image(infilename,dtype="uint8",mode="L") # aggressive median filter to smooth out as much noise as possible fdata = scipy.ndimage.filters.median_filter( ndata, size=(5,5) ) basename = get_basename(infilename) global mkoutfilename mkoutfilename = lambda s : "{0}_{1}.tif".format(basename,s) is_straight = straightness_test( fdata, 126, 146, 60 ) if is_straight : print "is straight enough" else : print "is NOT straight enough"
def load_image_and_xml_list(dirname): # sweep a dir; load the list of .png and corresponding ground truth .xml # files. Store in a .pkl as cache pklfilename = dirname + ".pkl" test = 0 if os.path.exists(pklfilename): pfile = open(pklfilename, "rb") img_filelist, xml_filelist = pickle.load(pfile) pfile.close() print "loaded from pickle" return img_filelist, xml_filelist img_filelist = [] xml_filelist = [] for root, dirs, files in os.walk(dirname): for f in files: path = os.path.join(root, f) if path.endswith(".png"): imgfilename = path basename = get_basename(imgfilename) if test: img = Image.open(path) img.load() del img # look for the xmlfile xmlfilename = path.replace(".png", ".xml") if test: f = open(xmlfilename, "r") f.close() print imgfilename, xmlfilename img_filelist.append(imgfilename) xml_filelist.append(xmlfilename) pfile = open(pklfilename, "wb") pickle.dump((img_filelist, xml_filelist), pfile) pfile.close() return img_filelist, xml_filelist
def run_all_uwiii():
    # run all UW-III images
    # Pipeline per command-line image: segment + score, checkpoint the
    # cumulative result list to uwiii.pkl (so a crash loses nothing), draw
    # the zones unless the run failed or the drawing already exists, then
    # write a summary line per image to uwiii.dat.
    output_dir = "fullpage/"
    result_list = []
    for imgfilename in sys.argv[1:]:
        basename = get_basename(imgfilename)
        document_id = get_document_id_from_basename(basename)

        # zone box files use "ZONE" instead of "BIN"
        # e.g.,
        # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
        gtruth_xml_filename = uwiii_xmldir + "{0}.xml".format(basename.replace("BIN", "ZONE"))
        # print basename, document_id, gtruth_xml_filename

        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename)
        result_list.append(result)

        # save pickled file so can do interesting things with the results later
        # (especially if we crash)
        output = open("uwiii.pkl", "wb")
        pickle.dump(result_list, output)
        output.close()

        xmlfilename = result["output_xml_file"]
        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print "{0} failed!".format(imgfilename)
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = output_dir + "{0}_zones.png".format(document_id)
        if os.path.exists(out_imgfilename):
            print "{0} already exists; not redrawing".format(out_imgfilename)
            continue
        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print "wrote", out_imgfilename

    # write the final metrics summary
    outfile = open("uwiii.dat", "w")
    for result in result_list:
        print >> outfile, "{0} {1} {2}".format(result["output_image_file"],
            result["output_xml_file"], result["metric"])
    outfile.close()
def load_image_and_xml_list( dirname ) : # sweep a dir; load the list of .png and corresponding ground truth .xml # files. Store in a .pkl as cache pklfilename = dirname+".pkl" test = 0 if os.path.exists(pklfilename) : pfile=open(pklfilename,"rb") img_filelist,xml_filelist = pickle.load(pfile) pfile.close() print "loaded from pickle" return img_filelist,xml_filelist img_filelist = [] xml_filelist = [] for root,dirs,files in os.walk(dirname) : for f in files : path=os.path.join(root,f) if path.endswith(".png") : imgfilename = path basename = get_basename(imgfilename) if test : img = Image.open(path) img.load() del img # look for the xmlfile xmlfilename = path.replace(".png",".xml") if test: f = open(xmlfilename,"r") f.close() print imgfilename, xmlfilename img_filelist.append( imgfilename ) xml_filelist.append( xmlfilename ) pfile = open(pklfilename,"wb") pickle.dump((img_filelist,xml_filelist),pfile) pfile.close() return img_filelist,xml_filelist
def run_file_list(img_filelist, xml_filelist, output_dir, output_basename):
    # Run the segmenter over paired image/ground-truth lists, checkpointing
    # the cumulative results to <output_basename>.pkl after every file and
    # writing a final one-line-per-image summary to <output_basename>.dat.
    result_list = []
    pickle_filename = os.path.join(output_dir, output_basename + ".pkl")
    dat_filename = os.path.join(output_dir, output_basename + ".dat")
    for imgfilename, gtruth_xml_filename in zip(img_filelist, xml_filelist):
        basename = get_basename(imgfilename)
        document_id = get_document_id_from_basename(basename)
        result = run_image_with_gtruth(imgfilename, gtruth_xml_filename, output_dir)
        result_list.append(result)

        # save pickled file so can do interesting things with the results later
        # (especially if we crash)
        output = open(pickle_filename, "wb")
        pickle.dump(result_list, output)
        output.close()

        xmlfilename = result["output_xml_file"]
        # did we crash? if so don't bother trying to draw the XML
        if xmlfilename == "failed":
            print "{0} failed!".format(imgfilename)
            continue

        # draw the resulting XML onto the original input image
        out_imgfilename = os.path.join(output_dir, "{0}_zones.png".format(basename))
        if os.path.exists(out_imgfilename):
            print "{0} already exists; not redrawing".format(out_imgfilename)
            continue
        drawxml.draw_zones(xmlfilename, imgfilename, out_imgfilename)
        print "wrote", out_imgfilename

    # flat summary for downstream analysis
    outfile = open(dat_filename, "w")
    for result in result_list:
        print >> outfile, "{0} {1} {2}".format(result["output_image_file"],
            result["output_xml_file"], result["metric"])
    outfile.close()
def awinder(): # slice up Amy Winder's images output_dir_base = "{0}_winder/".format(num_rows_in_strip) # output_dir_base = "300_winder/" for imgfilename in sys.argv[1:]: basename = get_basename(imgfilename) input_path = os.path.dirname(imgfilename) # get rid of the trailing "/png" component (it's annoying) input_path = input_path.replace("/png", "/") output_dir = output_dir_base + input_path + basename print output_dir if os.path.exists(output_dir): print "{0} exists so assume files are OK".format(output_dir) continue else: os.makedirs(output_dir) make_sliding_strips_from_image(imgfilename, output_dir) xmlfilename = imgfilename.replace(".png", ".xml") xmlfilename = xmlfilename.replace("png", "gTruth") print xmlfilename try: zone_list = gtruthxml.parse_xml(xmlfilename) except IOError, e: if e.errno == 2: # some of the files are living in a ./xml subdir xmlfilename = imgfilename.replace(".png", ".xml") xmlfilename = xmlfilename.replace("png", "gTruth/xml") zone_list = gtruthxml.parse_xml(xmlfilename) else: raise print zone_list # get the image as a numpy array data = mkslices.load_image(imgfilename) # slice up the ground truth into individual XML files make_all_gtruth_xml(zone_list, data, output_dir, basename)
def awinder():
    # slice up Amy Winder's images
    # For each command-line image: create the mirrored output directory,
    # cut the image into strips, locate and parse its ground-truth XML
    # (falling back to a ./xml subdir when missing), then write per-strip
    # ground-truth XML files.
    output_dir_base = "{0}_winder/".format(num_rows_in_strip)
    # output_dir_base = "300_winder/"

    for imgfilename in sys.argv[1:]:
        basename = get_basename(imgfilename)
        input_path = os.path.dirname(imgfilename)

        # get rid of the trailing "/png" component (it's annoying)
        input_path = input_path.replace("/png", "/")

        output_dir = output_dir_base + input_path + basename
        print output_dir

        if os.path.exists(output_dir):
            # an existing directory is treated as a completed earlier run
            print "{0} exists so assume files are OK".format(output_dir)
            continue
        else:
            os.makedirs(output_dir)

        make_sliding_strips_from_image(imgfilename, output_dir)

        # ground truth lives alongside under a "gTruth" directory
        xmlfilename = imgfilename.replace(".png", ".xml")
        xmlfilename = xmlfilename.replace("png", "gTruth")
        print xmlfilename

        try:
            zone_list = gtruthxml.parse_xml(xmlfilename)
        except IOError, e:
            # errno 2 == ENOENT (file not found)
            if e.errno == 2:
                # some of the files are living in a ./xml subdir
                xmlfilename = imgfilename.replace(".png", ".xml")
                xmlfilename = xmlfilename.replace("png", "gTruth/xml")
                zone_list = gtruthxml.parse_xml(xmlfilename)
            else:
                raise
        print zone_list

        # get the image as a numpy array
        data = mkslices.load_image(imgfilename)

        # slice up the ground truth into individual XML files
        make_all_gtruth_xml(zone_list, data, output_dir, basename)
def load_image( imgfilename ) : img = Image.open(imgfilename) img.load() if img.mode == "RGB" : errmsg="mode={0}; cowardly refusing a non-gray image".format( img.mode ) raise Exception( errmsg ) # single bit image? So much I don't know. I don know unless I convert to an # 8bpp gray, the numpy conversion gets... weird. if img.mode == "1" : img2 = img.convert("L") img = img2 del img2 basename = get_basename( imgfilename ) data = np.asarray(img,dtype="uint8") print "shape=",data.shape return data
def load_image(imgfilename): img = Image.open(imgfilename) img.load() if img.mode == "RGB": errmsg = "mode={0}; cowardly refusing a non-gray image".format( img.mode) raise Exception(errmsg) # single bit image? So much I don't know. I don know unless I convert to an # 8bpp gray, the numpy conversion gets... weird. if img.mode == "1": img2 = img.convert("L") img = img2 del img2 basename = get_basename(imgfilename) data = np.asarray(img, dtype="uint8") print "shape=", data.shape return data
def convert_image_to_ycc(infilename):
    """Load an RGB image, convert it to YCC, cache the array on disk as
    <basename>_ycc.npy, and save a viewable copy.

    Exits the process when the image is not RGB.  Returns the YCC array.
    """
    basename = get_basename(infilename)

    img = Image.open(infilename)
    img.load()
    if img.mode != "RGB":
        print("{0} is not an RGB image.".format(infilename))
        sys.exit(1)
    print("img size=", img.size)

    pixels = np.asarray(img, dtype="float")
    print(pixels.shape, pixels.dtype)

    # convert RGB -> YCC
    # ycc = np.zeros_like( rgb )
    # for row in range(rgb.shape[0]):
    #     ycc[row] = rgb[row] * rgb_to_ycc.T
    ycc = convert_rgb_to_ycc(pixels)

    np.save(basename + "_ycc", ycc)
    save_ycc_as_image(ycc, basename)

    return ycc
def draw_zones(xmlfilename, imgfilename, outfilename=None):
    """Overlay the zones from *xmlfilename* onto *imgfilename* and save.

    "Non-text" zones are outlined red, everything else green.  Returns the
    name of the file written (defaults to <basename>_zones.png).
    """
    img = load_image(imgfilename)
    draw = ImageDraw.Draw(img)

    for zone in gtruthxml.parse_xml(xmlfilename):
        color = "red" if zone.value == "Non-text" else "green"
        # rect[0] and rect[2] are diagonally opposite corners
        corner_a = (zone.rect[0].x, zone.rect[0].y)
        corner_b = (zone.rect[2].x, zone.rect[2].y)
        draw.rectangle((corner_a, corner_b), outline=color)

    if outfilename is None:
        outfilename = get_basename(imgfilename) + "_zones.png"
    img.save(outfilename)
    return outfilename
def draw_zones(xmlfilename, imgfilename, outfilename=None):
    """Draw every ground-truth zone onto the image and save the result.

    Zones whose value is "Non-text" get a red outline, all others green.
    When *outfilename* is omitted the output is <basename>_zones.png.
    Returns the written filename.
    """
    img = load_image(imgfilename)
    draw = ImageDraw.Draw(img)

    zone_list = gtruthxml.parse_xml(xmlfilename)
    for zone in zone_list:
        if zone.value == "Non-text":
            outline_color = "red"
        else:
            outline_color = "green"
        # opposite corners of the zone rectangle
        draw.rectangle(
            ((zone.rect[0].x, zone.rect[0].y),
             (zone.rect[2].x, zone.rect[2].y)),
            outline=outline_color)

    if outfilename is None:
        outfilename = get_basename(imgfilename) + "_zones.png"
    img.save(outfilename)
    return outfilename
def run_image_with_gtruth(imgfilename, gtruth_xml_filename, output_dir):
    # Segment *imgfilename*, compare against *gtruth_xml_filename* with
    # runZoneComp, and return {"output_image_file", "output_xml_file",
    # "metric"}; "output_xml_file" is "failed" with metric 0 when either
    # subprocess fails.
    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)
    out_imgfilename = os.path.join(
        output_dir, "{0}_{1}.png".format(basename, segmentation_algorithm))
    xml_filename = os.path.join(
        output_dir, "{0}_{1}.xml".format(basename, segmentation_algorithm))

    # inputs
    # print "imgfilename=",imgfilename
    # print "gtruth_xml_filename=",gtruth_xml_filename
    # outputs
    # print "xml_filename=",xml_filename
    # print "out_imgfilename=",out_imgfilename
    # sys.exit(0)

    # segment the image
    cmd = "{0} {1} {2}".format(segmentation_cmd, imgfilename, out_imgfilename)
    # cmd = "./rast-ocropus {0} {1}".format( imgfilename, out_imgfilename )

    # handler is the module-level SIGALRM handler that kills a hung segmenter
    signal.signal(signal.SIGALRM, handler)

    # small optimization; if the result already exists, don't run it again
    # (crash recovery)
    print cmd
    if os.path.exists(xml_filename):
        print "{0} already exists so assume seg already run".format(
            xml_filename)
    else:
        global running_filename
        running_filename = imgfilename
        # 5-second watchdog around the segmenter
        signal.alarm(5)
        try:
            result = subprocess.check_output(cmd.split())
            signal.alarm(0)
        except subprocess.CalledProcessError:
            # disarm the watchdog before reporting the failure
            signal.alarm(0)
            return {
                "output_image_file": out_imgfilename,
                "output_xml_file": "failed",
                "metric": 0
            }
        # write the XML results (result is only defined on this branch)
        with open(xml_filename, "w") as outfile:
            print >> outfile, result
        print "wrote", xml_filename

    # run the compare
    cmd = "./runZoneComp -g {0} -d {1}".format(gtruth_xml_filename,
                                               xml_filename)
    print cmd
    try:
        result = subprocess.check_output(cmd.split())
    except subprocess.CalledProcessError:
        return {
            "output_image_file": out_imgfilename,
            "output_xml_file": "failed",
            "metric": 0
        }

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(result)
    print "metric={0}".format(metric)
    return {
        "output_image_file": out_imgfilename,
        "output_xml_file": xml_filename,
        "metric": metric
    }
def run(imgfilename):
    # Segment one strip image with rast-ocropus, score it against its
    # ground-truth zone XML, draw the resulting zones, and return
    # (imgfilename, metric) -- or (imgfilename, "failed") when the
    # segmenter crashes.
    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)

    # destination for the output files
    make_output_dir(document_id)

    # stripnum = get_stripnum_from_filename( basename )
    out_imgfilename = output_dir + "{0}_rast.png".format(basename)
    xml_filename = output_dir + "{0}_rast.xml".format(basename)
    input_dir = "{0}/{1}/".format(num_rows_in_strip, document_id)

    # zone box files use "ZONE" instead of "BIN"
    # e.g.,
    # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
    gtruth_xml_filename = input_dir + "{0}.xml".format(
        basename.replace("BIN", "ZONE"))

    print "imgfilename=", imgfilename
    print "out_imgfilename=", out_imgfilename
    print "xml_filename=", xml_filename
    print "gtruth_xml_filename=", gtruth_xml_filename
    # sys.exit(0)

    # segment the image
    cmd = "./rast-ocropus {0} {1}".format(imgfilename, out_imgfilename)
    print cmd
    try:
        result = subprocess.check_output(cmd, shell=True)
    except subprocess.CalledProcessError:
        return (imgfilename, "failed")

    # remove some clutter (the segmenter's image output isn't needed)
    os.unlink(out_imgfilename)

    # write the XML results
    with open(xml_filename, "w") as outfile:
        print >> outfile, result
    print "wrote", xml_filename

    # run the compare
    cmd = "runZoneComp -g {0} -d {1}".format(gtruth_xml_filename,
                                             xml_filename)
    print cmd
    result = subprocess.check_output(cmd, shell=True)

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(result)
    print "metric={0}".format(metric)

    # draw the experimental result onto the input image
    # (out_imgfilename is reused for the drawn output)
    out_imgfilename = output_dir + "{0}_rast_zone.png".format(basename)
    fname = drawxml.draw_zones(xml_filename, imgfilename, out_imgfilename)
    print "wrote", fname

    # remove some clutter
    # os.unlink(xml_filename)

    return (imgfilename, metric)
def make_sliding_strips_from_image(imgfilename, output_dir):
    """Slice *imgfilename* into sliding strips written under *output_dir*."""
    basename = get_basename(imgfilename)

    # get the image as a numpy array
    data = mkslices.load_image(imgfilename)

    # fix: the return value was previously bound to an unused local
    # (strip_list)
    make_all_strips_images(data, basename, output_dir)
def test_get_basename():
    """Spot-check get_basename() on paths with and without directory parts."""
    cases = [
        ('', ''),
        ('/', ''),
        ('/sort', 'sort'),
        ('sort', 'sort'),
        ('/usr/sort', 'sort'),
    ]
    for path, expected in cases:
        assert get_basename(path) == expected
def main() :
    """De-skew and crop a scanned Q60 calibration target.

    Reads an image path from argv[1], smooths it, finds the gray target's
    bounding box, tests whether the target edges are already straight, and
    if not rotates the image to de-skew it. Debug images are written at
    each stage via imtools.clip_and_save().

    NOTE(review): execution currently stops at the sys.exit(0) after
    clip_rotated() — the triangle-based de-skew code below it is dead
    while the new de-skew path is being developed.
    """
    infilename = sys.argv[1]

    basename = get_basename(infilename)

    # mkoutfilename builds the stage-named debug output filenames,
    # e.g. <basename>_gray.tif; made global so helpers can use it
    global mkoutfilename
    mkoutfilename = lambda s : "{0}_{1}.tif".format(basename,s)

    ndata = imtools.load_image( infilename, mode="L", dtype="uint8" )
    print ndata.dtype, ndata.shape

#    # davep 19-Oct-2013 ; testing clip_rotated
#    clip_rotated(ndata,2074)
#    return

#    # get rid of the obnoxious bezel shadow in my test image >:-(
#    ndata = ndata[ bezel_row:, bezel_col: ]

    # aggressive median filter to smooth out as much noise as possible
    print "filtering..."
    fdata = scipy.ndimage.filters.median_filter( ndata, size=(5,5) )
    imtools.clip_and_save( fdata, mkoutfilename("gray"))

    # find the optimum gray midpoint
    print "finding boundaries..."
    gray_low,gray_high = gray.calc_gray_boundaries(fdata)

    # XXX temp debug ; leave here while working on gray background discovery
#    return

    # keep only pixels inside the (gray_low, gray_high) band; everything
    # else goes to 0 so np.nonzero() below finds just the gray target
    gray1 = np.where(fdata>gray_low,fdata,0)
    imtools.clip_and_save( gray1, mkoutfilename("gray1") )

    gray2 = np.where(gray1<gray_high,gray1,0)
    imtools.clip_and_save( gray2, mkoutfilename("gray2") )

    nz = np.nonzero( gray2 )
    np.save("nz.npy",nz)

    # nz[0] is the rows
    # nz[1] is the cols
    min_row = np.min(nz[0])
    max_row = np.max(nz[0])
    min_col = np.min(nz[1])
    max_col = np.max(nz[1])

    print "min_row={0} max_row={1}".format(min_row,max_row)
    print "min_col={0} max_col={1}".format(min_col,max_col)

    # reload the original image (unfiltered, full gray range)
    ndata = imtools.load_image(infilename,dtype="uint8")

    # clip original image to bounding box
    bbox = ndata[min_row:max_row, min_col:max_col]
    imtools.clip_and_save(bbox,mkoutfilename("bbox"))

    # clip gray2 to bbox
    gray_bbox = gray2[min_row:max_row, min_col:max_col]
    np.save("gray.npy",gray_bbox)
    imtools.clip_and_save(gray_bbox,mkoutfilename("gray_bbox"))

    # XXX temp debug ; stop here while working on skew
#    draw_hypotenuse(infilename,nz)
#    sys.exit(0)

    # The q60 may not be heavily skewed. Sample the four corners of the
    # bounding box. If the areas at the corners are mostly gray, call it good.
    # Solves the problem of very, very small skew and the ragged edge of the
    # Q60 causing the edges to land halfway down the target.
    #
    # Note we run the straightness test on the filtered (smoothed) data
    is_straight = straight.straightness_test( fdata[min_row:max_row, min_col:max_col], gray_low, gray_high )
    if is_straight :
        # close enough!
        print "close enough!"
        imtools.clip_and_save(bbox,mkoutfilename("q60"))
        return

    # davep 10-Oct-2013 ; stop here while testing straightness test
#    assert 0

    print "is not straight enough so lets rotate"

    # davep 19-Sep-2013 ; new de-skew
    rotate_direction = detect_rotate_direction( gray_bbox )
    print "rotate={0}".format(rotate_direction)

    rotation_angle, hyp_length = detect_rotation_angle( gray_bbox, rotate_direction )
    print "angle={0}={1} hyp={2}".format( rotation_angle,math.degrees(rotation_angle),hyp_length)

    print "rotating radians={0} degrees={1}...".format(rotation_angle, math.degrees(rotation_angle))
    rot = scipy.ndimage.interpolation.rotate( bbox, math.degrees(rotation_angle) )
    print "rot=",rot.shape
    imtools.clip_and_save(rot,mkoutfilename("rot"))

    clip_rotated(rot,hyp_length)

    #
    # XXX temp debug ; leave here while working on new de-skew straightness test
    # NOTE(review): everything below this exit is the older triangle-based
    # de-skew, kept for reference; it is currently unreachable.
    sys.exit(0)

    # make a triangle so we can de-skew.
    # p1 is the upper right of Q60
    # p2 is the upper left
    # p3 is the lower left
    #
    # Counter-clockwise rotated
    #
    #                 p1
    #   p2
    #
    #
    #   p3
    #
    #
    # Clockwise rotate
    #
    #   p2
    #
    #   p1
    #                 p3
    #
    #
    # Right Triangles
    #
    #   .--------------p1
    #   |
    #   |
    #   |
    #   p2
    #
    #
    #   p2-------------.
    #                  |
    #                  |
    #                  |
    #                  p1

    # if the target is pushed against a scanner edge, there will be multiple
    # locations at the min/max
    rows_at_min = np.where(nz[0]==min_row)[0]
    rows_at_max = np.where(nz[0]==max_row)[0]
    cols_at_min = np.where(nz[1]==min_col)[0]
    cols_at_max = np.where(nz[1]==max_col)[0]

    # Euclidean distance
    dist = lambda P,Q : math.sqrt( (P[0]-Q[0])**2 + (P[1]-Q[1])**2 )

    # candidate corner points, in (row, col) order
    A = min_row, nz[1][rows_at_min[-1]]
#    A = min_row, nz[1][np.argmin(nz[0])]
    B = nz[0][cols_at_max[-1]], max_col
#    B = nz[0][np.argmax(nz[1])], max_col
    C = max_row, nz[1][np.argmax(nz[0])]

    print "A={0} B={1} C={2}".format(A,B,C)

    # the longer edge tells us which way the target is leaning
    if dist(A,B) > dist(B,C) :
        clockwise = False
        p1 = B
        p2 = A
        p3 = nz[0][np.argmin(nz[1])],min_col
    else :
        clockwise = True
        p1 = A
        p2 = nz[0][np.argmin(nz[1])],min_col
        p3 = C

    print "p1=",p1
    print "p2=",p2
    print "p3=",p3

    if clockwise :
        print "rotate clockwise"
        triangle = ndata[p1[0]:p2[0], p2[1]:p1[1]]
    else :
        print "rotate counter-clockwise"
        triangle = ndata[p2[0]:p1[0], p2[1]:p1[1]]

    imtools.clip_and_save(triangle,mkoutfilename("triangle"))

    tri_width = abs(p1[1] - p2[1])
    tri_height = abs(p2[0] - p1[0])
    print "tri_width={0} tri_height={1}".format(tri_width,tri_height)

    # length of upper edge (hypotenuse) is Euclidean distance between the two
    # points
    hyp_len = math.sqrt( (p1[0]-p2[0])**2 + (p1[1]-p2[1])**2 )
    print "hyp_len=",hyp_len

    # skew angle from the right triangle formed by the target's top edge
    if clockwise :
        theta = math.acos( float(tri_width)/hyp_len )
        theta_degrees = -math.degrees(theta)
    else :
        theta = math.acos( float(tri_height)/hyp_len )
        theta_degrees = 90 - math.degrees(theta)

    print "theta=",theta,theta_degrees

    rot = scipy.ndimage.interpolation.rotate( bbox, theta_degrees)
    print rot.shape
    imtools.clip_and_save(rot,mkoutfilename("rot"))

    # true target size is the Euclidean length of its (skewed) edges
    q60_height = math.sqrt( (p3[0]-p2[0])**2 + (p3[1]-p2[1])**2 )
    q60_width = math.sqrt( (p2[0]-p1[0])**2 + (p2[1]-p1[1])**2 )
    print "q60_width={0} q60_height={1}".format(q60_width,q60_height)

    # crop the de-skewed target out of the center of the rotated image
    center = rot.shape[0]/2,rot.shape[1]/2
    print "center=",center

    q60 = rot[ center[0]-q60_height/2 : center[0]+q60_height/2
               , center[1]-q60_width/2 : center[1]+q60_width/2 ]

    imtools.clip_and_save(q60,mkoutfilename("q60"))
def main():
    """De-bezel the image named on the command line.

    Reads the input path from argv[1] and writes the result to
    <basename>_debezel.tif via debezel().

    Returns whatever debezel() reports. (Previously the status was
    assigned to a local and silently discarded; returning it is backward
    compatible and lets callers check the outcome.)
    """
    infilename = sys.argv[1]
    basename = get_basename(infilename)
    outfilename = "{0}_debezel.tif".format(basename)
    debezel_done = debezel(infilename, outfilename)
    return debezel_done
def save_to_sqlite() :
    """Store all page-segmentation results into the pageseg.db SQLite file.

    Rebuilds the pageseg table from the in-memory winder/UW-III results
    plus the two fullpage UW-III datafiles (rast and voronoi), which hold
    lines like:
        fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
    """
    winder_results, uwiii_results = load_all_results()

#    pklname = "winder_results.pkl"
#    with open(pklname,"rb") as f :
#        winder_results = pickle.load(f)
#    print "loaded",pklname

    conn = sqlite3.connect("pageseg.db")
    # store TEXT columns as plain str (we insert raw numpy bytes below)
    conn.text_factory = str
    cur = conn.cursor()

    # BUGFIX: a plain "DROP TABLE pageseg" raised sqlite3.OperationalError
    # on a fresh database where the table does not exist yet; IF EXISTS
    # makes the rebuild idempotent.
    cur.execute( "DROP TABLE IF EXISTS pageseg" )
    creat = """ CREATE TABLE IF NOT EXISTS pageseg
                (filename text, algorithm text, stripsize text,
                 dataset text, imgclass text, metrics text) """
    cur.execute( creat )

    store_result_data_to_db( cur, winder_results )
    store_result_data_to_db( cur, uwiii_results )

    # my UW-III fullpage rast and voronoi are in separate datafiles
    # I ran them on multiple machines, splitting the jobs by imgclass and by
    # algorithm.
    # The datafile contains lines like:
    # fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
    file_list = ( "uwiii_fullpage_rast.dat", "uwiii_fullpage_vor.dat" )
    for filename in [ os.path.join("uwiii_fullpage",f) for f in file_list ] :
        # read all lines from the file into an array
        with open( filename, "r" ) as infile :
            lines = [ l.strip() for l in infile.readlines() ]

        # parse the data into a dict we will put into the DB
        for l in lines :
            # line should look like:
            # fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
            # split into space separated fields
            fields = l.split()

            # metric is the last field
            data = np.array( fields[-1], dtype="float" )

            # get the png filename; we'll use it to split into filename and
            # algorithm (renamed from `filename` to avoid shadowing the
            # loop variable above)
            basename = get_basename(fields[0])
            imgname,algorithm = basename.split("_")

            f = dict(db_fields)
            f["filename"] = imgname
            f["algorithm"] = algorithm
            f["stripsize"] = "fullpage"
            f["dataset"] = "uwiii"
            # first letter of the base e.g., W1U8BIN_vor
            f["imgclass"] = basename[0]

            cur.execute( "INSERT INTO pageseg VALUES(?,?,?,?,?,?)",
                (f["filename"],f["algorithm"],f["stripsize"],
                 f["dataset"],f["imgclass"],data.tostring()))

    conn.commit()
    conn.close()
def run(imgfilename):
    """Segment one strip image and score it against ground truth.

    Pipeline: run the external ./rast-ocropus segmenter on imgfilename,
    save its stdout as an XML zone file, score that XML against the
    ground-truth zone XML with the external runZoneComp tool, then draw
    the detected zones onto a copy of the input image.

    Returns (imgfilename, metric) on success, or (imgfilename, "failed")
    when the segmenter exits non-zero.

    NOTE(review): relies on module-level names output_dir and
    num_rows_in_strip (not visible in this chunk) — presumably configured
    elsewhere in the file; confirm before reuse.
    """
    basename = get_basename(imgfilename)
    document_id = get_document_id_from_basename(basename)

    # destination for the output files
    make_output_dir(document_id)

#    stripnum = get_stripnum_from_filename( basename )

    out_imgfilename = output_dir + "{0}_rast.png".format(basename)
    xml_filename = output_dir + "{0}_rast.xml".format(basename)

    input_dir = "{0}/{1}/".format(num_rows_in_strip, document_id)

    # zone box files use "ZONE" instead of "BIN"
    # e.g.,
    # A00ABIN_300_010_2990.png -> A00AZONE_300_010_2990.xml
    gtruth_xml_filename = input_dir + "{0}.xml".format(
        basename.replace("BIN", "ZONE"))

    print "imgfilename=", imgfilename
    print "out_imgfilename=", out_imgfilename
    print "xml_filename=", xml_filename
    print "gtruth_xml_filename=", gtruth_xml_filename

#    sys.exit(0)

    # segment the image
    # NOTE(review): shell=True with a formatted command string — safe only
    # while filenames come from the trusted dataset; do not feed untrusted
    # paths through here.
    cmd = "./rast-ocropus {0} {1}".format(imgfilename, out_imgfilename)
    print cmd
    try:
        result = subprocess.check_output(cmd, shell=True)
    except subprocess.CalledProcessError:
        # segmenter failed on this strip; report and move on
        return (imgfilename, "failed")

    # remove some clutter (the segmenter's output image is not needed;
    # only its stdout, captured above, is kept)
    os.unlink(out_imgfilename)

    # write the XML results
    with open(xml_filename, "w") as outfile:
        print >> outfile, result
    print "wrote", xml_filename

    # run the compare
    cmd = "runZoneComp -g {0} -d {1}".format(gtruth_xml_filename, xml_filename)
    print cmd
    result = subprocess.check_output(cmd, shell=True)

    # get the segmentation metric from the output
    metric = parse_runZoneComp_result(result)
    print "metric={0}".format(metric)

    # draw the experimental result onto the input image
    out_imgfilename = output_dir + "{0}_rast_zone.png".format(basename)
    fname = drawxml.draw_zones(xml_filename, imgfilename, out_imgfilename)
    print "wrote", fname

    # remove some clutter
#    os.unlink(xml_filename)

    return (imgfilename, metric)
def make_sliding_strips_from_image( imgfilename, output_dir ) :
    """Cut the image in imgfilename into sliding strips under output_dir.

    Loads the image as a numpy array via mkslices.load_image() and hands
    it to make_all_strips_images(), which writes the strip images.

    Returns the strip list produced by make_all_strips_images().
    (Previously the list was computed and silently discarded; returning it
    is backward compatible — old callers simply ignored the None return.)
    """
    basename = get_basename( imgfilename )

    # get the image as a numpy array
    data = mkslices.load_image( imgfilename )

    strip_list = make_all_strips_images( data, basename, output_dir )
    return strip_list
def save_to_sqlite():
    """Store all page-segmentation results into the pageseg.db SQLite file.

    Rebuilds the pageseg table from the in-memory winder/UW-III results
    plus the two fullpage UW-III datafiles (rast and voronoi), which hold
    lines like:
        fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
    """
    winder_results, uwiii_results = load_all_results()

#    pklname = "winder_results.pkl"
#    with open(pklname,"rb") as f :
#        winder_results = pickle.load(f)
#    print "loaded",pklname

    conn = sqlite3.connect("pageseg.db")
    # store TEXT columns as plain str (we insert raw numpy bytes below)
    conn.text_factory = str
    cur = conn.cursor()

    # BUGFIX: a plain "DROP TABLE pageseg" raised sqlite3.OperationalError
    # on a fresh database where the table does not exist yet; IF EXISTS
    # makes the rebuild idempotent.
    cur.execute("DROP TABLE IF EXISTS pageseg")
    creat = """ CREATE TABLE IF NOT EXISTS pageseg
                (filename text, algorithm text, stripsize text,
                 dataset text, imgclass text, metrics text) """
    cur.execute(creat)

    store_result_data_to_db(cur, winder_results)
    store_result_data_to_db(cur, uwiii_results)

    # my UW-III fullpage rast and voronoi are in separate datafiles
    # I ran them on multiple machines, splitting the jobs by imgclass and by
    # algorithm.
    # The datafile contains lines like:
    # fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
    file_list = ("uwiii_fullpage_rast.dat", "uwiii_fullpage_vor.dat")
    for filename in [os.path.join("uwiii_fullpage", f) for f in file_list]:
        # read all lines from the file into an array
        with open(filename, "r") as infile:
            lines = [l.strip() for l in infile.readlines()]

        # parse the data into a dict we will put into the DB
        for l in lines:
            # line should look like:
            # fullpage_vor/A001BIN_vor.png fullpage_vor/A001BIN_vor.xml 0.0
            # split into space separated fields
            fields = l.split()

            # metric is the last field
            data = np.array(fields[-1], dtype="float")

            # get the png filename; we'll use it to split into filename and
            # algorithm (renamed from `filename` to avoid shadowing the
            # loop variable above)
            basename = get_basename(fields[0])
            imgname, algorithm = basename.split("_")

            f = dict(db_fields)
            f["filename"] = imgname
            f["algorithm"] = algorithm
            f["stripsize"] = "fullpage"
            f["dataset"] = "uwiii"
            # first letter of the base e.g., W1U8BIN_vor
            f["imgclass"] = basename[0]

            cur.execute("INSERT INTO pageseg VALUES(?,?,?,?,?,?)",
                        (f["filename"], f["algorithm"], f["stripsize"],
                         f["dataset"], f["imgclass"], data.tostring()))

    conn.commit()
    conn.close()
def make_gtruth_slices(boxfilename):
    """Slice a page's ground-truth zone boxes into per-strip XML files.

    Loads the zone boxes from boxfilename and the matching binary page
    image, then walks a fixed-height strip down the page.  For each strip
    position it intersects every ground-truth box with the strip, shifts
    the intersections so each strip's coordinates start at row 0, and
    writes them to <basename>_s<N>.xml.  Also writes a debug image
    (<basename>_out.png) with ground truth in blue, strips in green and
    intersections in red — correct output shows no blue.

    NOTE(review): strip_rows is a module-level constant not visible in
    this chunk; confirm its value against the strip dataset.
    """
    basename = get_basename(boxfilename)

    box_list = zonebox.load_boxes(boxfilename)
    print "found", len(box_list), "boxes"

    # load the image associated with this box list
    # assume all the boxes have the same image name (they should)
    imgfilename = "IMAGEBIN/{0}BIN.png".format(box_list[0].document_id)
    img = drawboxes.load_image(imgfilename)
    print img.mode, img.size

    # PIL reports size as (width, height)
    num_cols, num_rows = img.size
    print "rows={0} cols={1}".format(num_rows, num_cols)

    draw = ImageDraw.Draw(img)

    # starting strip as wide as the iamge with our base number of rows
    s = rects.Strip(width=num_cols, height=strip_rows)

    # wrap each ground-truth box in a Strip so we can intersect them
    box_strip_list = [rects.Strip(box=box) for box in box_list]

    data = np.asarray(img, dtype="uint8")
    print "shape=", data.shape

    # draw the ground truth in blue as sanity check (should see no blue in the
    # output image)
    for box_strip in box_strip_list:
        upper_left = box_strip.rect[0].x, box_strip.rect[0].y
        lower_right = box_strip.rect[2].x, box_strip.rect[2].y
        draw.rectangle((upper_left, lower_right), outline="blue")

    # iterate the strip down the page, calculating all the box intersections
    # for each strip
    row = 0
    strip_counter = 0
    while row < num_rows:
        print "strip=", s
        upper_left = s.rect[0].x, s.rect[0].y
        lower_right = s.rect[2].x, s.rect[2].y
        draw.rectangle((upper_left, lower_right), outline="green")

        # linear search all the boxes searching for those that match this strip
        box_intersect_list = []
        for box_strip in box_strip_list:
            isect = rects.strip_intersect(box_strip, s)
            if isect:
                print 'isect=', isect
                # PIL's Draw is x,y order
                upper_left = isect.rect[0].x, isect.rect[0].y
                lower_right = isect.rect[2].x, isect.rect[2].y
                draw.rectangle((upper_left, lower_right), outline="red")

                # adjust the intersections so the new ground truth of the box
                # intersections starts at row=0 (making new images out of
                # strips so need ground truth for each image strip)
                for rect in isect.rect:
                    # subtract out the starting Y position of upper left
                    rect.y -= s.rect[0].y
                print "adjusted isect=", isect

                # save this intersection; we'll write to a new XML file
                box_intersect_list.append(isect)

        # save the intersections as XML
        xmlfilename = "{0}_s{1}.xml".format(basename, strip_counter)
        with open(xmlfilename, "w") as outfile:
            zone2xml.write_boxlist_to_xml(outfile, box_intersect_list)
        print "wrote", xmlfilename

        # advance the strip window one position down the page
        s.next_strip()
        row += strip_rows
        strip_counter += 1

    outfilename = "{0}_out.png".format(basename)
    img.save(outfilename)
    print "wrote", outfilename
def make_gtruth_slices( boxfilename ) :
    """Slice a page's ground-truth zone boxes into per-strip XML files.

    Loads the zone boxes from boxfilename and the matching binary page
    image, then walks a fixed-height strip down the page.  For each strip
    position it intersects every ground-truth box with the strip, shifts
    the intersections so each strip's coordinates start at row 0, and
    writes them to <basename>_s<N>.xml.  Also writes a debug image
    (<basename>_out.png) with ground truth in blue, strips in green and
    intersections in red — correct output shows no blue.

    NOTE(review): strip_rows is a module-level constant not visible in
    this chunk; confirm its value against the strip dataset.
    """
    basename = get_basename( boxfilename )

    box_list = zonebox.load_boxes( boxfilename )
    print "found",len(box_list),"boxes"

    # load the image associated with this box list
    # assume all the boxes have the same image name (they should)
    imgfilename = "IMAGEBIN/{0}BIN.png".format( box_list[0].document_id )
    img = drawboxes.load_image(imgfilename)
    print img.mode, img.size

    # PIL reports size as (width, height)
    num_cols,num_rows = img.size
    print "rows={0} cols={1}".format( num_rows, num_cols )

    draw = ImageDraw.Draw(img)

    # starting strip as wide as the iamge with our base number of rows
    s = rects.Strip(width=num_cols, height=strip_rows )

    # wrap each ground-truth box in a Strip so we can intersect them
    box_strip_list = [ rects.Strip(box=box) for box in box_list ]

    data = np.asarray(img,dtype="uint8")
    print "shape=",data.shape

    # draw the ground truth in blue as sanity check (should see no blue in the
    # output image)
    for box_strip in box_strip_list :
        upper_left = box_strip.rect[0].x, box_strip.rect[0].y
        lower_right = box_strip.rect[2].x, box_strip.rect[2].y
        draw.rectangle( (upper_left,lower_right), outline="blue")

    # iterate the strip down the page, calculating all the box intersections
    # for each strip
    row = 0
    strip_counter = 0
    while row < num_rows :
        print "strip=",s
        upper_left = s.rect[0].x, s.rect[0].y
        lower_right = s.rect[2].x, s.rect[2].y
        draw.rectangle( (upper_left,lower_right), outline="green" )

        # linear search all the boxes searching for those that match this strip
        box_intersect_list = []
        for box_strip in box_strip_list :
            isect = rects.strip_intersect( box_strip, s )
            if isect :
                print 'isect=',isect
                # PIL's Draw is x,y order
                upper_left = isect.rect[0].x, isect.rect[0].y
                lower_right = isect.rect[2].x, isect.rect[2].y
                draw.rectangle( (upper_left,lower_right), outline="red" )

                # adjust the intersections so the new ground truth of the box
                # intersections starts at row=0 (making new images out of
                # strips so need ground truth for each image strip)
                for rect in isect.rect :
                    # subtract out the starting Y position of upper left
                    rect.y -= s.rect[0].y
                print "adjusted isect=",isect

                # save this intersection; we'll write to a new XML file
                box_intersect_list.append( isect )

        # save the intersections as XML
        xmlfilename = "{0}_s{1}.xml".format( basename, strip_counter )
        with open(xmlfilename,"w") as outfile :
            zone2xml.write_boxlist_to_xml( outfile, box_intersect_list )
        print "wrote", xmlfilename

        # advance the strip window one position down the page
        s.next_strip()
        row += strip_rows
        strip_counter += 1

    outfilename = "{0}_out.png".format( basename )
    img.save(outfilename)
    print "wrote",outfilename