def decode_ballots(ballots, mark_path, manager, queue):
    """ Decode ES&S barcode for all ballots by calling the partask
    multiprocessing helper.
    Input:
        ballots : {int ballotID: [imgpath_side0, ...]}
        mark_path : path to example timing mark representing '1' in bitstring
        manager : used for multiprocessing
        queue : used for multiprocessing
    Output:
        decoded_results: {ballotid: [(bitstring, is_flipped, bit_locations), ...]},
        or None if decoding raised an exception (the traceback is printed).
    """
    try:
        decoded_results = partask.do_partask(_do_decode_ballots, ballots,
                                             _args=mark_path,
                                             combfn='dict',
                                             manager=manager,
                                             pass_queue=queue,
                                             N=None)
        print('finished decoding:')
        return decoded_results
    except Exception:
        # Best-effort by design: report the failure and signal it to the
        # caller with None. Catching Exception instead of using a bare
        # except lets KeyboardInterrupt / SystemExit propagate, so a long
        # decoding run can still be aborted.
        traceback.print_exc()
        return None
def minmax_cv_par(imgpaths, do_align=False, rszFac=1.0, trfm_type='rigid',
                  minArea=np.power(2, 16), bbs_map=None, numProcs=None,
                  imgCache=None):
    """ A parallel-wrapper for minmax_cv_v2.
    Note: For some reason, this is ~10X slower than just calling
    minmax_cv and doing it in a single process. Not sure why...
    NOTE: Currently deprecated. We switched back to using a numpy-based
    overlay-generation function, for ease (make_overlay_minmax)
    Input:
        list IMGPATHS: image paths to overlay. The last path is popped off
            (from a copy) and used as the reference image in the parallel
            path, so the list must be non-empty there.
        DO_ALIGN, RSZFAC, TRFM_TYPE, MINAREA, BBS_MAP: passed through to the
            underlying min/max routine.
        int NUMPROCS: number of processes; None means cpu_count(). A value
            of 1 falls back to the single-process minmax_cv.
        IMGCACHE: only forwarded in the single-process path; the parallel
            worker arguments do not include it.
    Output:
        (Imin, Imax), rebuilt from their string form with str2iplimage in
        the parallel path.
    """
    if numProcs is None:
        numProcs = multiprocessing.cpu_count()
    if numProcs == 1:
        # Forward the caller's TRFM_TYPE (the builtin `type` used to be
        # passed here by mistake).
        return minmax_cv(imgpaths, do_align=do_align, rszFac=rszFac,
                         trfm_type=trfm_type, minArea=minArea,
                         bbs_map=bbs_map, imgCache=imgCache)
    # Work on a copy so the caller's list is not mutated by pop().
    imgpaths = imgpaths[:]
    Iref_imP = imgpaths.pop()
    Imin_str, Imax_str, size = partask.do_partask(
        _minmax_cv_v2_wrapper, imgpaths,
        _args=(Iref_imP, do_align, rszFac, trfm_type, minArea, bbs_map),
        init=(None, None, None),
        combfn=_minmax_combfn,
        N=numProcs)
    return str2iplimage(Imin_str, size), str2iplimage(Imax_str, size)
def decode_ballots(self, ballots, manager=None, queue=None, skipVerify=True, *args, **kwargs):
    """ Run _decode_ballots over BALLOTS through partask.do_partask.
    Input:
        BALLOTS: the work items handed to partask.
        obj MANAGER, QUEUE: forwarded as the manager / pass_queue arguments.
        bool SKIPVERIFY: passed to the worker together with TEMPLATE_PATH
            and COLMARK_PATH.
        Any extra positional or keyword arguments are accepted and ignored.
    Output:
        Whatever partask.do_partask returns after combining the per-task
        results with _combfn, starting from three empty dicts and two
        empty lists.
    """
    worker_args = (TEMPLATE_PATH, COLMARK_PATH, skipVerify)
    accumulators = ({}, {}, {}, [], [])
    combined = partask.do_partask(_decode_ballots, ballots,
                                  _args=worker_args,
                                  combfn=_combfn,
                                  init=accumulators,
                                  manager=manager,
                                  pass_queue=queue,
                                  N=None)
    return combined
def decode_ballots(ballots, topbot_paths, manager, queue, skipVerify=True, N=None, cache=None):
    """ Decode all ballots via partask.do_partask and report the timing.
    Input:
        BALLOTS: the work items handed to partask; must support len().
        TOPBOT_PATHS, SKIPVERIFY, CACHE: passed to the _do_decode_ballots
            worker through _args.
        obj MANAGER, QUEUE: forwarded as the manager / pass_queue arguments.
        N: forwarded to partask.do_partask unchanged.
    Output:
        The result of partask.do_partask with combfn='dict'.
    """
    t = time.time()
    decoded_results = partask.do_partask(_do_decode_ballots, ballots,
                                         _args=(topbot_paths, skipVerify, cache),
                                         combfn='dict',
                                         manager=manager,
                                         pass_queue=queue,
                                         N=N)
    dur = time.time() - t
    num_ballots = len(ballots)
    # An empty BALLOTS would otherwise raise ZeroDivisionError in the
    # purely informational timing message below.
    secs_per_ballot = dur / float(num_ballots) if num_ballots else 0.0
    print("...finished decoding {0} ballots ({1:.2f}s, {2:.5f} secs per ballot)".format(
        num_ballots, dur, secs_per_ballot))
    return decoded_results
def decode_ballots(ballots, topbot_paths, manager, queue, skipVerify=True, N=None, cache=None):
    """ Decode all ballots via partask.do_partask and log a completion note.
    Input:
        BALLOTS: the work items handed to partask; must support len().
        TOPBOT_PATHS, SKIPVERIFY, CACHE: passed to the _do_decode_ballots
            worker through _args.
        obj MANAGER, QUEUE: forwarded as the manager / pass_queue arguments.
        N: forwarded to partask.do_partask unchanged.
    Output:
        The result of partask.do_partask with combfn='dict'.
    """
    worker_args = (topbot_paths, skipVerify, cache)
    results = partask.do_partask(_do_decode_ballots, ballots,
                                 _args=worker_args,
                                 combfn='dict',
                                 manager=manager,
                                 pass_queue=queue,
                                 N=N)
    num_decoded = len(ballots)
    debug("...finished decoding {0} ballots", num_decoded)
    return results
def extract(imgpatches, do_threshold=None, manager=None, queue_mygauge=None):
    """ Extract image patches by running _extract_patches through partask.
    Input:
        dict IMGPATCHES: {imgpath: [((x1,y1,x2,y2), isflip, outpath, tag), ...]}
        DO_THRESHOLD: passed to the worker through _args.
        obj MANAGER: Pass if you want to do MyGauge-related updates (to be
            used with an associated Queue instance, QUEUE_MYGAUGE).
        obj QUEUE_MYGAUGE: The Queue instance (owned by MANAGER) which is
            used to communicate cross-process to a MyGauge instance.
    Output:
        dict IMG2PATCH: {(imgpath, tag): patchpath},
        dict PATCH2STUFF: {patchpath: (imgpath, (x1,y1,x2,y2), tag)}.
    """
    worker_args = (do_threshold, queue_mygauge)
    # Both output mappings start out empty and are filled in by _combfn.
    empty_maps = ({}, {})
    return partask.do_partask(_extract_patches, imgpatches,
                              _args=worker_args,
                              manager=manager,
                              combfn=_combfn,
                              init=empty_maps,
                              N=None)
def decode_ballots(ballots, topbot_paths, manager, queue, skipVerify=True, N=None, cache=None):
    """ Decode all ballots via partask.do_partask and report the timing.
    Input:
        BALLOTS: the work items handed to partask; must support len().
        TOPBOT_PATHS, SKIPVERIFY, CACHE: passed to the _do_decode_ballots
            worker through _args.
        obj MANAGER, QUEUE: forwarded as the manager / pass_queue arguments.
        N: forwarded to partask.do_partask unchanged.
    Output:
        The result of partask.do_partask with combfn='dict'.
    """
    t = time.time()
    decoded_results = partask.do_partask(_do_decode_ballots, ballots,
                                         _args=(topbot_paths, skipVerify, cache),
                                         combfn='dict',
                                         manager=manager,
                                         pass_queue=queue,
                                         N=N)
    dur = time.time() - t
    num_ballots = len(ballots)
    # Avoid a ZeroDivisionError in the informational message when there
    # were no ballots to decode.
    secs_per_ballot = dur / float(num_ballots) if num_ballots else 0.0
    print("...finished decoding {0} ballots ({1:.2f}s, {2:.5f} secs per ballot)".format(
        num_ballots, dur, secs_per_ballot))
    return decoded_results
def do_extract_attr_patches(proj, blanks, img2flip):
    """Extract all attribute patches from all blank ballots into
    the specified outdir. Saves them to:
        <projdir>/extract_attrs_templates/ATTRTYPE/*.png
    Also pickles the blank2attrpatch and invblank2attrpatch mappings into
    the project directory.
    Input:
        obj PROJ: project object; supplies projdir_path and the
            blank2attrpatch / invblank2attrpatch file names.
        list BLANKS: List of [[imgP_side0, imgP_side1, ...], ...]
        IMG2FLIP: passed to the extract_attr_patches worker through _args.
    Output:
        (dict mapping, dict inv_mapping), where:
          mapping is {imgpath: {str attrtype: str patchpath}}
          inv_mapping is {str patchpath: (imgpath, attrtype)}
    """
    mapping, invmapping, blank2attrpatch, invb2ap = partask.do_partask(
        extract_attr_patches, blanks,
        _args=(proj, img2flip),
        combfn=_extract_combfn,
        init=({}, {}, {}, {}),
        N=1)
    # Write through context managers so each file is flushed and closed
    # before returning, instead of relying on garbage collection.
    blank2attrpatchP = pathjoin(proj.projdir_path, proj.blank2attrpatch)
    with open(blank2attrpatchP, 'wb') as f:
        pickle.dump(blank2attrpatch, f)
    invblank2attrpatchP = pathjoin(proj.projdir_path, proj.invblank2attrpatch)
    with open(invblank2attrpatchP, 'wb') as f:
        pickle.dump(invb2ap, f)
    return mapping, invmapping
def do_extract_attr_patches(proj, blanks, img2flip):
    """Extract all attribute patches from all blank ballots into
    the specified outdir. Saves them to:
        <projdir>/extract_attrs_templates/ATTRTYPE/*.png
    Also pickles the blank2attrpatch and invblank2attrpatch mappings into
    the project directory.
    Input:
        obj PROJ: project object; supplies projdir_path and the
            blank2attrpatch / invblank2attrpatch file names.
        list BLANKS: List of [[imgP_side0, imgP_side1, ...], ...]
        IMG2FLIP: passed to the extract_attr_patches worker through _args.
    Output:
        (dict mapping, dict inv_mapping), where:
          mapping is {imgpath: {str attrtype: str patchpath}}
          inv_mapping is {str patchpath: (imgpath, attrtype)}
    """
    mapping, invmapping, blank2attrpatch, invb2ap = partask.do_partask(
        extract_attr_patches, blanks,
        _args=(proj, img2flip),
        combfn=_extract_combfn,
        init=({}, {}, {}, {}),
        N=1)
    # Use context managers so both pickle files are closed (and therefore
    # fully flushed to disk) before this function returns.
    blank2attrpatchP = pathjoin(proj.projdir_path, proj.blank2attrpatch)
    with open(blank2attrpatchP, 'wb') as f:
        pickle.dump(blank2attrpatch, f)
    invblank2attrpatchP = pathjoin(proj.projdir_path, proj.invblank2attrpatch)
    with open(invblank2attrpatchP, 'wb') as f:
        pickle.dump(invb2ap, f)
    return mapping, invmapping
def decode_ballots(self, ballots, manager=None, queue=None, skipVerify=True, *args, **kwargs):
    """ Run _decode_ballots over BALLOTS through partask.do_partask.
    Input:
        BALLOTS: the work items handed to partask.
        obj MANAGER, QUEUE: forwarded as the manager / pass_queue arguments.
        bool SKIPVERIFY: passed to the worker along with the sequoia
            ZERO/ONE/SIDESYM image paths and self.proj.num_pages.
        Any extra positional or keyword arguments are accepted and ignored.
    Output:
        (img2decoding, flipmap, mark_bbs_map, err_imgpaths, ioerr_imgpaths).
        The sixth combined result is stored on self.backsmap instead of
        being returned.
    """
    worker_args = (sequoia.ZERO_IMGPATH,
                   sequoia.ONE_IMGPATH,
                   sequoia.SIDESYM_IMGPATH,
                   self.proj.num_pages,
                   skipVerify)
    accumulators = ({}, {}, {}, [], [], {})
    decodings, flips, mark_bbs, errs, ioerrs, backs = partask.do_partask(
        _decode_ballots, ballots,
        _args=worker_args,
        combfn=_combfn,
        init=accumulators,
        manager=manager,
        pass_queue=queue,
        N=None)
    # BACKSMAP: maps {int ballotID: [imgpath_i, ...]}. Stores all backside
    # imagepaths for each ballotid
    self.backsmap = backs
    return (decodings, flips, mark_bbs, errs, ioerrs)
def main():
    """ Command-line driver: group ballot images by barcode and write the
    per-group barcode crops plus Min/Max overlays.

    Positional arguments (read from sys.argv):
        argv[1]: directory that is walked recursively for images
        argv[2]: vendor name, passed to the partitioning worker
        argv[3]: output directory
        argv[4]: optional int N, the maximum number of images to process
                 (missing or non-integer means no limit)
    Optional flags, recognised anywhere in the argument list:
        align         : align the barcodes when computing Min/Max overlays
        do_cpyimg     : also copy each whole image into its group directory
        just_grouping : only compute the grouping, don't compute overlays
        profile       : run partition_imgs under cProfile, then exit
        load <path>   : (must be the last two arguments) load a pickled
                        grouping instead of recomputing it
    """
    def isimgext(f):
        return os.path.splitext(f)[1].lower() in ('.png', '.tif', '.tiff', '.jpg', '.jpeg')

    def ensure_dir(path):
        # Create PATH if needed. An already-existing directory is fine;
        # any other failure (permissions, a file in the way) is re-raised
        # rather than silently ignored.
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    args = sys.argv[1:]
    imgsdir = args[0]
    vendor = args[1]
    outdir = args[2]
    try:
        N = int(args[3])
    except (IndexError, ValueError):
        # No fourth argument, or it is one of the flags: no image limit.
        N = -1
    # Align the barcodes when computing Min/Max overlays
    do_align = 'align' in args
    # Copy the entire images to OUTDIR (don't do this for large N!)
    do_cpyimg = 'do_cpyimg' in args
    # Just compute the barcodes + group, don't compute overlays
    just_grouping = 'just_grouping' in args
    do_profile = 'profile' in args
    if args[-2] == 'load':
        # NOTE: unpickling can execute arbitrary code; only load grouping
        # files that this tool produced itself.
        with open(args[-1], 'rb') as f:
            grouping = pickle.load(f)
    else:
        grouping = None

    imgpaths = []
    cnt = 0
    for dirpath, dirnames, filenames in os.walk(imgsdir):
        for imgname in [f for f in filenames if isimgext(f)]:
            if N > 0 and cnt >= N:
                break
            imgpath = os.path.join(dirpath, imgname)
            imgpaths.append(imgpath)
            cnt += 1
        if N > 0 and cnt >= N:
            break

    print("Starting partition_imgs...")
    t = time.time()
    if do_profile:
        cProfile.runctx('partition_imgs(imgpaths, vendor=vendor)',
                        {},
                        {'imgpaths': imgpaths, 'vendor': vendor,
                         'partition_imgs': partition_imgs})
        return
    if grouping is None:
        grouping = partask.do_partask(_do_partition_imgs,
                                      imgpaths,
                                      _args=(vendor, None),
                                      combfn="dict",
                                      N=None)
        ensure_dir(outdir)
        # Persist the freshly computed grouping so a later run can 'load' it.
        with open(os.path.join(outdir, 'grouping.p'), 'wb') as f:
            pickle.dump(grouping, f, pickle.HIGHEST_PROTOCOL)
    dur = time.time() - t
    print("...Finished partition_imgs ({0} s).".format(dur))
    # Guard against an empty image directory (would be a ZeroDivisionError).
    avg_dur = dur / len(imgpaths) if imgpaths else 0.0
    print(" Avg. Time per ballot: {0} s".format(avg_dur))

    print("Copying groups to outdir {0}...".format(outdir))
    t = time.time()
    errcount = 0
    for barcodes, group in grouping.iteritems():
        has_err = "ERR0" in barcodes or "ERR1" in barcodes
        if len(group) == 1:
            # Singleton groups are only counted (if erroneous), never written.
            errcount += 1 if has_err else 0
            continue
        elif has_err:
            errcount += len(group)
        if just_grouping:
            continue
        bcs = '_'.join([thing for thing in barcodes if isinstance(thing, str)])
        rootdir = os.path.join(outdir, bcs)
        ensure_dir(rootdir)
        # One running min / max / reference image per barcode position.
        Imins = [None for _ in barcodes]
        Imaxes = [None for _ in barcodes]
        Irefs = [None for _ in barcodes]
        for i, (imgpath, isflip, bbs) in enumerate(group):
            if do_cpyimg:
                imgname = os.path.split(imgpath)[1]
                outpath_foo = os.path.join(rootdir, imgname)
                shutil.copy(imgpath, outpath_foo)
            img = cv.LoadImage(imgpath, cv.CV_LOAD_IMAGE_GRAYSCALE)
            if isflip:
                cv.Flip(img, img, flipMode=-1)
            for j, bb in enumerate(bbs):
                outpath = os.path.join(rootdir, str(j), "{0}_{1}.png".format(i, j))
                ensure_dir(os.path.split(outpath)[0])
                x, y, w, h = bb
                cv.SetImageROI(img, (x, y, w, h))
                # Save each barcode crop at twice its original size.
                wbig, hbig = int(round(w * 2.0)), int(round(h * 2.0))
                bcBig = cv.CreateImage((wbig, hbig), img.depth, img.channels)
                cv.Resize(img, bcBig, interpolation=cv.CV_INTER_CUBIC)
                cv.SaveImage(outpath, bcBig)
                if Imins[j] is None:
                    # First crop at this position seeds min, max and the
                    # alignment reference.
                    Imins[j] = cv.CloneImage(bcBig)
                    Imaxes[j] = cv.CloneImage(bcBig)
                    if do_align:
                        Irefs[j] = make_overlays.iplimage2np(cv.CloneImage(bcBig)) / 255.0
                else:
                    bcBig_sized = make_overlays.matchsize(bcBig, Imins[j])
                    if do_align:
                        tmp_np = make_overlays.iplimage2np(cv.CloneImage(bcBig_sized)) / 255.0
                        H, Ireg, err = imagesAlign.imagesAlign(tmp_np, Irefs[j],
                                                               fillval=0.2, rszFac=0.75)
                        Ireg *= 255.0
                        Ireg = Ireg.astype('uint8')
                        bcBig_sized = make_overlays.np2iplimage(Ireg)
                    cv.Min(bcBig_sized, Imins[j], Imins[j])
                    cv.Max(bcBig_sized, Imaxes[j], Imaxes[j])
        for idx, Imin in enumerate(Imins):
            Imax = Imaxes[idx]
            cv.SaveImage(os.path.join(rootdir, "_{0}_minimg.png".format(idx)), Imin)
            cv.SaveImage(os.path.join(rootdir, "_{0}_maximg.png".format(idx)), Imax)
    dur = time.time() - t
    print("...Finished Copying groups to outdir {0} ({1} s).".format(outdir, dur))
    print("Number of error ballots: {0}".format(errcount))
    print("Done.")
def partition_imgs_par(imgpaths, vendor="hart", queue=None):
    """ Partition IMGPATHS by running _do_partition_imgs through partask.
    Input:
        list IMGPATHS: image paths handed to partask as the work items.
        str VENDOR: passed to the worker through _args.
        obj QUEUE: passed to the worker through _args.
    Output:
        The grouping produced by partask.do_partask with combfn="dict".
    """
    grouping = partask.do_partask(_do_partition_imgs,
                                  imgpaths,
                                  _args=(vendor, queue),
                                  combfn="dict",
                                  N=None)
    # The grouping used to be computed and then dropped; hand it back.
    return grouping
def do_extract_digitbased_patches(proj, C, MIN, MAX):
    """ Extracts all digit-based attribute patches, and stores them
    in the proj.extracted_digitpatch_dir directory.
    Input:
        obj proj:
        (The following args are used if the digitattr varies within a
         partition)
        int C: Suggested fraction of ballots to randomly sample.
        int MIN, MAX: Min./Max. number of ballots to randomly sample.
    Output:
        Returns a dict mapping {str patchpath: (imgpath, attrtype, bb, int side)}.
        An empty dict is returned when the project defines no digit-based
        attribute.
    Raises:
        Exception if more than one digit-based attribute is defined.
    """
    def load_pickle(path):
        # Read one pickled project file and close the handle right away.
        with open(path, 'rb') as f:
            return pickle.load(f)

    # all_attrtypes is a list of dicts (marshall'd AttributeBoxes)
    all_attrtypes = load_pickle(proj.ballot_attributesfile)
    # list of (attrs,x1,y1,x2,y2,side,is_part_consistent)
    digit_attrtypes = []
    for attrbox_dict in all_attrtypes:
        if attrbox_dict['is_digitbased']:
            digit_attrtypes.append((attrbox_dict['attrs'],
                                    attrbox_dict['x1'],
                                    attrbox_dict['y1'],
                                    attrbox_dict['x2'],
                                    attrbox_dict['y2'],
                                    attrbox_dict['side'],
                                    attrbox_dict['grp_per_partition']))
    if len(digit_attrtypes) >= 2:
        raise Exception("Only one digit attribute may exist.")
    if not digit_attrtypes:
        # Nothing to extract; avoids an IndexError on digit_attrtypes[0].
        return {}

    bal2imgs = load_pickle(proj.ballot_to_images)
    # PARTITIONS_MAP: maps {int partitionID: [int ballotID, ...]}
    partitions_map = load_pickle(pathjoin(proj.projdir_path, proj.partitions_map))
    img2page = load_pickle(pathjoin(proj.projdir_path, proj.image_to_page))
    img2flip = load_pickle(pathjoin(proj.projdir_path, proj.image_to_flip))

    if digit_attrtypes[0][6]:
        # Digit attr is consistent within each partition -- only choose
        # one ballot from each partition
        chosen_bids = set(ballotids[0]
                          for ballotids in partitions_map.values()
                          if ballotids)
        print("...Digit attribute is consistent w.r.t partitions, chose {0} ballots".format(
            len(chosen_bids)))
    else:
        # Randomly choose ballots from the election.
        candidate_balids = [bid
                            for ballotids in partitions_map.values()
                            for bid in ballotids]
        N = max(min(int(round(len(candidate_balids) * C)), MAX), MIN)
        # If MIN > len(candidate_balids), avoid oversampling
        N = min(N, len(candidate_balids))
        chosen_bids = set(random.sample(candidate_balids, N))
        print("...Digit attribute is NOT consistent w.r.t partitions, chose {0} ballots".format(
            len(chosen_bids)))

    tasks = []  # list [(int ballotID, [imgpath_side0, ...]), ...]
    for ballotid in chosen_bids:
        # Order each ballot's images by page number.
        imgpaths_ordered = sorted(bal2imgs[ballotid], key=lambda imP: img2page[imP])
        tasks.append((ballotid, imgpaths_ordered))
    return partask.do_partask(extract_digitbased_patches,
                              tasks,
                              _args=(digit_attrtypes, proj, img2flip),
                              combfn=_my_combfn,
                              init={},
                              pass_idx=True,
                              N=None)
def do_extract_digitbased_patches(proj, C, MIN, MAX):
    """ Extracts all digit-based attribute patches, and stores them
    in the proj.extracted_digitpatch_dir directory.
    Input:
        obj proj:
        (The following args are used if the digitattr varies within a
         partition)
        int C: Suggested fraction of ballots to randomly sample.
        int MIN, MAX: Min./Max. number of ballots to randomly sample.
    Output:
        Returns a dict mapping {str patchpath: (imgpath, attrtype, bb, int side)}.
        An empty dict is returned when the project defines no digit-based
        attribute.
    Raises:
        Exception if more than one digit-based attribute is defined.
    """
    def load_pickle(path):
        # Read one pickled project file and close the handle right away.
        with open(path, 'rb') as f:
            return pickle.load(f)

    # all_attrtypes is a list of dicts (marshall'd AttributeBoxes)
    all_attrtypes = load_pickle(proj.ballot_attributesfile)
    # list of (attrs,x1,y1,x2,y2,side,is_part_consistent)
    digit_attrtypes = []
    for attrbox_dict in all_attrtypes:
        if attrbox_dict['is_digitbased']:
            digit_attrtypes.append((attrbox_dict['attrs'],
                                    attrbox_dict['x1'],
                                    attrbox_dict['y1'],
                                    attrbox_dict['x2'],
                                    attrbox_dict['y2'],
                                    attrbox_dict['side'],
                                    attrbox_dict['grp_per_partition']))
    if len(digit_attrtypes) >= 2:
        raise Exception("Only one digit attribute may exist.")
    if not digit_attrtypes:
        # Nothing to extract; avoids an IndexError on digit_attrtypes[0].
        return {}

    bal2imgs = load_pickle(proj.ballot_to_images)
    # PARTITIONS_MAP: maps {int partitionID: [int ballotID, ...]}
    partitions_map = load_pickle(pathjoin(proj.projdir_path, proj.partitions_map))
    img2page = load_pickle(pathjoin(proj.projdir_path, proj.image_to_page))
    img2flip = load_pickle(pathjoin(proj.projdir_path, proj.image_to_flip))

    if digit_attrtypes[0][6]:
        # Digit attr is consistent within each partition -- only choose
        # one ballot from each partition
        chosen_bids = set(ballotids[0]
                          for ballotids in partitions_map.values()
                          if ballotids)
        print("...Digit attribute is consistent w.r.t partitions, chose {0} ballots".format(
            len(chosen_bids)))
    else:
        # Randomly choose ballots from the election.
        candidate_balids = [bid
                            for ballotids in partitions_map.values()
                            for bid in ballotids]
        N = max(min(int(round(len(candidate_balids) * C)), MAX), MIN)
        # If MIN > len(candidate_balids), avoid oversampling
        N = min(N, len(candidate_balids))
        chosen_bids = set(random.sample(candidate_balids, N))
        print("...Digit attribute is NOT consistent w.r.t partitions, chose {0} ballots".format(
            len(chosen_bids)))

    tasks = []  # list [(int ballotID, [imgpath_side0, ...]), ...]
    for ballotid in chosen_bids:
        # Order each ballot's images by page number.
        imgpaths_ordered = sorted(bal2imgs[ballotid], key=lambda imP: img2page[imP])
        tasks.append((ballotid, imgpaths_ordered))
    return partask.do_partask(extract_digitbased_patches,
                              tasks,
                              _args=(digit_attrtypes, proj, img2flip),
                              combfn=_my_combfn,
                              init={},
                              pass_idx=True,
                              N=None)
def main():
    """ Command-line driver: group ballot images by barcode and write the
    per-group barcode crops plus Min/Max overlays.

    Positional arguments (read from sys.argv):
        argv[1]: directory that is walked recursively for images
        argv[2]: vendor name, passed to the partitioning worker
        argv[3]: output directory
        argv[4]: optional int N, the maximum number of images to process
                 (missing or non-integer means no limit)
    Optional flags, recognised anywhere in the argument list:
        align         : align the barcodes when computing Min/Max overlays
        do_cpyimg     : also copy each whole image into its group directory
        just_grouping : only compute the grouping, don't compute overlays
        profile       : run partition_imgs under cProfile, then exit
        load <path>   : (must be the last two arguments) load a pickled
                        grouping instead of recomputing it
    """
    def isimgext(f):
        return os.path.splitext(f)[1].lower() in ('.png', '.tif', '.tiff', '.jpg', '.jpeg')

    def ensure_dir(path):
        # Create PATH if needed. An already-existing directory is fine;
        # any other failure (permissions, a file in the way) is re-raised
        # rather than silently ignored.
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    args = sys.argv[1:]
    imgsdir = args[0]
    vendor = args[1]
    outdir = args[2]
    try:
        N = int(args[3])
    except (IndexError, ValueError):
        # No fourth argument, or it is one of the flags: no image limit.
        N = -1
    # Align the barcodes when computing Min/Max overlays
    do_align = 'align' in args
    # Copy the entire images to OUTDIR (don't do this for large N!)
    do_cpyimg = 'do_cpyimg' in args
    # Just compute the barcodes + group, don't compute overlays
    just_grouping = 'just_grouping' in args
    do_profile = 'profile' in args
    if args[-2] == 'load':
        # NOTE: unpickling can execute arbitrary code; only load grouping
        # files that this tool produced itself.
        with open(args[-1], 'rb') as f:
            grouping = pickle.load(f)
    else:
        grouping = None

    imgpaths = []
    cnt = 0
    for dirpath, dirnames, filenames in os.walk(imgsdir):
        for imgname in [f for f in filenames if isimgext(f)]:
            if N > 0 and cnt >= N:
                break
            imgpath = os.path.join(dirpath, imgname)
            imgpaths.append(imgpath)
            cnt += 1
        if N > 0 and cnt >= N:
            break

    print("Starting partition_imgs...")
    t = time.time()
    if do_profile:
        cProfile.runctx('partition_imgs(imgpaths, vendor=vendor)',
                        {},
                        {'imgpaths': imgpaths, 'vendor': vendor,
                         'partition_imgs': partition_imgs})
        return
    if grouping is None:
        grouping = partask.do_partask(_do_partition_imgs,
                                      imgpaths,
                                      _args=(vendor, None),
                                      combfn="dict",
                                      N=None)
        ensure_dir(outdir)
        # Persist the freshly computed grouping so a later run can 'load' it.
        with open(os.path.join(outdir, 'grouping.p'), 'wb') as f:
            pickle.dump(grouping, f, pickle.HIGHEST_PROTOCOL)
    dur = time.time() - t
    print("...Finished partition_imgs ({0} s).".format(dur))
    # Guard against an empty image directory (would be a ZeroDivisionError).
    avg_dur = dur / len(imgpaths) if imgpaths else 0.0
    print(" Avg. Time per ballot: {0} s".format(avg_dur))

    print("Copying groups to outdir {0}...".format(outdir))
    t = time.time()
    errcount = 0
    for barcodes, group in grouping.iteritems():
        has_err = "ERR0" in barcodes or "ERR1" in barcodes
        if len(group) == 1:
            # Singleton groups are only counted (if erroneous), never written.
            errcount += 1 if has_err else 0
            continue
        elif has_err:
            errcount += len(group)
        if just_grouping:
            continue
        bcs = '_'.join([thing for thing in barcodes if isinstance(thing, str)])
        rootdir = os.path.join(outdir, bcs)
        ensure_dir(rootdir)
        # One running min / max / reference image per barcode position.
        Imins = [None for _ in barcodes]
        Imaxes = [None for _ in barcodes]
        Irefs = [None for _ in barcodes]
        for i, (imgpath, isflip, bbs) in enumerate(group):
            if do_cpyimg:
                imgname = os.path.split(imgpath)[1]
                outpath_foo = os.path.join(rootdir, imgname)
                shutil.copy(imgpath, outpath_foo)
            img = cv.LoadImage(imgpath, cv.CV_LOAD_IMAGE_GRAYSCALE)
            if isflip:
                cv.Flip(img, img, flipMode=-1)
            for j, bb in enumerate(bbs):
                outpath = os.path.join(rootdir, str(j), "{0}_{1}.png".format(i, j))
                ensure_dir(os.path.split(outpath)[0])
                x, y, w, h = bb
                cv.SetImageROI(img, (x, y, w, h))
                # Save each barcode crop at twice its original size.
                wbig, hbig = int(round(w * 2.0)), int(round(h * 2.0))
                bcBig = cv.CreateImage((wbig, hbig), img.depth, img.channels)
                cv.Resize(img, bcBig, interpolation=cv.CV_INTER_CUBIC)
                cv.SaveImage(outpath, bcBig)
                if Imins[j] is None:
                    # First crop at this position seeds min, max and the
                    # alignment reference.
                    Imins[j] = cv.CloneImage(bcBig)
                    Imaxes[j] = cv.CloneImage(bcBig)
                    if do_align:
                        Irefs[j] = make_overlays.iplimage2np(cv.CloneImage(bcBig)) / 255.0
                else:
                    bcBig_sized = make_overlays.matchsize(bcBig, Imins[j])
                    if do_align:
                        tmp_np = make_overlays.iplimage2np(cv.CloneImage(bcBig_sized)) / 255.0
                        H, Ireg, err = imagesAlign.imagesAlign(tmp_np, Irefs[j],
                                                               fillval=0.2, rszFac=0.75)
                        Ireg *= 255.0
                        Ireg = Ireg.astype('uint8')
                        bcBig_sized = make_overlays.np2iplimage(Ireg)
                    cv.Min(bcBig_sized, Imins[j], Imins[j])
                    cv.Max(bcBig_sized, Imaxes[j], Imaxes[j])
        for idx, Imin in enumerate(Imins):
            Imax = Imaxes[idx]
            cv.SaveImage(os.path.join(rootdir, "_{0}_minimg.png".format(idx)), Imin)
            cv.SaveImage(os.path.join(rootdir, "_{0}_maximg.png".format(idx)), Imax)
    dur = time.time() - t
    print("...Finished Copying groups to outdir {0} ({1} s).".format(outdir, dur))
    print("Number of error ballots: {0}".format(errcount))
    print("Done.")