def estimateScale(attr2pat,img2flip,superRegion,rszFac,stopped): """ Input: dict attr2pat: maps {str attrval: [[str exmpl_imP, nparray imgpatch_i], ...]} tuple superRegion: float rszFac: fn stopped: Output: A scale. """ print 'estimating scale.' jobs=[] sStep=.05 sList=[] nProc=sh.numProcs() #nProc = 1 queue = mp.Queue() pool = mp.Pool(processes=nProc, initializer=templateSSWorker_init, initargs=[queue]) for attrval in attr2pat.keys(): jobs.append((attr2pat,attrval,superRegion,sStep,img2flip)) if nProc < 2: # default behavior for non multiproc machines for job in jobs: if stopped(): return False templateSSWorker.queue = queue templateSSWorker(job) else: print 'using ', nProc, ' processes' it = [False] def imdone(x): it[0] = True print "I AM DONE NOW!" pool.map_async(templateSSWorker,jobs, callback=lambda x: imdone(it)) while not it[0]: if stopped(): pool.terminate() return False time.sleep(.1) pool.close() pool.join() # collect results while len(sList) < len(jobs): sList.append(queue.get()) #for job in jobs: # f1=job[5] # s=pickle.load(open(f1))['scale'] # sList.append(s) print sList scale=min(max(sList)+2*sStep,rszFac) #scale = 0.95 return scale
def digitParse(digit_hash, imList, bbSearch, nDigits, flipmap=None,
               hspace=20, rejected_hashes=None, accepted_hashes=None):
    """Runs NCC-based OCR on the images on imList.

    Input:
        dict digit_hash: maps {(str digit, str meta): img digit_exemplar}
        lst imList: list of imagepaths to search over
        bbSearch: [y1,y2,x1,x2] coords to search on
        nDigits: an integer that specifies how many digits there are.
        dict flipmap: maps {str imgpath: bool isflip}
        int hspace: horizontal spacing parameter forwarded to process_one
            (presumably pixels between digits -- TODO confirm in process_one).
        dict rejected_hashes: Contains all user rejections for each image,
            maps: {imgpath: {str digit: [((y1,y2,x1,x2),str side_i,bool isflip_i), ...]}}
        dict accepted_hashes: Contains all user accepts for each image,
            maps: {imgpath: {str digit: [((y1,y2,x1,x2),str side_i,bool isflip_i), ...]}}
    Output:
        A list of results of the form:
            [(imgpath_i, ocr_str_i, imgpatches_i, patchcoords_i, scores_i), ... ]
    """
    digitList = digit_hash.values()
    # NOTE(review): patchExample is never read below -- looks like dead code;
    # confirm before removing (it would also crash on an empty digit_hash).
    patchExample = digitList[0]
    nProc = sh.numProcs()
    manager = mp.Manager()
    queue_progress = manager.Queue()  # Used for MyGauge updates
    # One job tuple per image; process_one reports (status, metadata) on
    # queue_progress as a side channel in addition to returning its result.
    jobs = [(x, digit_hash, imList, bbSearch, nDigits, hspace,
             rejected_hashes, accepted_hashes, flipmap, queue_progress)
            for x in imList]
    if nProc < 2:
        # Single-process fallback: run each image inline, draining one
        # progress message per image so the UI gauge still ticks.
        results = []
        for x in imList:
            results.append(
                process_one((x, digit_hash, imList, bbSearch, nDigits,
                             hspace, rejected_hashes, accepted_hashes,
                             flipmap, queue_progress)))
            job_status, job_metadata = queue_progress.get()
            if job_status == False:
                # job_metadata is (imgpath, errmsg) on failure.
                print "...Uhoh, imP={0} failed in digit-grouping computation.".format(
                    job_metadata[0])
                print " ErrMsg was:", job_metadata[1]
            if wx.App.IsMainLoopRunning():
                wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick",
                             (JOBID_GROUPING_DIGITBASED, ))
    else:
        pool = mp.Pool(processes=nProc)
        result_async = pool.map_async(process_one, jobs)
        pool.close()
        i = 0
        # Drain one progress message per job while the workers run; this is
        # what drives the MyGauge progress bar during the computation.
        while i < len(jobs):
            job_status, job_metadata = queue_progress.get()
            if job_status == False:
                print "...Uhoh, imP={0} failed in digit-grouping computation.".format(
                    job_metadata[0])
                print " ErrMsg was:", job_metadata[1]
            if wx.App.IsMainLoopRunning():
                wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick",
                             (JOBID_GROUPING_DIGITBASED, ))
            i += 1
        pool.join()
        results = result_async.get()
    # TODO: Currently, any images that process_one() crashes on is signaled by
    #       a 'False' value in RESULTS.  We should explicitly handle these
    #       cases (perhaps by having the caller quarantine these images).
    return results
def convertImagesMasterMAP(targetDir, targetMetaDir, imageMetaDir, jobs,
                           img2bal, stopped, queue, result_queue,
                           num_imgs2process, verbose=False, nProc=None,
                           method_galign=GALIGN_NORMAL,
                           method_lalign=LALIGN_NORMAL):
    """ Called by both single and multi-page elections. Performs
    Target Extraction.

    Input:
        str targetDir: Directory to dump extracted target images to.
        str targetMetaDir: Directory to store target metadata into.
        str imageMetaDir: Directory to store metadata for each Ballot,
            such as ballotpath, path to each extracted target, assoc'd
            blank ballot, isflipped.
        list jobs: [[tmppaths_i, bbs_i, imgpaths_i, targetDir_i,
                     targetDiffDir_i, imageMetaDir_i, queue], ...]
        dict img2bal: maps {str imgpath: int ballotid}.
        fn stopped: zero-arg callable; True means the user cancelled.
        queue: progress queue; each worker puts True (success) or an error
            string (failure) per finished job.
        result_queue: result queue; one tuple per processed image (see the
            drain loop at the bottom for the tuple layout).
        int num_imgs2process: number of result tuples to expect.
    Output:
        (avg_intensities, bal2targets) on success, False if cancelled.
    """
    targetDiffDir = targetDir + '_diffs'
    # Wipe any output from a previous run so stale targets can't survive.
    print "...removing previous Target Extract results..."
    _t = time.time()
    if os.path.exists(targetDir):
        shutil.rmtree(targetDir)
    if os.path.exists(targetDiffDir):
        shutil.rmtree(targetDiffDir)
    if os.path.exists(targetMetaDir):
        shutil.rmtree(targetMetaDir)
    if os.path.exists(imageMetaDir):
        shutil.rmtree(imageMetaDir)
    dur = time.time() - _t
    print "...Finished removing previous Target Extract results ({0} s).".format(dur)
    create_dirs(targetDir)
    create_dirs(targetDiffDir)
    create_dirs(targetMetaDir)
    create_dirs(imageMetaDir)
    if nProc == None:
        nProc = sh.numProcs()
    num_jobs = len(jobs)
    if nProc < 2:
        print 'using only 1 processes'
        # default behavior for non multiproc machines
        for job in jobs:
            if stopped():
                return False
            t0 = time.clock();
            convertImagesWorkerMAP(job)
            print time.clock() - t0
    else:
        print 'using ', nProc, ' processes'
        pool = mp.Pool(processes=nProc)
        '''
        it = [False]
        def imdone(x):
            it[0] = True
            print "I AM DONE NOW!"
        '''
        if wx.App.IsMainLoopRunning():
            wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.nextjob",
                         num_jobs)
        print "GOING UP TO", num_jobs
        pool.map_async(convertImagesWorkerMAP, jobs)
        cnt = 0
        # Drain one progress token per job; workers put True on success or
        # an error string on failure.
        while cnt < len(jobs):
            val = queue.get(block=True)
            if val == True:
                if wx.App.IsMainLoopRunning():
                    wx.CallAfter(Publisher().sendMessage,
                                 "signals.MyGauge.tick")
                cnt += 1
            elif type(val) in (str, unicode):
                # Something went wrong!
                print " WARNING: detected a failed extract job {0}.".format(cnt)
                cnt += 1
        pool.close()
        pool.join()
    print " (Finished processing targetextract jobs)"
    cnt = 0
    avg_intensities = []  # [(path, float avg_intensity), ...]
    # maps {int ballotid: {int page: [targetsdir, targetmetadir, diffmetadir,
    #                                 imgmetadir]}}
    bal2targets = {}
    # Collect per-image results pushed by the workers onto result_queue.
    while cnt < num_imgs2process:
        (avg_intensities_cur, balP, page, target_rootdir,
         targetdiff_rootdir, imgmeta_rootdir) = result_queue.get(block=True)
        avg_intensities.extend(avg_intensities_cur)
        ballotid = img2bal[balP]
        bal2targets.setdefault(ballotid, {})[page] = (target_rootdir,
                                                      targetdiff_rootdir,
                                                      imgmeta_rootdir)
        cnt += 1
    print 'done.'
    return avg_intensities, bal2targets
def groupByAttr(bal2imgs, img2page, img2flip, attrName, side, attrMap,
                patchDestDir, stopped, proj, verbose=False, deleteall=True):
    """Group ballots by the value of one image-based attribute.

    Input:
        dict bal2imgs: maps {str ballotid: (sidepath_i, ...)}
        dict IMG2PAGE: maps {str imgpath: int page}.
        dict IMG2FLIP: maps {str imgpath: bool isflip}.
        str attrName: the current attribute type
        int SIDE: index into each ballot's page-ordered image list.
        dict attrMap: maps {str attrtype: {str attrval: (bb, str side, blankpath)}}
        str patchDestDir: A directory, i.e. 'extracted_precincts-ballottype',
            stores the extracted attribute image patches.
        fn stopped: zero-arg callable; True means the user cancelled.
        obj proj: project object; provides projdir_path / multexemplars_map.
    Options:
        bool deleteall: if True, this will first remove all output files
            before computing.
    Output:
        list results: [[int ballotid, attrtype, dict outdict], ...], or
        False if STOPPED() fired.
    """
    if deleteall:
        if os.path.exists(patchDestDir):
            shutil.rmtree(patchDestDir)
    create_dirs(patchDestDir)
    # maps {str attrval: [(str exmpl_imP, obj imagepatch), ...]}
    attr2pat = {}
    superRegion = (float('inf'), 0, float('inf'), 0)
    # attrValMap: {str attrval: (bb, str side, blankpath)}
    # NOTE(review): attrValMap is never read below -- exemplars come from
    # multexemplars_map instead; confirm before removing.
    attrValMap = attrMap[attrName]
    # 0.) First, grab an exemplar patch for each attrval.  Add them to
    # attr2pat, and grow superRegion to cover every exemplar bb.
    # multexemplars_map: maps {attrtype: {attrval: ((str patchpath_i,
    #     str blankpath_i, (x1,y1,x2,y2)), ...)}}
    multexemplars_map = pickle.load(
        open(pathjoin(proj.projdir_path, proj.multexemplars_map), 'rb'))
    exemplar_dict = multexemplars_map[attrName]
    for attrval, exemplars in exemplar_dict.iteritems():
        # Sort, in order to provide a canonical ordering
        exemplars_sorted = sorted(exemplars, key=lambda t: t[0])
        for (patchpath, blankpath, (x1, y1, x2, y2)) in exemplars_sorted:
            P = sh.standardImread(patchpath, flatten=True)
            attr2pat.setdefault(attrval, []).append((blankpath, P))
            superRegion = sh.bbUnion(superRegion, (y1, y2, x1, x2))
    for _attr, patches in attr2pat.iteritems():
        print 'for attr {0}, there are {1} exemplars'.format(
            _attr, len(patches))
    # 1.) Estimate smallest viable scale (for performance)
    if len(attr2pat) > 2:
        scale = estimateScale(attr2pat, img2flip, superRegion,
                              sh.MAX_PRECINCT_PATCH_DIM, stopped)
    else:
        # NOTE(review): P here is whatever patch the loop above loaded last,
        # and is undefined if exemplar_dict was empty -- verify callers
        # always supply at least one exemplar.
        scale = sh.resizeOrNot(P.shape, sh.MAX_PRECINCT_PATCH_DIM)
    print 'ATTR: ', attrName, ': using starting scale:', scale
    # 2.) Generate jobs for the multiprocessing
    nProc = sh.numProcs()
    manager = mp.Manager()
    queue = manager.Queue()            # worker results
    queue_progress = manager.Queue()   # Used for MyGauge updates
    # NOTE(review): the pool is created even when nProc < 2, where it is
    # never used or closed -- looks like a worker-process leak; confirm.
    pool = mp.Pool(processes=nProc)
    jobs = []
    for ballotid in bal2imgs.keys():
        imgpaths = bal2imgs[ballotid]
        imgpaths_ordered = sorted(imgpaths, key=lambda imP: img2page[imP])
        imgpath_in = imgpaths_ordered[side]
        isflip = img2flip[imgpath_in]
        jobs.append([ballotid, [imgpath_in], attrName, superRegion,
                     attr2pat, isflip, scale, patchDestDir, queue,
                     queue_progress])
    print "Number of jobs:", len(jobs)
    # 3.) Perform jobs.
    if nProc < 2:
        # default behavior for non multiproc machines
        for job in jobs:
            if stopped():
                return False
            groupImagesWorkerMAP(job)
    else:
        print 'using ', nProc, ' processes'
        it = [False]

        def imdone(x):
            it[0] = True
            print "I AM DONE NOW! WOW"

        pool.map_async(groupImagesWorkerMAP, jobs,
                       callback=lambda x: imdone(it))
        i = 0
        # Drain one progress message per job to drive the UI gauge.
        while i < len(jobs):
            job_status, job_ballotid = queue_progress.get()  # Blocks until value is ready
            if job_status == False:
                print "...Uhoh, ballotid={0} had a grouping failure.".format(
                    job_ballotid)
            if wx.App.IsMainLoopRunning():
                wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick",
                             (JOBID_GROUPING_IMGBASED, ))
            i += 1
        # Wait for the map_async completion callback, honoring cancellation.
        while not it[0]:
            if stopped():
                print ' UHOH, stopped'
                pool.terminate()
                return False
            time.sleep(.1)
        print "HERE"
        pool.close()
        pool.join()
        print "GOT HERE."
    # list RESULTS: [[int ballotid, attrtype, dict outdict], ...]
    # Workers put an error string on QUEUE instead of a result on failure.
    results = []
    cnt = 0
    while cnt < len(jobs):
        res = queue.get()
        if type(res) in (str, unicode):
            print "OH NO, badness happened."
        else:
            results.append(res)
        cnt += 1
        print 'cnt: ', cnt
    # TODO: quarantine on grouping errors.  For now, just let alignment
    # check handle it
    print 'ATTR: ', attrName, ': done'
    return results
def estimateScale(attr2pat, img2flip, superRegion, rszFac, stopped): """ Input: dict attr2pat: maps {str attrval: [[str exmpl_imP, nparray imgpatch_i], ...]} tuple superRegion: float rszFac: fn stopped: Output: A scale. """ print 'estimating scale.' jobs = [] sStep = .05 sList = [] nProc = sh.numProcs() #nProc = 1 queue = mp.Queue() pool = mp.Pool(processes=nProc, initializer=templateSSWorker_init, initargs=[queue]) for attrval in attr2pat.keys(): jobs.append((attr2pat, attrval, superRegion, sStep, img2flip)) if nProc < 2: # default behavior for non multiproc machines for job in jobs: if stopped(): return False templateSSWorker.queue = queue templateSSWorker(job) else: print 'using ', nProc, ' processes' it = [False] def imdone(x): it[0] = True print "I AM DONE NOW!" pool.map_async(templateSSWorker, jobs, callback=lambda x: imdone(it)) while not it[0]: if stopped(): pool.terminate() return False time.sleep(.1) pool.close() pool.join() # collect results while len(sList) < len(jobs): sList.append(queue.get()) #for job in jobs: # f1=job[5] # s=pickle.load(open(f1))['scale'] # sList.append(s) print sList scale = min(max(sList) + 2 * sStep, rszFac) #scale = 0.95 return scale
def convertImagesMasterMAP(targetDir, targetMetaDir, imageMetaDir, jobs,
                           img2bal, stopped, queue, result_queue,
                           num_imgs2process, verbose=False, nProc=None,
                           method_galign=GALIGN_NORMAL,
                           method_lalign=LALIGN_NORMAL):
    """ Called by both single and multi-page elections. Performs
    Target Extraction.

    Input:
        str targetDir: Directory to dump extracted target images to.
        str targetMetaDir: Directory to store target metadata into.
        str imageMetaDir: Directory to store metadata for each Ballot,
            such as ballotpath, path to each extracted target, assoc'd
            blank ballot, isflipped.
        list jobs: [[tmppaths_i, bbs_i, imgpaths_i, targetDir_i,
                     targetDiffDir_i, imageMetaDir_i, queue], ...]
        dict img2bal: maps {str imgpath: int ballotid}.
        fn stopped: zero-arg callable; True means the user cancelled.
        queue: progress queue; each worker puts True (success) or an error
            string (failure) per finished job.
        result_queue: result queue; one tuple per processed image (see the
            drain loop at the bottom for the tuple layout).
        int num_imgs2process: number of result tuples to expect.
    Output:
        (avg_intensities, bal2targets) on success, False if cancelled.
    """
    targetDiffDir = targetDir + '_diffs'
    # Wipe any output from a previous run so stale targets can't survive.
    print "...removing previous Target Extract results..."
    if os.path.exists(targetDir):
        shutil.rmtree(targetDir)
    if os.path.exists(targetDiffDir):
        shutil.rmtree(targetDiffDir)
    if os.path.exists(targetMetaDir):
        shutil.rmtree(targetMetaDir)
    if os.path.exists(imageMetaDir):
        shutil.rmtree(imageMetaDir)
    print "...Finished removing previous Target Extract results"
    create_dirs(targetDir)
    create_dirs(targetDiffDir)
    create_dirs(targetMetaDir)
    create_dirs(imageMetaDir)
    if nProc is None:
        nProc = sh.numProcs()
    num_jobs = len(jobs)
    if nProc < 2:
        print 'using only 1 processes'
        # default behavior for non multiproc machines
        for job in jobs:
            if stopped():
                return False
            t0 = time.clock()
            convertImagesWorkerMAP(job)
            print time.clock() - t0
    else:
        print 'using ', nProc, ' processes'
        pool = mp.Pool(processes=nProc)
        '''
        it = [False]
        def imdone(x):
            it[0] = True
            print "I AM DONE NOW!"
        '''
        if wx.App.IsMainLoopRunning():
            util.MyGauge.all_next_job(num_jobs)
        print "GOING UP TO", num_jobs
        pool.map_async(convertImagesWorkerMAP, jobs)
        cnt = 0
        # Drain one progress token per job; workers put True on success or
        # an error string on failure.
        while cnt < len(jobs):
            val = queue.get(block=True)
            if val == True:
                if wx.App.IsMainLoopRunning():
                    util.MyGauge.all_tick()
                cnt += 1
            elif type(val) in (str, unicode):
                # Something went wrong!
                print " WARNING: detected a failed extract job {0}.".format(
                    cnt)
                cnt += 1
        pool.close()
        pool.join()
    print " (Finished processing targetextract jobs)"
    cnt = 0
    avg_intensities = []  # [(path, float avg_intensity), ...]
    # maps {int ballotid: {int page: [targetsdir, targetmetadir, diffmetadir,
    # imgmetadir]}}
    bal2targets = {}
    # Collect per-image results pushed by the workers onto result_queue.
    while cnt < num_imgs2process:
        (avg_intensities_cur, balP, page, target_rootdir,
         targetdiff_rootdir, imgmeta_rootdir) = result_queue.get(block=True)
        avg_intensities.extend(avg_intensities_cur)
        ballotid = img2bal[balP]
        bal2targets.setdefault(ballotid, {})[page] = (target_rootdir,
                                                      targetdiff_rootdir,
                                                      imgmeta_rootdir)
        cnt += 1
    print 'done.'
    return avg_intensities, bal2targets
def digitParse(digit_hash,imList,bbSearch,nDigits, flipmap=None, hspace=20, rejected_hashes=None, accepted_hashes=None): """Runs NCC-based OCR on the images on imList. Input: dict digit_hash: maps {(str digit, str meta): img digit_exemplar} lst imList: list of imagepaths to search over bbSearch: [y1,y2,x1,x2] coords to search on nDigits: an integer that specifies how many digits there are. dict flipmap: maps {str imgpath: bool isflip} dict rejected_hashes: Contains all user rejections for each image, maps: {imgpath: {str digit: [((y1,y2,x1,x2),str side_i,bool isflip_i), ...]}} dict accepted_hashes: Contains all user accepts for each image, maps: {imgpath: {str digit: [((y1,y2,x1,x2),str side_i,bool isflip_i), ...]}} Output: A list of results of the form: [(imgpath_i, ocr_str_i, imgpatches_i, patchcoords_i, scores_i), ... ] """ digitList = digit_hash.values(); patchExample = digitList[0] nProc=sh.numProcs() #nProc = 1 manager = mp.Manager() queue_progress = manager.Queue() # Used for MyGauge updates jobs = [(x,digit_hash,imList,bbSearch,nDigits, hspace, rejected_hashes,accepted_hashes,flipmap, queue_progress) for x in imList] if nProc < 2: results = [] for x in imList: results.append(process_one((x,digit_hash,imList,bbSearch,nDigits,hspace,rejected_hashes,accepted_hashes,flipmap, queue_progress))) job_status, job_metadata = queue_progress.get() if job_status == False: print "...Uhoh, imP={0} failed in digit-grouping computation.".format(job_metadata[0]) print " ErrMsg was:", job_metadata[1] if wx.App.IsMainLoopRunning(): wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick", (JOBID_GROUPING_DIGITBASED,)) else: pool = mp.Pool(processes=nProc) #results = pool.map(process_one, [(x,digit_hash,imList,bbSearch,nDigits, hspace, rejected_hashes,accepted_hashes,flipmap) for x in imList]) result_async = pool.map_async(process_one, jobs) pool.close() i = 0 while i < len(jobs): job_status, job_metadata = queue_progress.get() if job_status == False: print "...Uhoh, 
imP={0} failed in digit-grouping computation.".format(job_metadata[0]) print " ErrMsg was:", job_metadata[1] if wx.App.IsMainLoopRunning(): wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick", (JOBID_GROUPING_DIGITBASED,)) i += 1 pool.join() results = result_async.get() # TODO: Currently, any images that process_one() crashes on is signaled by # A 'False' value in RESULTS. We should explicitly handle these # cases (perhaps by having the caller quarantine these images). return results
def groupByAttr(bal2imgs, img2page, img2flip, attrName, side, attrMap,
                patchDestDir, stopped, proj, verbose=False, deleteall=True):
    """Group ballots by the value of one image-based attribute.

    Input:
        dict bal2imgs: maps {str ballotid: (sidepath_i, ...)}
        dict IMG2PAGE: maps {str imgpath: int page}.
        dict IMG2FLIP: maps {str imgpath: bool isflip}.
        str attrName: the current attribute type
        int SIDE: index into each ballot's page-ordered image list.
        dict attrMap: maps {str attrtype: {str attrval: (bb, str side, blankpath)}}
        str patchDestDir: A directory, i.e. 'extracted_precincts-ballottype',
            stores the extracted attribute image patches.
        fn stopped: zero-arg callable; True means the user cancelled.
        obj proj: project object; provides projdir_path / multexemplars_map.
    Options:
        bool deleteall: if True, this will first remove all output files
            before computing.
    Output:
        list results: [[int ballotid, attrtype, dict outdict], ...], or
        False if STOPPED() fired.
    """
    if deleteall:
        if os.path.exists(patchDestDir):
            shutil.rmtree(patchDestDir)
    create_dirs(patchDestDir)
    # maps {str attrval: [(str exmpl_imP, obj imagepatch), ...]}
    attr2pat = {}
    superRegion = (float('inf'), 0, float('inf'), 0)
    # attrValMap: {str attrval: (bb, str side, blankpath)}
    # NOTE(review): attrValMap is never read below -- exemplars come from
    # multexemplars_map instead; confirm before removing.
    attrValMap = attrMap[attrName]
    # 0.) First, grab an exemplar patch for each attrval.  Add them to
    # attr2pat, and grow superRegion to cover every exemplar bb.
    # multexemplars_map: maps {attrtype: {attrval: ((str patchpath_i,
    #     str blankpath_i, (x1,y1,x2,y2)), ...)}}
    multexemplars_map = pickle.load(
        open(pathjoin(proj.projdir_path, proj.multexemplars_map), 'rb'))
    exemplar_dict = multexemplars_map[attrName]
    for attrval, exemplars in exemplar_dict.iteritems():
        # Sort, in order to provide a canonical ordering
        exemplars_sorted = sorted(exemplars, key=lambda t: t[0])
        for (patchpath, blankpath, (x1, y1, x2, y2)) in exemplars_sorted:
            P = sh.standardImread(patchpath, flatten=True)
            attr2pat.setdefault(attrval, []).append((blankpath, P))
            superRegion = sh.bbUnion(superRegion, (y1, y2, x1, x2))
    for _attr, patches in attr2pat.iteritems():
        print 'for attr {0}, there are {1} exemplars'.format(
            _attr, len(patches))
    # 1.) Estimate smallest viable scale (for performance)
    if len(attr2pat) > 2:
        scale = estimateScale(attr2pat, img2flip, superRegion,
                              sh.MAX_PRECINCT_PATCH_DIM, stopped)
    else:
        # NOTE(review): P here is whatever patch the loop above loaded last,
        # and is undefined if exemplar_dict was empty -- verify callers
        # always supply at least one exemplar.
        scale = sh.resizeOrNot(P.shape, sh.MAX_PRECINCT_PATCH_DIM);
    print 'ATTR: ', attrName, ': using starting scale:', scale
    # 2.) Generate jobs for the multiprocessing
    nProc = sh.numProcs()
    manager = mp.Manager()
    queue = manager.Queue()            # worker results
    queue_progress = manager.Queue()   # Used for MyGauge updates
    # NOTE(review): the pool is created even when nProc < 2, where it is
    # never used or closed -- looks like a worker-process leak; confirm.
    pool = mp.Pool(processes=nProc)
    jobs = []
    for ballotid in bal2imgs.keys():
        imgpaths = bal2imgs[ballotid]
        imgpaths_ordered = sorted(imgpaths, key=lambda imP: img2page[imP])
        imgpath_in = imgpaths_ordered[side]
        isflip = img2flip[imgpath_in]
        jobs.append([ballotid, [imgpath_in], attrName, superRegion,
                     attr2pat, isflip, scale, patchDestDir, queue,
                     queue_progress])
    print "Number of jobs:", len(jobs)
    # 3.) Perform jobs.
    if nProc < 2:
        # default behavior for non multiproc machines
        for job in jobs:
            if stopped():
                return False
            groupImagesWorkerMAP(job)
    else:
        print 'using ', nProc, ' processes'
        it = [False]

        def imdone(x):
            it[0] = True
            print "I AM DONE NOW! WOW"

        pool.map_async(groupImagesWorkerMAP, jobs,
                       callback=lambda x: imdone(it))
        i = 0
        # Drain one progress message per job to drive the UI gauge.
        while i < len(jobs):
            job_status, job_ballotid = queue_progress.get()  # Blocks until value is ready
            if job_status == False:
                print "...Uhoh, ballotid={0} had a grouping failure.".format(
                    job_ballotid)
            if wx.App.IsMainLoopRunning():
                wx.CallAfter(Publisher().sendMessage, "signals.MyGauge.tick",
                             (JOBID_GROUPING_IMGBASED,))
            i += 1
        # Wait for the map_async completion callback, honoring cancellation.
        while not it[0]:
            if stopped():
                print ' UHOH, stopped'
                pool.terminate()
                return False
            time.sleep(.1)
        print "HERE"
        pool.close()
        pool.join()
        print "GOT HERE."
    # list RESULTS: [[int ballotid, attrtype, dict outdict], ...]
    # Workers put an error string on QUEUE instead of a result on failure.
    results = []
    cnt = 0
    while cnt < len(jobs):
        res = queue.get()
        if type(res) in (str, unicode):
            print "OH NO, badness happened."
        else:
            results.append(res)
        cnt += 1
        print 'cnt: ', cnt
    # TODO: quarantine on grouping errors.  For now, just let alignment
    # check handle it
    print 'ATTR: ', attrName, ': done'
    return results