def main():
    """Run the prompted ballot-extraction loop.

    Loads the configuration named on the command line, creates the working
    directories, resolves the ballot type, template cache and database
    handle, and then loops:

    1. read the next ballot number from nexttoprocess.txt;
    2. when no ballots are pending, print a "Next=NNNNNN , ..." prompt on
       stdout and wait for get_count_to_process() to return how many
       ballots to handle (0 ends the loop);
    3. build a ballot from const.num_pages incoming images, process it,
       insert it into the database, move the images into the "proc" tree
       and advance nexttoprocess.txt by const.num_pages.

    A missing image is not fatal: it is reported on stdout and the next
    prompt offers "+ to SKIP".  Database and rename failures are passed to
    util.fatal().

    NOTE(review): this function assumes util.fatal() does not return
    (otherwise `ballotfrom` / `dbc` could be unbound) -- confirm in util.
    """
    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(util.root("extraction.log"))

    # create initial toplevel directories if they don't exist
    for dirname in ("templates", "template_images", "composite_images",
                    "results", "proc", "errors"):
        util.mkdirp(util.root(dirname))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        # Format the message here: handing util.fatal a single tuple for a
        # two-placeholder format string cannot be expanded when fatal()
        # applies its extra arguments with `%` (as the
        # util.fatal("Could not rename %s", a) call below relies on).
        util.fatal("No such ballot type: %s check %s !"
                   % (const.layout_brand, cfg_file))

    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if const.use_db:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database!")
    else:
        dbc = db.NullDB()
    log.info("Database connected.")

    # Running totals; updated below but not reported by this function.
    total_images_processed, total_images_left_unprocessed = 0, 0
    base = os.path.basename

    # Each time given a signal to proceed for count_to_process ballots,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory.
    next_to_process_file = util.root("nexttoprocess.txt")
    count_to_process = 0
    file_problem = False
    while True:
        log.debug("Top of loop.")
        next_ballot_number = int(util.readfrom(next_to_process_file))
        log.debug("Read %d from %s", next_ballot_number, next_to_process_file)
        if count_to_process == 0:
            # send prompt to controlling process, "READY:" or "+ for SKIP:"
            if file_problem:
                file_problem = False
                # do not remove space after %06d
                print("Next=%06d , + to SKIP:" % (next_ballot_number, ))
            else:
                # do not remove space after %06d
                print("Next=%06d , READY:" % (next_ballot_number, ))
            sys.stdout.flush()
            # wait here until get_count_to_process returns
            # it will wait on input instruction from stdio
            try:
                count_to_process = get_count_to_process(
                    next_ballot_number, log)
            except DoIncrementException:
                # Skip the current ballot: advance the counter file and
                # go back to the prompt without processing anything.
                log.debug("Do increment exception")
                util.writeto(next_to_process_file,
                             next_ballot_number + const.num_pages)
                log.debug(
                    "Wrote %d to next_ballot_number, count to process is %d",
                    next_ballot_number + const.num_pages, count_to_process)
                count_to_process = 0
                log.debug("Setting count to process to 0.")
                continue
            # we're done when we get instructed to process 0
            if count_to_process == 0:
                break
        count_to_process -= 1
        try:
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d", next_ballot_number)
            unprocs = [
                incomingn(next_ballot_number + m)
                for m in range(const.num_pages)
            ]
            log.info(unprocs)
            # we need all images for sheet to be available to process it
            for filename in unprocs:
                log.info("Checking for path.")
                if not os.path.exists(filename):
                    log.info("File not present.")
                    errmsg = "File %s not present or available!!!" % (
                        base(filename), )
                    # The log copy drops the "!!!" that the stdout copy
                    # keeps.
                    log.info(errmsg.replace("!!!", ""))
                    print(errmsg)
                    sys.stdout.flush()
                    raise FileNotPresentException(filename)
                log.info("Path found.")

            # Processing
            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                # The return value is not used here; the ballot object
                # itself is what gets inserted into the database below.
                ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                # Move past the failed ballot so it is not retried.
                util.writeto(next_to_process_file,
                             next_ballot_number + const.num_pages)
                continue

            # Write all data
            # make dirs:
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            for target_dir in (proc1d, resultsd):
                util.mkdirp(target_dir)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to
                # remove partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                log.info("Could not commit to db")
                print("Could not commit to db!")
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            log.debug("Renaming")
            procs = [
                filen(proc1d, next_ballot_number + m)
                + const.filename_extension
                for m in range(const.num_pages)
            ]
            for src, dst in zip(unprocs, procs):
                try:
                    os.rename(src, dst)
                except OSError:
                    log.info("Could not rename %s", src)
                    util.fatal("Could not rename %s", src)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot by
            # updating the next ballot file with the next image number.
            log.debug("Requesting next")
            util.writeto(next_to_process_file,
                         next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            log.info("%d images processed", const.num_pages)
        except FileNotPresentException:
            # Remember the failure so the next prompt offers "+ to SKIP".
            file_problem = True
            print("FileNotPresentException")
            sys.stdout.flush()
            log.info("FileNotPresentException occurred")
            continue
def main():
    """Process incoming ballot images in batch until they run out.

    Loads the configuration named on the command line, prepares the
    working directories (template_images / composite_images are suffixed
    with this process's pid), the ballot type, the shared template cache
    and the database handle, then iterates over the ballot numbers yielded
    by next.File for nexttoprocess.txt.

    For each number the const.num_pages incoming images are turned into a
    ballot, processed, written out as text results, inserted into the
    database and moved into the "proc" tree.  The loop stops once the
    first page of a ballot has been found missing more than 10 times.
    On the way out -- normally or via an exception -- the template cache
    is saved, the database handle closed, the ballot counter saved and the
    totals logged.
    """
    missing_count = 0

    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)

    # create initial top level dirs, if they do not exist
    pid = os.getpid()
    toplevel_dirs = (
        "templates",
        "template_images%d" % pid,
        "composite_images%d" % pid,
        "results",
        "proc",
        "errors",
    )
    for dirname in toplevel_dirs:
        util.mkdirp(util.root(dirname))

    next_ballot = next.File(util.root("nexttoprocess.txt"), const.num_pages)

    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        complaint = ("No such ballot type: " + const.layout_brand
                     + ": check " + cfg_file)
        util.fatal(complaint)

    # allow all instances to share a common template location,
    # though need per-pid locs for template_images and composite_images
    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if not const.use_db:
        dbc = db.NullDB()
    else:
        try:
            dbc = db.PostgresDB(const.dbname, const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")

    total_proc, total_unproc = 0, 0
    base = os.path.basename

    # While ballot images exist in the directory specified in tevs.cfg,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory. Repeat.
    try:
        for n in next_ballot:
            gc.collect()
            unprocs = [incomingn(n + page) for page in range(const.num_pages)]

            # Only the first page is checked for existence here.
            first_page = unprocs[0]
            if not os.path.exists(first_page):
                missing_count += 1
                log.info("%s does not exist. No more records to process",
                         base(first_page))
                if missing_count > 10:
                    break
                continue

            # Processing
            listing = "\n".join("\t%s" % base(u) for u in unprocs)
            log.info("Processing %s:\n %s", n, listing)
            try:
                ballot = ballotfrom(unprocs, extensions)
                results = ballot.ProcessPages()
            except BallotException as e:
                total_unproc += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            csv = Ballot.results_to_CSV(results)

            # Write all data
            # make dirs:
            proc1d = dirn("proc", n)
            resultsd = dirn("results", n)
            resultsfilename = filen(resultsd, n)
            util.mkdirp(proc1d)
            util.mkdirp(resultsd)

            # A failure writing the vop files is only reported on stdout;
            # processing of this ballot carries on.
            try:
                results_to_vop_files(results, resultsfilename)
            except Exception as e:
                print(e)

            # write csv
            util.genwriteto(resultsfilename + ".txt", csv)

            # write to the database
            try:
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to
                # remove partial files
                for suffix in (".txt", const.filename_extension):
                    remove_partial(resultsfilename + suffix)
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            for offset, src in enumerate(unprocs):
                dst = filen(proc1d, n + offset) + const.filename_extension
                try:
                    os.rename(src, dst)
                except OSError:
                    util.fatal("Could not rename %s", src)
            total_proc += const.num_pages
            log.info("%d images processed", const.num_pages)
    finally:
        cache.save_all()
        dbc.close()
        next_ballot.save()
        log.info("%d images processed", total_proc)
        if total_unproc > 0:
            log.warning("%d images NOT processed.", total_unproc)
def main():
    """Run the command-driven ballot-extraction loop.

    Loads the configuration named on the command line, creates the working
    directories, resolves the ballot type, template cache and database
    handle, and then loops.  Whenever no ballot is pending, the next
    command is fetched with get_processing_command() and interpreted by
    its first character:

      "+"  skip ahead by const.num_pages, store that number, process it
      "="  jump to the number following the "=", store it, process it
      "S"  process the ballot at the current number
      "0"  stop

    Each processed ballot is built from const.num_pages incoming images,
    processed, inserted into the database, and its images are moved into
    the "proc" tree; nexttoprocess.txt is then advanced by
    const.num_pages.  A missing image is reported on stdout and is not
    fatal.
    """
    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)
    log.info("Log created.")

    # create initial toplevel directories if they don't exist
    for dirname in ("templates", "template_images", "composite_images",
                    "results", "proc", "errors"):
        util.mkdirp(util.root(dirname))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        complaint = ("No such ballot type: " + const.layout_brand
                     + ": check " + cfg_file)
        util.fatal(complaint)

    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if not const.use_db:
        dbc = db.NullDB()
    else:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")
    log.info("Database connected.")

    # Running totals; updated below but not reported by this function.
    total_images_processed, total_images_left_unprocessed = 0, 0
    base = os.path.basename

    # Each time given a signal to proceed for count_to_process ballots,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory.
    counter_path = util.root("nexttoprocess.txt")
    count_to_process = 0
    while True:
        next_ballot_number = int(util.readfrom(counter_path))
        if count_to_process == 0:
            # wait here until get_processing_command returns
            command = get_processing_command(next_ballot_number)
            if command.startswith("+"):
                next_ballot_number += const.num_pages
                util.writeto(counter_path, next_ballot_number)
                count_to_process = 1
            elif command.startswith("="):
                # NOTE(review): a non-numeric payload raises ValueError
                # here and is not caught -- confirm that is acceptable.
                next_ballot_number = int(command[1:])
                util.writeto(counter_path, next_ballot_number)
                count_to_process = 1
            elif command.startswith("S"):
                count_to_process = 1
            elif command.startswith("0"):
                count_to_process = 0
            # NOTE(review): any other command leaves the count at 0 and so
            # also ends the loop below -- confirm this is intended.

            # we're done when we get instructed to process 0
            if count_to_process == 0:
                break
        count_to_process -= 1
        try:
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d", next_ballot_number)
            unprocs = [
                incomingn(next_ballot_number + page)
                for page in range(const.num_pages)
            ]
            log.info(unprocs)

            # we need all images for sheet to be available to process it
            for filename in unprocs:
                if os.path.exists(filename):
                    continue
                errmsg = "File %s not present or available!" % (
                    base(filename), )
                log.info(errmsg)
                # if a file is not yet available, that's not fatal
                raise FileNotPresentException(errmsg)

            # Processing
            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                # The return value is not used here; the ballot object
                # itself is what gets inserted into the database below.
                ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                # The counter file is left untouched on this path.
                continue

            # Write all data
            # make dirs:
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            util.mkdirp(proc1d)
            util.mkdirp(resultsd)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to
                # remove partial files
                for suffix in (".txt", const.filename_extension):
                    remove_partial(resultsfilename + suffix)
                log.info("Could not commit to db")
                print("Could not commit to db!")
                util.fatal("Could not commit vote information to database")

            # Post-processing
            # move the images from unproc to proc
            log.debug("Renaming")
            procs = [
                filen(proc1d, next_ballot_number + page)
                + const.filename_extension
                for page in range(const.num_pages)
            ]
            for src, dst in zip(unprocs, procs):
                try:
                    os.rename(src, dst)
                except OSError:
                    log.info("Could not rename %s", src)
                    util.fatal("Could not rename %s", src)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot by
            # updating the next ballot file with the next image number.
            log.debug("Requesting next")
            util.writeto(counter_path, next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            log.info("%d images processed", const.num_pages)
        except FileNotPresentException as missing:
            # Report the missing image on stdout and go round again.
            print(missing)
            sys.stdout.flush()