def real_build_front_layout(self, page):
    """Build the front-page layout from answers typed in by the user.

    The user first gives the x offsets of the vote columns.  For each
    column, contest names are read until 'x' is entered; every contest is
    followed by a comma separated list of choices and the x/y offset of
    each choice's printed vote target.

    `page` is part of the interface but is not read here.

    Returns the list of Ballot.Contest objects (each holding its
    Ballot.Choice entries) in the order they were entered.
    """
    contest_prompt = """Enter a contest name. When done entering contests, \ntype 'x' and the <enter> key to continue."""
    contests = []
    column_offsets = ask(
        """Enter the column offsets of the vote columns, separated by commas""",
        CSV(int))
    for column_number, column_x in enumerate(column_offsets):
        # a single-argument print(...) produces the same output whether it
        # is parsed as a statement or as a function call
        print(" ".join(("Contests for Column", str(column_number),
                        "at x offset", str(column_x))))
        contest_name = ask(contest_prompt)
        while contest_name.strip().lower() != "x":
            choice_names = ask("Enter a comma separated list of choices",
                               CSV())
            # Contest takes the bounding box x1,y1,x2,y2, then 0 for a
            # regular contest or 1 for a proposition, then the contest
            # text; the box and flag are just dummied here
            contest = Ballot.Contest(column_x, 1, 199, 5 * const.dpi, 0,
                                     contest_name)
            contests.append(contest)
            for choice_name in choice_names:
                target_x = ask(
                    "Enter the x offset of the upper left hand corner \nof the printed vote target for "
                    + choice_name, int)
                target_y = ask(
                    "Enter the y offset of the upper left hand corner \nof the printed vote target for "
                    + choice_name, int)
                # Choice takes the x,y of the upper left corner of the
                # printed vote opportunity and the text of the choice
                #TODO add x2,y2
                contest.append(Ballot.Choice(target_x, target_y, choice_name))
            contest_name = ask(contest_prompt)
    return contests
def real_find_front_landmarks(self, page):
    """Ask the user for the front-page landmarks and derive tilt and offsets.

    Nothing is measured from the image: the upper left and upper right
    landmark coordinates and the intensity of each page corner all come
    from the user's answers.

    Args:
        page: only page.filename is read, for error messages.

    Returns:
        (rot, xoff, yoff, longdiff) where xoff, yoff are the upper left
        landmark, longdiff is the x distance between the two landmarks
        and rot is -(y difference) / longdiff.

    Raises:
        Ballot.BallotException: if any coordinate is -1, if a corner
            intensity is below 16, if both landmarks share the same x
            coordinate, or if abs(rot) exceeds const.allowed_tangent.
    """
    a = ask(
        "Enter the x coordinate of an upper left landmark; "
        "if your template is not offset or tilted, you could use 150. "
        "If there's no such landmark, enter -1: ", int, -1)
    b = ask(
        "Now enter the corresponding y coordinate; "
        "if your template is not offset or tilted, you could use 75. "
        "If there's no such landmark, enter -1: ", int, -1)
    c = ask(
        "Enter the x coordinate of an upper RIGHT landmark; "
        "if your template is not offset or tilted, you could use 2050. "
        "If there's no such landmark, enter -1: ", int, -1)
    d = ask(
        "Enter the corresponding y coordinate; "
        "if your template is not offset or tilted, you could use 75. "
        "If there's no such landmark, enter -1: ", int, -1)
    if -1 in (a, b, c, d):
        raise Ballot.BallotException("Could not find landmarks")

    # flunk ballots whose corners are reported as dark, to avoid dealing
    # with severely skewed ballots; the intensity is asked for, so no
    # corner boxes need to be cropped from the image
    errmsg = "Dark %s corner on %s"
    for corner in ("upper left", "upper right", "lower right", "lower left"):
        avg_darkness = ask("What's the intensity at the " + corner,
                           IntIn(0, 255))
        if int(avg_darkness) < 16:
            raise Ballot.BallotException(errmsg % (corner, page.filename))

    xoff = a
    yoff = b
    shortdiff = d - b
    longdiff = c - a
    # identical x answers would otherwise surface as a ZeroDivisionError
    # instead of the BallotException used for every other bad landmark
    if longdiff == 0:
        raise Ballot.BallotException(
            "Landmarks of %s have the same x coordinate" % (page.filename, ))
    rot = -shortdiff / float(longdiff)
    if abs(rot) > const.allowed_tangent:
        raise Ballot.BallotException(
            "Tilt %f of %s exceeds %f" %
            (rot, page.filename, const.allowed_tangent))
    return rot, xoff, yoff, longdiff
def do(color, box, v, a):
    """Draw one synthetic vote target and check Ballot.IsVoted against it.

    A white 100x100 image gets a black square, then the `carve` region is
    painted white (`carve` is not defined in this block and must come from
    the surrounding scope), then `box` is filled with the grey "#" +
    color*3.  The voted and ambiguous flags returned by Ballot.IsVoted
    must equal `v` and `a`.
    """
    canvas = Image.new("RGB", (100, 100), "#fff")
    pen = ImageDraw.Draw(canvas)
    layers = (
        ((20, 20, 80, 80), "#000"),
        (carve, "#fff"),
        (box, "#" + color*3),
    )
    for region, paint in layers:
        pen.rectangle(region, fill=paint)
    stats = Ballot.IStats(canvas.cropstats(100, 5, 20, 20, 60, 60, 1))
    voted, ambiguous = Ballot.IsVoted(canvas, stats, None)
    assert voted == v and ambiguous == a
def _scan_left_for_white(crop, start_x, y):
    """Return the first x, walking left from start_x along row y, whose
    first channel is lighter than 128; return 0 if none is found."""
    for x in range(start_x, 0, -1):
        if crop.getpixel((x, y))[0] > 128:
            return x
    return 0


def get_offsets_and_tangent_from_blocks(im, dpi, dash_sep_in_pixels):
    """Locate the landmark blocks at the top left and top right of an image.

    A strip adj(block_zone_width_to_crop) wide and dpi tall is cropped
    from each side, starting adj(block_zone_upper_y) below the top, and
    searched with find_y_of_landmark_pattern.

    Args:
        im: image providing size, crop() and, on its crops, getpixel().
        dpi: height of the search strips; also passed to the pattern
            search.
        dash_sep_in_pixels: kept for interface compatibility; not used.

    Returns:
        (left_x, left_y, right_x, right_y, tangent) in image coordinates,
        where tangent is the y difference of the two pattern starts
        divided by (image width - strip width).

    Raises:
        Ballot.BallotException: if either landmark pattern is not found.
    """
    # NOTE(review): adj scales by const.dpi while the strip height uses
    # the dpi argument -- confirm callers always pass const.dpi.
    def adj(inches):
        return int(round(const.dpi * inches))

    strip_width = adj(block_zone_width_to_crop)
    croptop = adj(block_zone_upper_y)
    cropbottom = croptop + dpi
    rightstart = im.size[0] - strip_width
    leftcrop = im.crop((0, croptop, strip_width, cropbottom))
    rightcrop = im.crop((rightstart, croptop, im.size[0] - 1, cropbottom))

    # look for black white black bar pattern and return y of pattern start
    scanx = adj(0.1)
    leftstarty = find_y_of_landmark_pattern(leftcrop, dpi, scanx, scanx * 2)
    if leftstarty == -1:
        raise Ballot.BallotException("Failed to find left landmark.")
    rightstarty = find_y_of_landmark_pattern(rightcrop, dpi, scanx, scanx * 2)
    if rightstarty == -1:
        raise Ballot.BallotException("Failed to find right landmark.")

    dash_center_offset = adj(v_offset_to_dash_center)
    # go leftward along the center of the top dash until a light pixel
    scanx = adj(0.2)
    leftstartx = _scan_left_for_white(
        leftcrop, scanx, leftstarty + dash_center_offset)
    rightstartx = _scan_left_for_white(
        rightcrop, scanx, rightstarty + dash_center_offset)

    # float() keeps integer division from truncating the small tangent to
    # 0 or -1
    tangent = (rightstarty - leftstarty) / float(im.size[0] - strip_width)
    return (leftstartx,
            leftstarty + croptop,
            rightstart + rightstartx,
            rightstarty + croptop,
            tangent)
def find_landmarks(self, page):
    """Find the landmarks of a page and return its tilt and offsets.

    The upper left and upper right landmark coordinates come from
    get_offsets_and_tangent_from_blocks.  Before they are used, the four
    corner squares of the page (side self.allowed_corner_black) are
    checked: a corner whose first-band mean is below 16 flunks the
    ballot, to avoid dealing with severely skewed ballots.

    Returns (rot, xoff, yoff, longdiff): the tilt tangent recomputed from
    the landmark coordinates, the upper left landmark, and the x distance
    between the landmarks.

    Raises Ballot.BallotException for a dark corner or when abs(rot)
    exceeds const.allowed_tangent.
    """
    def adj(inches):
        return int(round(const.dpi * inches))

    # the tangent returned by the helper is not used; rot is recomputed
    # from the coordinates below
    left_x, left_y, right_x, right_y, _tilt = (
        get_offsets_and_tangent_from_blocks(page.image, const.dpi,
                                            adj(0.17)))

    side = self.allowed_corner_black
    width, height = page.image.size
    corner_boxes = (
        ("upper left", (0, 0, side, side)),
        ("upper right", (width - side, 0, width - 1, side)),
        ("lower right", (width - side, height - side,
                         width - 1, height - 1)),
        ("lower left", (0, height - side, side, height - 1)),
    )
    for corner, box in corner_boxes:
        corner_stat = ImageStat.Stat(page.image.crop(box))
        if corner_stat.mean[0] < 16:
            raise Ballot.BallotException(
                "Dark %s corner on %s" % (corner, page.filename))

    xoff, yoff = left_x, left_y
    shortdiff = right_y - left_y
    longdiff = right_x - left_x
    rot = -shortdiff / float(longdiff)
    if abs(rot) > const.allowed_tangent:
        raise Ballot.BallotException(
            "Tilt %f of %s exceeds %f" %
            (rot, page.filename, const.allowed_tangent))
    self.log.debug("find landmarks returning %f,%d,%d, %d" %
                   (rot, xoff, yoff, longdiff))
    # Ballot.py defines a distance y2y to be used for scaling between
    # template and ballot images.  Both landmarks here are at the top, so
    # longdiff is consistently used for scaling instead.
    return rot, xoff, yoff, longdiff
def get_layout_code(self, page):
    """Read the layout code from the barcode left of the upper left corner.

    The barcode is searched in a strip to the left of (page.xoff,
    page.yoff) starting 1/8" above it.  When hart_barcode fails or its
    result does not satisfy good_barcode, the printed digits are read by
    OCR from a region further down the same strip and cleaned of common
    misreadings.

    Returns the barcode.

    Raises Ballot.BallotException when page.xoff or page.yoff is too
    small to search left of / above the corner, or when neither the
    barcode nor the OCR fallback yields a good barcode.
    """
    # NOTE(review): qtr_inch and point02inch are computed but never read.
    qtr_inch = adj(.22)
    sixth_inch = adj(.1667)
    eighth_inch = adj(.125)
    third_inch = adj(0.33)
    point2inch = adj(0.2)
    point02inch = adj(0.02)

    # don't pass negative x,y into the barcode reader
    if page.xoff < point2inch:
        raise Ballot.BallotException("bad xref %d" % (page.xoff, ))
    if page.yoff < eighth_inch:
        raise Ballot.BallotException("bad yref")

    # strip start and width depend on how much room is left of the corner
    if page.xoff >= third_inch:
        startx, widthx = max(0, page.xoff - third_inch), sixth_inch
    elif page.xoff >= point2inch:
        startx, widthx = max(0, page.xoff - point2inch), 2

    # bar code 2 1/3"
    strip_height = eighth_inch + int(round((7. * const.dpi) / 3.))
    try:
        barcode = hart_barcode(page.image, startx,
                               page.yoff - eighth_inch, widthx,
                               strip_height)
    except BarcodeException as e:
        self.log.info("%s %s" % (page.filename, e))
        barcode = "NOGOOD"

    if good_barcode(barcode):
        return barcode

    # try getting bar code from ocr of region beneath
    self.log.debug("Barcode no good, trying to get barcode via OCR")
    ocr_box = (max(0, page.xoff - adj(.35)),
               page.yoff + adj(2.5),
               max(1, page.xoff - adj(.1)),
               page.yoff + adj(4.3))
    zone = page.image.crop(ocr_box).rotate(-90)  # make it left to right
    barcode = self.extensions.ocr_engine(zone)
    # remove OCR errors specific to text guaranteed numeric
    ocr_fixes = (("\n", ""), (" ", ""), ("O", "0"), ("o", "0"),
                 ("l", "1"), ("I", "1"), ("B", "8"), ("Z", "2"),
                 ("]", "1"), ("[", "1"), (".", ""), (",", ""),
                 ("/", "1"))
    for misread, replacement in ocr_fixes:
        barcode = barcode.replace(misread, replacement)
    if not good_barcode(barcode):
        raise Ballot.BallotException("bad bar code")
    return barcode
def CapturePageInfo_test():
    """Run CapturePageInfo on a synthetic page and compare with its template.

    The page is a white image with five black boxes; the template has one
    choice per box.  The captured results must match the template and
    every returned crop must be a single colour.
    """
    # create fake ballot image: white page peppered with black boxes
    fake_ballot = Image.new("RGB", (100, 400), "white")
    pen = ImageDraw.Draw(fake_ballot)
    corners = [(10, 10), (10, 70), (40, 130), (30, 200), (10, 300)]
    for left, top in corners:
        pen.rectangle((left, top, left + 50, top + 50), "black")

    # define faux template: one choice per box, labelled by its index
    choice_specs = tuple(
        [corner + (str(index), True, False, False)
         for index, corner in enumerate(corners)])
    template, expected = CONCHO(
        (0, 0, 200, 400, "prop", "contest uno") + choice_specs)

    # construct page, dpi only used in ratio, hence 1 vs default 0
    page = Ballot.Page(dpi=1, image=fake_ballot)
    page.as_template("precinct", template)

    captured = ShillBallot().CapturePageInfo(page)
    assert concho_vs_vd(expected, captured)
    for vote in captured:
        # getcolors returns None if there are more than the specified
        # number of colors, so this verifies each crop is monochrome
        assert vote.image.getcolors(1) is not None
def get_layout_code(self, page):
    """Ask the user for a simulated barcode and store it on the page.

    Explanatory text is printed first, then the user is asked for a
    number from 0 to 100, or -1 when the ballot has no barcode.

    Returns the barcode, after setting page.barcode to it.

    Raises Ballot.BallotException when the answer is -1.
    """
    messages = (
        "In get_layout_code",
        (" Determine the layout code by getting it from the user "
         "The layout code must be determined on a vendor specific basis; "
         "it is usually a series of dashes or a bar code at a particular "
         "location on the ballot. Layout codes may appear on both sides of "
         "the ballot, or only on the fronts. If the codes appear only on "
         "the front, you can file the back layout under a layout code "
         "generated from the front's layout code. "),
        "In get_layout_code. Note that DuplexBallot only calls this for",
        "the first in a pair of images.",
        "Note that if there's no barcode that DuplexBallot will attempt",
        "to swap the front and back pages and you will be asked again",
        "and if you still say there is no barcode, an error will be",
        "raised",
    )
    for message in messages:
        # one line of output per message, exactly as separate prints
        print(message)

    barcode = ask(
        """Enter a number as the simulated barcode, or -1 if your ballot is missing a barcode""",
        IntIn(0, 100), -1)
    # If this is a back page, need different arguments
    # to timing marks call; so have failure on front test
    # trigger a back page test
    if barcode != -1:
        page.barcode = barcode
        return barcode
    raise Ballot.BallotException(
        "No barcode on front page of duplex ballot")
def extract_VOP_test():
    """Exercise HartBallot.extract_VOP on a single synthetic vote target.

    A HartBallot is created without running __init__, given just the
    attributes set below, and asked to extract the one choice of a
    one-contest template from a 100x100 image holding a black square.
    Note that two thresholds on the shared `const` object are overwritten.
    """
    #XXX move if/when pushed to superclass
    target_image = Image.new("RGB", (100, 100), "#fff")
    ImageDraw.Draw(target_image).rectangle((20, 20, 60, 60), fill="#000")
    template, choices = CONCHO(
        (0, 0, 100, 100, "prop", "description",
         (20, 20, "vop", True, False, False)))

    ballot_class = hart_ballot.HartBallot
    ballot = ballot_class.__new__(ballot_class)
    ballot.extensions = NilXtnz()
    ballot.dpi = 100
    ballot.oval_size = (40, 40)
    ballot.vote_target_horiz_offset = 20  #???
    const.vote_intensity_threshold = 200
    const.dark_pixel_threshold = 741

    page = Ballot.Page(dpi=ballot.dpi, image=target_image)
    page.as_template("prec", template)

    # the rotate function just hands its arguments back
    extracted = ballot.extract_VOP(page, lambda *args: args, 1.0,
                                   choices[0])
    x, y, _stats, _crop, voted, writein, ambiguous = extracted
    assert x == 20
    assert y == 26  # includes crop offset
    assert voted
    assert not writein
    assert not ambiguous
def extract_VOP(self, page, rotatefunc, scale, choice):
    """Extract statistics for a single oval or writein from the ballot.

    The choice's template coordinates are shifted by the difference
    between the scaled page landmark and the template landmark, passed
    through rotatefunc, and the resulting target area is cropped and
    measured.

    Args:
        page: supplies xoff/yoff, template.xoff/yoff, image,
            margin_height, target_width and target_height.
        rotatefunc: called as rotatefunc(x, y, scale); returns (x, y).
        scale: divisor applied to the page offsets.
        choice: supplies coords() and is passed on to the vote tests.

    Returns:
        (cropx, cropy, stats, crop, voted, writein, ambiguous); crop is
        the writein area instead of the target when writein is true.
    """
    iround = lambda x: int(round(x))
    adj = lambda f: int(round(const.dpi * f))
    x, y = choice.coords()
    x = int(x)
    y = int(y)

    # NO horizontal margins in crop - grabbing region between marks!
    # const.margin_width_inches not used
    # hotspot_x_offset_inches IS used
    scaled_page_offset_x = page.xoff / scale
    scaled_page_offset_y = page.yoff / scale
    # every pair is labelled with what it really holds (the scaled
    # offsets used to be printed under "template offsets")
    self.log.debug(
        "Incoming coords (%d,%d), page offsets (%d,%d) "
        "scaled page offsets (%d,%d), template offsets (%d,%d)" %
        (x, y, page.xoff, page.yoff,
         scaled_page_offset_x, scaled_page_offset_y,
         page.template.xoff, page.template.yoff))

    # adjust x and y for the shift of landmark between template and ballot
    x = iround(x + scaled_page_offset_x - page.template.xoff)
    y = iround(y + scaled_page_offset_y - page.template.yoff)
    self.log.debug("Result of transform: (%d,%d)" % (x, y))
    x, y = rotatefunc(x, y, scale)

    cropx = x
    cropy = y - adj(.1)
    # NOTE(review): hotspot_x_offset_inches is added to pixel
    # coordinates as-is -- confirm it is already expressed in pixels.
    croplist = (cropx + self.hotspot_x_offset_inches,
                cropy - page.margin_height,
                min(cropx + self.hotspot_x_offset_inches
                    + page.target_width,
                    page.image.size[0] - 1),
                min(cropy + page.margin_height + page.target_height,
                    page.image.size[1] - 1))
    crop = page.image.crop(croplist)
    stats = Ballot.IStats(cropstats(crop, cropx, cropy))

    voted, ambiguous = self.extensions.IsVoted(crop, stats, choice)
    writein = False
    if voted:
        # extension is overridden with local function for this ballot type
        writein = IsWriteIn(crop, stats, choice)
    if writein:
        # the writein box spans from the target to the writein offset,
        # whichever side of the target that offset falls on
        x1 = min(self.writein_xoff + cropx, cropx)
        x2 = max(self.writein_xoff + cropx, cropx)
        y1 = min(self.writein_yoff + cropy, cropy + adj(.2))
        y2 = max(self.writein_yoff + cropy, cropy + adj(.2))
        crop = page.image.crop(
            (x1,
             y1 - page.margin_height,
             min(x2, page.image.size[0] - 1),
             min(y2 + page.margin_height, page.image.size[1] - 1)))
    return cropx, cropy, stats, crop, voted, writein, ambiguous
def build_layout(self, page):
    """Get the layout of a Demo ballot by asking the user for it.

    Building the layout is the largest task when registering a ballot
    brand with a different layout style.  Here the user enters the column
    x offsets, then for each column the contests (until 'x' is entered)
    and, for each contest, its choices with the x/y offset of every
    printed vote target.

    `page` is part of the interface but is not read here.

    Returns the list of Ballot.Contest objects with their Ballot.Choice
    entries appended, in the order entered.
    """
    print("Entering build_layout. You will need to provide a comma separated "
          "list of column offsets, then you will need to provide, for each "
          "column, information about each contest in that column: its "
          "contest text, its starting y offset, and the same for each choice "
          "in the contest. ")

    contest_prompt = """Enter a contest name. When done entering contests, \ntype 'x' and the <enter> key to continue."""
    layout = []
    column_offsets = ask(
        """Enter the column offsets of the vote columns, separated by commas""",
        CSV(int)
    )
    for index, offset in enumerate(column_offsets):
        print(" ".join(("Contests for Column", str(index),
                        "at x offset", str(offset))))
        name = ask(contest_prompt)
        while name.strip().lower() != "x":
            choice_names = ask("Enter a comma separated list of choices",
                               CSV())
            # Contest takes the bounding box x1,y1,x2,y2, then 0 for a
            # regular contest or 1 for a proposition, then the contest
            # text; the box and flag are just dummied here
            contest = Ballot.Contest(offset, 1, 199, 5*const.dpi, 0, name)
            layout.append(contest)
            for choice_name in choice_names:
                vote_x = ask("Enter the x offset of the upper left hand corner \nof the printed vote target for " + choice_name, int)
                vote_y = ask("Enter the y offset of the upper left hand corner \nof the printed vote target for " + choice_name, int)
                # Choice takes the x,y of the upper left corner of the
                # printed vote opportunity and the text of the choice
                #TODO add x2,y2
                contest.append(Ballot.Choice(vote_x, vote_y, choice_name))
            name = ask(contest_prompt)
    return layout
def createElection(self):
    """Simulate turnout and return an Election holding the cast ballots.

    Each person in self.population votes when a fresh random number falls
    below their VotingProb; their ballot is for the candidate at index
    person.vote() and also carries this object and the person's age, race
    and Immigrant attribute.
    """
    election = Election.Election(self.candidates)
    for voter in self.population:
        ## determine if they want to vote
        if not (random.random() < voter.VotingProb):
            continue
        ## they are voting
        chosen = self.candidates[voter.vote()]
        election.addBallot(
            Ballot.Ballot(chosen, self, voter.age, voter.race,
                          voter.Immigrant))
    return election
def extract_VOP(self, page, rotatefunc, scale, choice):
    """Extract a single oval, or writein box, from the specified ballot.

    The choice's template coordinates are shifted by the difference
    between the scaled page landmark and the template landmark, passed
    through rotatefunc, and the target plus its margins is cropped and
    measured.

    Args:
        page: supplies xoff/yoff, template.xoff/yoff, image,
            margin_width/height and target_width/height.
        rotatefunc: called as rotatefunc(x, y, scale); returns (x, y).
        scale: divisor applied to the page offsets.
        choice: supplies coords() and is passed on to the extensions.

    Returns:
        (cropx, cropy, stats, crop, voted, writein, ambiguous); crop is
        the writein area instead of the target when writein is true.
    """
    def iround(value):
        return int(round(value))

    x, y = choice.coords()

    #BEGIN SHARABLE
    scaled_page_offset_x = page.xoff / scale
    scaled_page_offset_y = page.yoff / scale
    # the message is assembled from adjacent literals so that no stray
    # backslash ends up in the log text
    self.log.debug(
        "Incoming coords (%d,%d), page offsets (%d,%d) "
        "scaled page offsets (%d,%d), template offsets (%d,%d)" %
        (x, y, page.xoff, page.yoff,
         scaled_page_offset_x, scaled_page_offset_y,
         page.template.xoff, page.template.yoff))

    # adjust x and y for the shift of landmark between template and ballot
    x = iround(x + scaled_page_offset_x - page.template.xoff)
    y = iround(y + scaled_page_offset_y - page.template.yoff)
    self.log.debug("Result of translation: (%d,%d)" % (x, y))
    x, y = rotatefunc(x, y, scale)
    self.log.debug("Result of rotation: (%d,%d)" % (x, y))

    # Below is using the pure python cropstats:
    cropx, cropy = x, y  # not adjusted like in PILB cropstats
    max_x = page.image.size[0] - 1
    max_y = page.image.size[1] - 1
    left = cropx - page.margin_width
    top = cropy - page.margin_height
    crop = page.image.crop(
        (left,
         top,
         min(cropx + page.margin_width + page.target_width, max_x),
         min(cropy + page.margin_height + page.target_height, max_y)))
    stats = Ballot.IStats(cropstats(crop, x, y))

    voted, ambiguous = self.extensions.IsVoted(crop, stats, choice)
    writein = self.extensions.IsWriteIn(crop, stats, choice)
    if writein:
        # NOTE(review): the left/top edges are shifted by the writein
        # offsets but the right/bottom edges are not -- confirm intended.
        crop = page.image.crop(
            (left + self.writein_xoff,
             top + self.writein_yoff,
             min(cropx + page.margin_width + self.writein_width, max_x),
             min(cropy + page.margin_height + self.writein_height, max_y)))
    return cropx, cropy, stats, crop, voted, writein, ambiguous
def intakeSpreadsheet(filename):
    """Read a CSV export of votes and return the list of ballots.

    The first row is treated as a header and skipped.  In every other
    row the first column is the timestamp, the second the TNumber, and
    the remaining columns the preferences in order; blank preference
    cells are ignored.

    Rows that cannot form a ballot are skipped rather than raising: rows
    with fewer than two columns (including blank lines) and rows with no
    non-blank preference.  An empty file yields an empty list.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of Ballot.Ballot objects, in file order, each with its top
        choice and its `preferences` list set.
    """
    allBallots = []
    with open(filename, 'r') as f:
        readCSV = csv.reader(f, delimiter=',')
        # skip the header; the default avoids StopIteration on an empty file
        next(readCSV, None)
        for row in readCSV:
            # blank lines come back as [] and have no timestamp/TNumber
            if len(row) < 2:
                continue
            timeStamp = row[0]  # TimeStamp is always the first column
            tNumber = row[1]  # TNumber is always the second column
            # an empty preference is an empty (or whitespace) cell
            preferences = [column for column in row[2:]
                           if column.strip() != '']
            # without a single preference there is no top choice to set
            if not preferences:
                continue
            currentBallot = Ballot.Ballot(timeStamp, tNumber)
            currentBallot.changeTopChoice(preferences[0])
            currentBallot.preferences = preferences
            allBallots.append(currentBallot)
    return allBallots
def get_layout_code(self, page):
    """Determine the layout code by asking the user for it.

    A real layout code is vendor specific; it is usually a series of
    dashes or a bar code at a particular location on the ballot, and may
    appear on both sides or only on the fronts (in which case the back
    layout can be filed under a code generated from the front's code).
    Here the user simply enters a number from 0 to 100, or -1 for a
    ballot without a barcode.

    Returns the entered barcode, after storing it as page.barcode.

    Raises Ballot.BallotException when -1 is entered.
    """
    print("In get_layout_code")
    simulated = ask(
        """Enter a number as the simulated barcode, or -1 if your ballot is missing a barcode""",
        IntIn(0, 100), -1)
    # If this is a back page, need different arguments
    # to timing marks call; so have failure on front test
    # trigger a back page test
    if simulated != -1:
        page.barcode = simulated
        return simulated
    raise Ballot.BallotException("No barcode found")
def get_only_votes_from(self, image, contest_instance, croplist):
    """ given an area known to contain only votes, return info

    The cropped area will contain only voting areas.  Voting areas will
    contain ovals in the oval column.  Descriptive text to the right of
    the ovals will be assigned to each oval based on being at or below
    the oval.

    image is cropped to croplist (a 4-item box); croplist[0] and
    croplist[1] are added back so that choice coordinates are relative
    to image rather than to the crop.  A Ballot.Choice is appended to
    contest_instance for every dark zone in which an oval is found, and
    contest_instance is returned.
    """
    # convert inches to pixels at const.dpi
    adj = lambda f: int(round(const.dpi * f))
    oval_offset_into_column = adj(0.14)
    oval_end_offset_into_column = adj(0.39)
    # text starts one oval offset plus 0.02" past the end of the oval
    votetext_offset_into_column = oval_end_offset_into_column
    votetext_offset_into_column += oval_offset_into_column
    votetext_offset_into_column += adj(0.02)
    # NOTE(review): choices is never read below
    choices = []
    crop = image.crop(croplist)
    dark_zones = self.get_dark_zones(crop)
    # pair every dark zone with its successor; the last zone is paired
    # with a sentinel covering the final two rows of the crop
    next_dark_zones = dark_zones[1:]
    next_dark_zones.append([crop.size[1] - 2, crop.size[1] - 1])
    skip = False
    for dz, ndz in zip(dark_zones, next_dark_zones):
        # if two dark zones are less than 0.3" apart, merge them and skip
        # this allows two line entries to be handled as single choices
        # !!! check for existence of oval in oval zone instead
        if skip == True:
            # this zone was merged into the previous one
            skip = False
            continue
        if (ndz[0] - dz[0]) < adj(0.3):
            skip = True
        # a merged pair extends to the END of the next zone (index 1),
        # a single zone only to the START of the next zone (index 0)
        if skip:
            end = 1
        else:
            end = 0
        # strip between the oval and the text
        blankzone = crop.crop((oval_end_offset_into_column, dz[0],
                               votetext_offset_into_column, ndz[end]))
        blankzonestat = ImageStat.Stat(blankzone)
        # text area, stopping 1/10" short of the right edge of the crop
        # NOTE(review): OCR runs for every zone, even those rejected by
        # the tests below
        zonecrop = crop.crop((votetext_offset_into_column, dz[0],
                              crop.size[0] - (const.dpi / 10), ndz[end]))
        zonetext = self.extensions.ocr_engine(zonecrop)
        zonetext = self.extensions.ocr_cleaner(zonetext)
        zonetext = zonetext.strip()
        zonetext = zonetext.replace("\n", "//").strip()
        # only zones whose oval/text gap has a first-band mean above 244
        # are considered
        if blankzonestat.mean[0] > 244:
            append_x = croplist[0] + adj(0.14)
            append_y = croplist[1] + dz[0]
            # search through oval zone looking for oval,
            # adjust y to top of oval, not top of text
            contig = 0
            found = False
            for adj_y in range(adj(0.04), adj(0.2)):
                # one-pixel-high row across the oval column
                ovalcrop = crop.crop(
                    (oval_offset_into_column, dz[0] + adj_y,
                     oval_end_offset_into_column, dz[0] + adj_y + 1))
                ovalstat = ImageStat.Stat(ovalcrop)
                if ovalstat.extrema[0][0] < 240:
                    # row has a pixel darker than 240: extend the run
                    contig += 1
                    if contig > 10:
                        # more than 10 such rows in a row: back up by 10
                        # from the current row
                        append_y += (adj_y - 10)
                        found = True
                        break
                else:
                    # a row with no dark pixel resets the run
                    contig = 0
            if not found:
                continue
            self.log.debug("Appending choice %d %d %s" %
                           (append_x, append_y, zonetext))
            choice = Ballot.Choice(append_x, append_y, zonetext)
            contest_instance.append(choice)
    return contest_instance
def get_contests_and_votes_from(self, image, regionlist, croplist):
    """ given an area known to contain votes and desc text, return info

    The cropped area will contain contest descriptions and voting areas.
    Unfortunately, the contest descriptions are not indented away from
    the oval voting areas.  So... we crop looking for white line splits,
    and then treat every line as either part of a contest or as a vote
    line, depending on whether we find a pattern of white indicating the
    line contains only an oval and a single word, YES or NO.

    New Ballot.Contest objects are appended to regionlist and
    Ballot.Choice objects to the most recent contest in regionlist.
    The value returned is the list of dark zones found in the crop, not
    regionlist.
    """
    # convert inches to pixels at const.dpi
    adj = lambda f: int(round(const.dpi * f))
    oval_offset_into_column = adj(0.14)
    oval_end_offset_into_column = adj(0.39)
    # text starts one oval offset plus 0.02" past the end of the oval
    votetext_offset_into_column = oval_end_offset_into_column
    votetext_offset_into_column += oval_offset_into_column
    votetext_offset_into_column += adj(0.02)
    # NOTE(review): half_intensity and contests are never read below
    half_intensity = 128
    contests = []
    # description text accumulated until the next contest is created
    contest_string = ""
    crop = image.crop(croplist)
    # indent by 1/10" to avoid edges, then crop single pixel lines,
    # finding beginning and end of zones which include dark pixels
    # now check each dark zone to see if it is a vote op
    # or if it is descriptive text; vote ops will have an oval
    # in the oval channel beginning at 0.14 and extending for .24,
    # then text beginning at .38
    dark_zones = self.get_dark_zones(crop)
    contest_created = False
    for dz in dark_zones:
        # zone 1: the full line, inset 1/10" on both sides
        zonecrop1 = crop.crop((const.dpi / 10, dz[0],
                               crop.size[0] - (const.dpi / 10), dz[1]))
        # zone 2: the gap between the oval and the vote text
        zonecrop2 = crop.crop((oval_end_offset_into_column, dz[0],
                               votetext_offset_into_column, dz[1]))
        zone2stat = ImageStat.Stat(zonecrop2)
        # zone 3: one inch of text starting at the vote text offset
        zonecrop3 = crop.crop(
            (votetext_offset_into_column, dz[0],
             votetext_offset_into_column + const.dpi, dz[1]))
        zone1text = self.extensions.ocr_engine(zonecrop1)
        zone1text = self.extensions.ocr_cleaner(zone1text)
        zone3text = self.extensions.ocr_engine(zonecrop3)
        zone3text = self.extensions.ocr_cleaner(zone3text)
        # two independent hints that this line is a vote opportunity:
        # a light gap after the oval, and short text beside it
        intensity_suggests_voteop = False
        length_suggests_voteop = False
        if zone2stat.mean[0] > 244:
            intensity_suggests_voteop = True
        if len(zone3text) < 6:
            length_suggests_voteop = True
        if not intensity_suggests_voteop and not length_suggests_voteop:
            # neither hint: description text for the next contest
            contest_created = False
            contest_string += zone1text.replace("\n", "/")
        elif intensity_suggests_voteop and length_suggests_voteop:
            # create contest if none created, then
            if not contest_created:
                contest_created = True
                self.log.debug("Creating contest %s" % (contest_string, ))
                regionlist.append(
                    Ballot.Contest(croplist[0], croplist[1] + dz[0],
                                   croplist[2], croplist[1] + dz[1],
                                   0, contest_string))
                contest_string = ""
            # add voteop to contest
            choice_string = zone3text
            self.log.debug("Adding choice %s" % (choice_string, ))
            regionlist[-1].append(
                Ballot.Choice(croplist[0] + oval_offset_into_column,
                              croplist[1] + dz[0], choice_string))
        else:
            # the two hints disagree
            if contest_created:
                # after a contest's choices: keep the text as the start
                # of the next description
                contest_string += zone1text.replace("\n", "//")
            else:
                self.log.debug(
                    "Problem determining whether contest or choice")
                self.log.debug("Gap mean values %s" % (zone2stat.mean, ))
                self.log.debug("Zone3 text %s" % (zone3text, ))
                self.log.debug("Contest string: %s" % (contest_string, ))
    return dark_zones
def generate_transition_list_from_zones(self, image, regionlist,
                                        column_bounds, left, middle):
    """ given the pair of zone lists, generate a comprehensive list

    We should then be able to merge these sets of split information:
    anything where we find solid black or halftone is a definite break
    which may be followed either by another black or halftone area, by
    a description area, or by a vote area.

    Each entry of `left` is indexed as [start_y, kind] with kind 'B'
    (black), 'G' (gray) or 'W' (white).  `column_bounds` supplies the
    left and right x of the column.  `middle` is not used.

    Contests found are appended to regionlist, which is returned.
    """
    # NOTE(review): the *_default values and cjurisdiction are only
    # assigned and logged, never used for the returned layout
    ccontest_default = "No current contest"
    ccontest = ccontest_default
    cjurisdiction_default = "No current jurisdiction"
    cjurisdiction = cjurisdiction_default
    contest_instance = None
    # the "next_" flags are set while handling a black/gray zone and are
    # moved into the "this_" flags at the start of the following zone
    next_white_is_votearea = False
    this_white_is_votearea = False
    next_white_is_yesno = False
    this_white_is_yesno = False
    for n in range(len(left)):
        this_white_is_votearea = False
        if next_white_is_votearea == True:
            this_white_is_votearea = True
            next_white_is_votearea = False
        this_white_is_yesno = False
        if next_white_is_yesno == True:
            this_white_is_yesno = True
            next_white_is_yesno = False
        this_y = left[n][0]
        try:
            next_zone = left[n + 1]
        except IndexError:
            # no zone follows the last one, so its next_y is 0
            next_zone = [0, 'X']
        next_y = next_zone[0]
        # NOTE(review): rel_end is never read below
        rel_end = next_y - (const.dpi / 10)
        if left[n][1] == 'B':
            self.log.debug("Black zone at %d to %d %s" %
                           (this_y, next_y, next_zone))
            # if it's a legitimate black zone and the next zone is white,
            # that white zone is a Yes/No Vote Area (or empty)
            if (next_y - this_y) > (const.dpi / 4):
                next_white_is_yesno = True
                # this zone becomes the current Jurisdiction
                crop = image.crop(
                    (column_bounds[0], this_y, column_bounds[1], next_y))
                cjurisdiction = self.extensions.ocr_engine(crop)
                self.log.debug("Jurisdiction %s" % (cjurisdiction, ))
                cjurisdiction = self.extensions.ocr_cleaner(cjurisdiction)
                cjurisdiction = cjurisdiction.replace("\n", "//").strip()
                self.log.debug("Cleaned Jurisdiction %s" %
                               (cjurisdiction, ))
                # and the current contest is set
                # from the descriptive text
                # at the start of the Yes No Vote area
        if left[n][1] == 'G':
            self.log.debug("Gray zone at %d to %d %s" %
                           (this_y, next_y, next_zone))
            # if it's a legitimate gray zone and the next zone is white,
            # that white zone is a voting area (or empty)
            if (next_y - this_y) > (const.dpi / 2):
                next_white_is_votearea = True
                crop = image.crop(
                    (column_bounds[0], this_y, column_bounds[1], next_y))
                # presumably removes the halftone background before OCR;
                # elim_halftone is defined elsewhere -- TODO confirm
                crop = Image.eval(crop, elim_halftone)
                ccontest = self.extensions.ocr_engine(crop)
                ccontest = ccontest.replace("\n", "//").strip()
                self.log.debug("Contest %s" % (ccontest, ))
                ccontest = self.extensions.ocr_cleaner(ccontest)
                self.log.debug("Cleaned Contest %s" % (ccontest, ))
                # NOTE(review): the fourth argument is this_y + next_y,
                # while every crop above uses next_y as the lower bound
                # -- confirm the sum is intended
                contest_instance = Ballot.Contest(column_bounds[0],
                                                  this_y,
                                                  column_bounds[1],
                                                  this_y + next_y,
                                                  0,
                                                  ccontest)
                regionlist.append(contest_instance)
        if left[n][1] == 'W':
            if this_white_is_votearea:
                # no descriptive text anticipated
                self.get_only_votes_from(
                    image, contest_instance,
                    (column_bounds[0], this_y, column_bounds[1], next_y))
            if this_white_is_yesno:
                # descriptive text sets current contest,
                # votes are in stretches where the middle is white
                self.get_contests_and_votes_from(
                    image, regionlist,
                    (column_bounds[0], this_y, column_bounds[1], next_y))
            self.log.debug("White zone at %d to %d %s" %
                           (this_y, next_y, next_zone))
    return regionlist
def build_layout(self, page, back=False):
    """ Get layout and ocr information from Diebold ballot

    Assumes page.image has been deskewed.

    First, determine number of columns and starting x of each column.
    Initial pass dummies column starts by pre-filling column list at
    known offsets for 8.5" wide 3 column.

    Then, for each column:
    Get horizontal lines spanning column
    Pairs of horizontal lines may bound a contest (const.dpi / 2 is the
    threshold handed to find_potential_contests); within each potential
    contest, locate vote targets.

    Potential contests with at least one vote target become contests
    appended to the contest list; the vote targets become choices on
    the contest's choice list.

    `back` is accepted but not used.

    Return list of contests.
    """
    thinline_width = adj(0.01)
    text_margin = adj(0.03)
    contest_list = []
    # columns begin 1/32" from inboard side of first dash,
    # and the first two columns of a three column Diebold ballot
    # are each 2.75" wide
    landmark_x = page.landmarks[0][0]
    column_bound_vlines = (landmark_x + adj(.03),
                           landmark_x + adj(2.78),
                           landmark_x + adj(5.53),
                           landmark_x + adj(8.03))
    # the last boundary vline is not a column start, only a column end
    column_start_vlines = column_bound_vlines[:-1]
    # the next column's start is the current column's end
    column_end_vlines = column_bound_vlines[1:]
    vthip = adj(const.vote_target_horiz_offset_inches)
    vt_width_pixels = adj(const.target_width_inches)
    for column_start_x, column_end_x in zip(column_start_vlines,
                                            column_end_vlines):
        # add the config file vote offset to the column_x
        # to get the start of a vote oval; add half the
        # oval width from the config file to get its center
        oval_center_x = column_start_x + vthip + (vt_width_pixels / 2)
        # NOTE(review): oval_text_start_x is never read below
        oval_text_start_x = column_start_x + vthip + vt_width_pixels + text_margin
        # find horizontal lines searching at column center
        column_center_x = (column_start_x + column_end_x) / 2
        lines = find_horizontal_lines(page, column_center_x, const.dpi)
        # find which pairs could be contests
        pot_contests = find_potential_contests(lines, const.dpi / 2)
        # find the vote targets between those pairs
        for contest_start_y, contest_end_y in pot_contests:
            self.log.debug("Searching targets from %d,%d to %d,%d" %
                           (column_start_x, contest_start_y,
                            column_end_x, contest_end_y))
            vote_targets = find_untinted_voteops(page, oval_center_x,
                                                 contest_start_y,
                                                 contest_end_y, const.dpi)
            # if you've found any vote targets,
            # create a contest and add vote_targets as choices
            if len(vote_targets) > 0:
                # ocr contest text: everything inside the contest box
                # above the first vote target, less 1/10" of spacing
                vertical_space_after_description = const.dpi / 10
                contest_text_croplist = (column_start_x + thinline_width,
                                         contest_start_y + thinline_width,
                                         column_end_x - thinline_width,
                                         vote_targets[0][1] - vertical_space_after_description)
                contest_text = self.extensions.ocr_engine(
                    page.image.crop(contest_text_croplist))
                contest_text = self.extensions.ocr_cleaner(contest_text)
                this_contest = Ballot.Contest(column_start_x,
                                              contest_start_y,
                                              column_end_x,
                                              contest_end_y,
                                              0,
                                              contest_text)
                contest_list.append(this_contest)
                # add vote targets
                for n in range(len(vote_targets)):
                    this_target_x, this_target_y = vote_targets[n]
                    # choice text starts right of the target
                    this_target_text_x = (this_target_x + vt_width_pixels + text_margin)
                    this_target_text_y = (this_target_y - text_margin)
                    try:
                        next_target_x, next_target_y = vote_targets[n + 1]
                    except IndexError:
                        # last target: bound its text by the contest box
                        next_target_x = column_end_x - thinline_width
                        next_target_y = contest_end_y - thinline_width
                    if abs(next_target_x - this_target_x) > (const.dpi / 4):
                        # the two targets bottom edges are aligned
                        # (side by side): text runs up to the next target
                        choice_text_croplist = (this_target_text_x,
                                                this_target_text_y,
                                                next_target_x - text_margin,
                                                contest_end_y - thinline_width)
                    else:
                        # the two targets left edges are aligned
                        # (stacked): text runs down to the next target
                        choice_text_croplist = (this_target_text_x,
                                                this_target_text_y,
                                                column_end_x - text_margin,
                                                next_target_y - text_margin)
                    choice_text = self.extensions.ocr_engine(
                        page.image.crop(choice_text_croplist))
                    choice_text = self.extensions.ocr_cleaner(choice_text)
                    this_choice = Ballot.Choice(this_target_x,
                                                this_target_y,
                                                choice_text)
                    this_contest.choices.append(this_choice)
    return contest_list
def get_title_and_votes_from(self, image, regionlist, croplist,
                             last_title="NO TITLE"):
    """Find the contest title and vote choices inside one cropped region.

    The cropped area is expected to contain a title area at the top,
    followed by voting areas.  Voting areas contain ovals in the oval
    column; descriptive text to the right of an oval is assigned to it.

    Args:
        image: page image; only its crop() method is used here.
        regionlist: list that receives the contest created for this
            region (mutated in place).
        croplist: (x1, y1, x2, y2) box of the region within image.
        last_title: title used when the OCR'd title is shorter than
            four characters.

    Returns:
        regionlist, or a new empty list when croplist has a zero right
        or bottom edge.
    """
    # Nothing to crop or OCR for an empty region, so bail out first.
    if croplist[2] == 0 or croplist[3] == 0:
        return []

    ov_off = adj(const.vote_target_horiz_offset_inches)
    ov_ht = adj(const.target_height_inches)
    ov_wd = adj(const.target_width_inches)
    ov_end = ov_off + ov_wd
    txt_off = adj(const.candidate_text_horiz_offset_inches)

    crop = image.crop(croplist)
    dark_zones = self.get_dark_zones(crop)

    def read_text(zone):
        """OCR a zone and flatten the cleaned text onto one line."""
        text = ocr.clean_ocr_text(ocr.tesseract(zone)).strip()
        return text.replace("\n", "//").strip()

    # Pass 1: classify every dark zone as title (T), vote target (V),
    # write-in line (W) or unknown (-).
    encountered_oval = False
    dzstyle = []
    for dz in dark_zones:
        # crop each dark strip, losing the area to the left of the
        # possible vote target and an equivalent area on the right
        dzcrop = crop.crop((ov_off, dz[0], crop.size[0] - ov_off, dz[1]))

        # leftmost dark pixel in the strip; only columns left of the
        # current best can improve it
        firstx = dzcrop.size[0]
        for y in range(dzcrop.size[1]):
            for x in range(firstx):
                if dzcrop.getpixel((x, y))[0] < 192:
                    firstx = x
                    break

        # It is hard to tell a filled oval from a title that begins at
        # about the same x offset, so rely on titles coming first:
        # ovals start at a defined offset and have a minimum height.
        tall_enough = (dz[1] - dz[0] >= int(ov_ht * .8))
        ov_pat = oval_pattern(dzcrop, ov_ht, ov_wd, txt_off - ov_off)

        if not encountered_oval and not ov_pat:
            dzstyle.append("T")
        elif tall_enough and firstx <= adj(0.02):
            dzstyle.append("V")
            encountered_oval = True
        elif firstx >= (txt_off - ov_off - adj(0.02)) and not tall_enough:
            dzstyle.append("W")
        else:
            dzstyle.append("-")

    # Pass 2: build the contest from the title zones, then one choice
    # per vote-target zone.
    contest_instance = None
    choice = None
    title_array = []  # becomes None once title collection is closed
    for index, style in enumerate(dzstyle):
        zone_top, zone_bottom = dark_zones[index][0], dark_zones[index][1]
        if style == "T":
            if title_array is None:
                # a write-in zone already closed the title; ignore
                # late title text instead of failing on None.append
                continue
            titlezone = crop.crop(
                (adj(0.1), zone_top, crop.size[0] - adj(0.1), zone_bottom))
            title_array.append(read_text(titlezone))
        elif style == "V":
            if title_array is not None:
                zonetext = "/".join(title_array)
                title_array = None
                if len(zonetext) < 4:
                    zonetext = last_title
                contest_instance = Ballot.Contest(
                    croplist[0], croplist[1], croplist[2], croplist[3],
                    0, zonetext[:80])
                regionlist.append(contest_instance)
            if contest_instance is None:
                # Report and skip; dropping into a debugger here would
                # block an unattended run on stdin.
                print("WARNING: Choice but no contest.")
                continue
            choicezone = crop.crop(
                (txt_off, zone_top, crop.size[0] - adj(0.1), zone_bottom))
            zonetext = read_text(choicezone)

            # find the y at which the actual oval begins, which may be
            # lower than the dark zone start: look up to 0.2 inches
            # down for a run of rows darkening the oval column
            choice_y = zone_top
            contig = 0
            for adj_y in range(adj(0.2)):
                ovalcrop = crop.crop((ov_off, choice_y + adj_y,
                                      ov_end, choice_y + adj_y + 1))
                if ImageStat.Stat(ovalcrop).extrema[0][0] < 240:
                    contig += 1
                    if contig > adj(0.03):
                        choice_y += (adj_y - adj(0.03))
                        break
                else:
                    contig = 0
            choice = Ballot.Choice(croplist[0] + ov_off,
                                   croplist[1] + choice_y,
                                   zonetext)
            contest_instance.append(choice)
        elif (style == "W" and len(dzstyle) > (index + 1)
              and dzstyle[index + 1] in "W-"):
            title_array = None
            try:
                choice.description = "Writein"
            except AttributeError:
                # no vote target precedes this write-in line
                pass
    return regionlist
def extract_VOP(self, page, rotatefunc, scale, choice):
    """Extract a single oval, or writein box, from the specified ballot.

    Args:
        page: page being read; supplies image, xoff/yoff, template
            offsets, margins and target size.
        rotatefunc: callable (x, y, scale) -> (x, y) applied after the
            landmark translation.
        scale: divisor applied to the page offsets before translation.
        choice: object whose coords() gives the template (x, y) of the
            vote target.

    Returns:
        (cropx, cropy, stats, crop, voted, writein, ambiguous).
    """
    iround = lambda x: int(round(x))
    x, y = choice.coords()

    scaled_page_offset_x = page.xoff / scale
    scaled_page_offset_y = page.yoff / scale
    # Adjacent literals keep source indentation out of the message.
    self.log.debug(
        "Incoming coords (%d,%d), page offsets (%d,%d) "
        "scaled page offsets (%d,%d), template offsets (%d,%d)"
        % (x, y, page.xoff, page.yoff,
           scaled_page_offset_x, scaled_page_offset_y,
           page.template.xoff, page.template.yoff))

    # adjust x and y for the shift of landmark between template and ballot
    x = iround(x + scaled_page_offset_x - page.template.xoff)
    y = iround(y + scaled_page_offset_y - page.template.yoff)
    self.log.debug("Result of translation: (%d,%d)" % (x, y))
    x, y = rotatefunc(x, y, scale)
    self.log.debug("Result of rotation: (%d,%d)" % (x, y))

    # Using the pure python cropstats; coordinates are not adjusted
    # like in PILB cropstats.  No vertical recentring is applied: the
    # rotation alone is relied on to capture the correct area.
    cropx, cropy = x, y
    left = cropx - page.margin_width
    top = cropy - page.margin_height
    max_x = page.image.size[0] - 1
    max_y = page.image.size[1] - 1
    crop = page.image.crop(
        (left,
         top,
         min(cropx + page.margin_width + page.target_width, max_x),
         min(cropy + page.margin_height + page.target_height, max_y)))

    stats = Ballot.IStats(cropstats(crop, x, y))
    voted, ambiguous = self.extensions.IsVoted(crop, stats, choice)
    writein = self.extensions.IsWriteIn(crop, stats, choice)
    if writein:
        # replace the target crop with the write-in area
        crop = page.image.crop(
            (left + self.writein_xoff,
             top + self.writein_yoff,
             min(cropx + page.margin_width + self.writein_width, max_x),
             min(cropy + page.margin_height + self.writein_height, max_y)))
    return cropx, cropy, stats, crop, voted, writein, ambiguous
def main():
    """Run the extraction loop under the control of a parent process.

    Each pass reads the next ballot number from nexttoprocess.txt.  When
    no ballots are pending it prints a "Next=NNNNNN , READY:" (or
    "+ to SKIP:" after a missing file) prompt on stdout and waits in
    get_count_to_process for an instruction.  A count of 0 ends the loop;
    DoIncrementException skips the current sheet.  Each processed sheet is
    inserted into the database, its images are moved to the proc tree and
    nexttoprocess.txt is advanced by const.num_pages.
    """
    # get command line arguments
    cfg_file = get_args()

    # read configuration from tevs.cfg and set constants for this run
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(util.root("extraction.log"))

    # create initial toplevel directories if they don't exist
    for p in ("templates", "template_images", "composite_images",
              "results", "proc", "errors"):
        util.mkdirp(util.root(p))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        # format arguments are passed separately, like the rename
        # failure call below
        util.fatal("No such ballot type: %s check %s !",
                   const.layout_brand, cfg_file)

    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to db and open cursor
    if const.use_db:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database!")
    else:
        dbc = db.NullDB()
    log.info("Database connected.")

    total_images_processed, total_images_left_unprocessed = 0, 0
    base = os.path.basename

    # Each time given a signal to proceed for count_to_process ballots,
    # create ballot from images, get landmarks, get layout code, get votes.
    # Write votes to database and results directory.
    NextToProcessFile = util.root("nexttoprocess.txt")
    count_to_process = 0
    file_problem = False
    while True:
        log.debug("Top of loop.")
        next_ballot_number = int(util.readfrom(NextToProcessFile))
        log.debug("Read %d from %s" % (next_ballot_number,
                                       NextToProcessFile))
        if count_to_process == 0:
            # send prompt to controlling process, "READY:" or "+ for SKIP:"
            if file_problem:
                file_problem = False
                # do not remove space after %06d
                print("Next=%06d , + to SKIP:" % (next_ballot_number,))
            else:
                # do not remove space after %06d
                print("Next=%06d , READY:" % (next_ballot_number,))
            sys.stdout.flush()
            # wait here until get_count_to_process returns
            # it will wait on input instruction from stdio
            try:
                count_to_process = get_count_to_process(
                    next_ballot_number, log)
            except DoIncrementException:
                log.debug("Do increment exception")
                util.writeto(NextToProcessFile,
                             next_ballot_number + const.num_pages)
                log.debug(
                    "Wrote %d to next_ballot_number, count to process is %d"
                    % (next_ballot_number + const.num_pages,
                       count_to_process))
                count_to_process = 0
                log.debug("Setting count to process to 0.")
                continue
            # we're done when we get instructed to process 0
            if count_to_process == 0:
                break
        count_to_process -= 1
        try:
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d" % (next_ballot_number,))
            unprocs = [
                incomingn(next_ballot_number + m)
                for m in range(const.num_pages)
            ]
            log.info(unprocs)
            # we need all images for sheet to be available to process it
            for filename in unprocs:
                log.info("Checking for path.")
                if not os.path.exists(filename):
                    log.info("File not present.")
                    errmsg = "File %s not present or available!!!" % (
                        base(filename),)
                    log.info(errmsg.replace("!!!", ""))
                    print(errmsg)
                    sys.stdout.flush()
                    raise FileNotPresentException(filename)
                log.info("Path found.")

            # Processing
            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                # skip past the failed sheet
                util.writeto(NextToProcessFile,
                             next_ballot_number + const.num_pages)
                continue

            # make dirs for this sheet's outputs
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            for p in (proc1d, resultsd):
                util.mkdirp(p)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to
                # remove partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                log.info("Could not commit to db")
                print("Could not commit to db!")
                util.fatal("Could not commit vote information to database")

            # Post-processing: move the images from unproc to proc
            log.debug("Renaming")
            procs = [
                filen(proc1d, next_ballot_number + m)
                + const.filename_extension
                for m in range(const.num_pages)
            ]
            for a, b in zip(unprocs, procs):
                try:
                    os.rename(a, b)
                except OSError:
                    log.info("Could not rename %s" % a)
                    util.fatal("Could not rename %s", a)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot by
            # updating the next ballot file with the next image number
            log.debug("Requesting next")
            util.writeto(NextToProcessFile,
                         next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            log.info("%d images processed", const.num_pages)
        except FileNotPresentException:
            # a missing image is not fatal: report it and re-prompt
            file_problem = True
            print("FileNotPresentException")
            sys.stdout.flush()
            log.info("FileNotPresentException occurred")
            continue
def find_landmarks(self, page):
    """Locate the two upper registration marks and derive page tilt.

    The upper left and upper right square inches of the image are each
    searched with find_plus_target.  The upper left result is returned
    as the page offset, together with a rotation value computed from
    the two results.

    Raises:
        Ballot.BallotException: if either search reports -1, if any
            page corner is too dark, or if the tilt exceeds
            const.allowed_tangent.

    Returns:
        (rot, xoff, yoff)
    """
    inch = const.dpi
    width = page.image.size[0]

    left_crop = page.image.crop((0, 0, inch, inch))
    a, b = find_plus_target(left_crop, inch)
    right_crop = page.image.crop((width - inch, 0, width, inch))
    c, d = find_plus_target(right_crop, inch)
    if -1 in (a, b, c, d):
        raise Ballot.BallotException("Could not find landmarks")

    # c was found in crop coordinates; shift it to ballot coordinates
    c += width - inch

    # flunk ballots with too much black in any corner, to avoid
    # dealing with severely skewed ballots
    testlen = self.allowed_corner_black
    xs, ys = page.image.size
    corner_boxes = (
        ("upper left", (0, 0, testlen, testlen)),
        ("upper right", (xs - testlen, 0, xs - 1, testlen)),
        ("lower right", (xs - testlen, ys - testlen, xs - 1, ys - 1)),
        ("lower left", (0, ys - testlen, testlen, ys - 1)),
    )
    for corner, box in corner_boxes:
        mean_intensity = ImageStat.Stat(page.image.crop(box)).mean[0]
        if mean_intensity < 16:
            raise Ballot.BallotException(
                "Dark %s corner on %s" % (corner, page.filename))

    rise = d - b
    run = c - a
    rot = -rise / float(run)
    if abs(rot) > const.allowed_tangent:
        raise Ballot.BallotException(
            "Tilt %f of %s exceeds %f" %
            (rot, page.filename, const.allowed_tangent))
    return rot, a, b
def find_landmarks(self, page):
    """Locate the four corners of the main surrounding box of a Hart page.

    Corners are collected clockwise from the upper left into
    page.tiltinfo.  A page whose first or second corner has x == 0 is
    flagged as blank and (0.0, 0, 0, 0) is returned.

    Raises:
        Ballot.BallotException: if the image is unexpectedly narrow, a
            page corner is too dark, or the tilt exceeds
            const.allowed_tangent.

    Returns:
        (rot, xoff, yoff, hypot)
    """
    image = page.image
    dpi = const.dpi
    half_inch = const.dpi / 2

    left_start_x = 2 * dpi
    right_start_x = image.size[0] - int(2.5 * dpi)
    if right_start_x <= int(dpi * .34):
        raise Ballot.BallotException(
            "Image width of %d pixels at %d dpi unexpectedly narrow." %
            (image.size[0], dpi))

    # (starting x, top-vs-bottom flag, left-vs-right flag), clockwise
    # from the upper left; True selects top / left respectively
    searches = (
        (left_start_x, True, True),
        (right_start_x, True, False),
        (right_start_x, False, False),
        (left_start_x, False, True),
    )
    tiltinfo = []
    for start_x, top_flag, left_flag in searches:
        hline = scan_strips_for_horiz_line_y(
            image, dpi, start_x, half_inch, half_inch, top_flag)
        tiltinfo.append(
            follow_hline_to_corner(image, dpi, start_x, hline, left_flag))

    if tiltinfo[0][0] == 0 or tiltinfo[1][0] == 0:
        page.blank = True  # needs to ensure it is a page somehow
        self.log.info("Nonballot page at %s " % (page, ))
        return 0.0, 0, 0, 0

    # flunk ballots with too much black in any corner, to avoid
    # dealing with severely skewed ballots
    testlen = self.allowed_corner_black
    xs, ys = image.size
    corner_boxes = (
        ("upper left", (0, 0, testlen, testlen)),
        ("upper right", (xs - testlen, 0, xs - 1, testlen)),
        ("lower right", (xs - testlen, ys - testlen, xs - 1, ys - 1)),
        ("lower left", (0, ys - testlen, testlen, ys - 1)),
    )
    for corner, box in corner_boxes:
        if ImageStat.Stat(image.crop(box)).mean[0] < 16:
            raise Ballot.BallotException(
                "Dark %s corner on %s" % (corner, page.filename))

    upper_left, lower_left = tiltinfo[0], tiltinfo[3]
    xoff, yoff = upper_left[0], upper_left[1]
    shortdiff = lower_left[0] - upper_left[0]
    longdiff = lower_left[1] - upper_left[1]
    hypot = math.sqrt(shortdiff * shortdiff + longdiff * longdiff)
    rot = shortdiff / float(longdiff) if longdiff != 0 else 0
    if abs(rot) > const.allowed_tangent:
        raise Ballot.BallotException(
            "Tilt %f of %s exceeds %f" %
            (rot, page.filename, const.allowed_tangent))
    page.tiltinfo = tiltinfo
    return rot, xoff, yoff, hypot
def find_landmarks(self, page):
    """Ask the operator for the landmarks of a demo template.

    The operator is prompted for the coordinates of an upper left and
    an upper right landmark and for the intensity of each page corner.
    The upper left landmark becomes the page offset and the tilt is
    derived from the two landmarks.

    Raises:
        Ballot.BallotException: if any coordinate is -1, the two
            landmarks share an x coordinate, a corner intensity is
            below 16, or the tilt exceeds const.allowed_tangent.

    Returns:
        (rot, xoff, yoff, longdiff)
    """
    print("In find_landmarks")
    a = ask("Enter the x coordinate of an upper left landmark; "
            "if your template is not offset or tilted, you could use 150. "
            "If there's no such landmark, enter -1: ",
            int, -1)
    b = ask("Now enter the corresponding y coordinate; "
            "if your template is not offset or tilted, you could use 75. "
            "If there's no such landmark, enter -1: ",
            int, -1)
    c = ask("Enter the x coordinate of an upper RIGHT landmark; "
            "if your template is not offset or tilted, you could use 2050. "
            "If there's no such landmark, enter -1: ",
            int, -1)
    d = ask("Enter the corresponding y coordinate; "
            "if your template is not offset or tilted, you could use 75. "
            "If there's no such landmark, enter -1: ",
            int, -1)
    if -1 in (a, b, c, d):
        raise Ballot.BallotException("Could not find landmarks")

    # flunk ballots whose corners are reported as too dark, to avoid
    # dealing with severely skewed ballots
    for corner in ("upper left", "upper right",
                   "lower right", "lower left"):
        avg_darkness = ask("What's the intensity at the " + corner,
                           IntIn(0, 255))
        if int(avg_darkness) < 16:
            raise Ballot.BallotException(
                "Dark %s corner on %s" % (corner, page.filename))

    xoff = a
    yoff = b
    shortdiff = d - b
    longdiff = c - a
    if longdiff == 0:
        # typed-in landmarks with the same x would otherwise surface
        # as an uncaught ZeroDivisionError
        raise Ballot.BallotException("Could not find landmarks")
    rot = -shortdiff / float(longdiff)
    if abs(rot) > const.allowed_tangent:
        raise Ballot.BallotException(
            "Tilt %f of %s exceeds %f" %
            (rot, page.filename, const.allowed_tangent))
    return rot, xoff, yoff, longdiff
def main():
    """Process ballots one command at a time.

    Whenever nothing is pending, get_processing_command supplies the
    next instruction: "+" advances past the current sheet, "=N" jumps
    to image N, "S" processes the current sheet, and anything else
    (including "0") ends the loop.  A processed sheet is inserted into
    the database, its images are moved to the proc tree and
    nexttoprocess.txt is advanced by const.num_pages.
    """
    # get command line arguments, read configuration, open the log
    cfg_file = get_args()
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)
    log.info("Log created.")

    # create initial toplevel directories if they don't exist
    toplevel_dirs = ["templates", "template_images", "composite_images",
                     "results", "proc", "errors"]
    for dirname in toplevel_dirs:
        util.mkdirp(util.root(dirname))

    # make sure you have code for ballot type spec'd in config file
    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        util.fatal("No such ballot type: " + const.layout_brand +
                   ": check " + cfg_file)

    extensions = Ballot.Extensions(
        template_cache=Ballot.TemplateCache(util.root("templates")))

    # connect to db and open cursor
    if not const.use_db:
        dbc = db.NullDB()
    else:
        try:
            dbc = db.PostgresDB(database=const.dbname, user=const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")
    log.info("Database connected.")

    total_images_processed = 0
    total_images_left_unprocessed = 0
    next_file = util.root("nexttoprocess.txt")
    pending = 0
    while True:
        next_ballot_number = int(util.readfrom(next_file))
        if pending == 0:
            # wait here for an instruction from stdio
            command = get_processing_command(next_ballot_number)
            prefix = command[:1]
            if prefix == "+":
                next_ballot_number += const.num_pages
                util.writeto(next_file, next_ballot_number)
                pending = 1
            elif prefix == "=":
                next_ballot_number = int(command[1:])
                util.writeto(next_file, next_ballot_number)
                pending = 1
            elif prefix == "S":
                pending = 1
            # we're done when we get instructed to process 0
            if pending == 0:
                break
        pending -= 1

        try:
            # clean up, in case...
            gc.collect()
            log.debug("Request for %d" % (next_ballot_number, ))
            unprocs = []
            for page_index in range(const.num_pages):
                unprocs.append(incomingn(next_ballot_number + page_index))
            log.info(unprocs)

            # we need all images for sheet to be available to process it
            for filename in unprocs:
                if os.path.exists(filename):
                    continue
                errmsg = "File %s not present or available!" % (
                    os.path.basename(filename), )
                log.info(errmsg)
                # if a file is not yet available, that's not fatal
                raise FileNotPresentException(errmsg)

            log.debug("Creating ballot.")
            try:
                ballot = ballotfrom(unprocs, extensions)
                log.debug("Created ballot, processing.")
                results = ballot.ProcessPages()
                log.debug("Processed.")
            except BallotException as e:
                total_images_left_unprocessed += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            # make dirs for this sheet's outputs
            proc1d = dirn("proc", next_ballot_number)
            resultsd = dirn("results", next_ballot_number)
            resultsfilename = filen(resultsd, next_ballot_number)
            util.mkdirp(proc1d)
            util.mkdirp(resultsd)

            # write to the database
            try:
                log.debug("Inserting to db")
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, just need to
                # remove partial files
                for suffix in (".txt", const.filename_extension):
                    remove_partial(resultsfilename + suffix)
                log.info("Could not commit to db")
                print("Could not commit to db!")
                util.fatal("Could not commit vote information to database")

            # move the images from unproc to proc
            log.debug("Renaming")
            for page_index, source in enumerate(unprocs):
                target = (filen(proc1d, next_ballot_number + page_index) +
                          const.filename_extension)
                try:
                    os.rename(source, target)
                except OSError:
                    log.info("Could not rename %s" % source)
                    util.fatal("Could not rename %s", source)
            total_images_processed += const.num_pages

            # Tell caller you've processed all images of this ballot by
            # updating the next ballot file with the next image number
            log.debug("Requesting next")
            util.writeto(next_file, next_ballot_number + const.num_pages)
            log.debug("Done writing nexttoprocess.txt")
            log.info("%d images processed", const.num_pages)
        except FileNotPresentException as e:
            print(e)
            sys.stdout.flush()
def extract_VOP(self, page, rotatefunc, scale, choice):
    """Extract a single oval, or writein box, from the specified ballot.

    We'll tell you the coordinates, you tell us the stats.  The operator
    is prompted for the intensity statistics of the vote target; the
    same value is used for the red, green and blue channels.  The
    information gathered should enable the IsVoted function to make a
    reasonable decision about whether the area was voted.

    Args:
        page: page being read; supplies image, xoff/yoff, template
            offsets, margins and target size.
        rotatefunc: callable (x, y, scale) -> (x, y) applied after the
            landmark translation.
        scale: divisor applied to the page offsets before translation.
        choice: object whose coords() gives the upper left corner of the
            bounding box of the printed vote target.

    Returns:
        (cropx, cropy, stats, crop, voted, writein, ambiguous).
    """
    print("In extract_VOP")
    iround = lambda x: int(round(x))
    x, y = choice.coords()
    x = int(x)
    y = int(y)
    margin_width = page.margin_width
    margin_height = page.margin_height

    scaled_page_offset_x = page.xoff / scale
    scaled_page_offset_y = page.yoff / scale
    # label each pair for what it is, and include the template offsets
    self.log.debug(
        "Incoming coords (%d,%d), page offsets (%d,%d) "
        "scaled page offsets (%d,%d), template offsets (%d,%d)"
        % (x, y, page.xoff, page.yoff,
           scaled_page_offset_x, scaled_page_offset_y,
           page.template.xoff, page.template.yoff))

    # adjust x and y for the shift of landmark between template and ballot
    x = iround(x + scaled_page_offset_x - page.template.xoff)
    y = iround(y + scaled_page_offset_y - page.template.yoff)
    self.log.debug("Result of transform: (%d,%d)" % (x, y))
    x, y = rotatefunc(x, y, scale)

    ow, oh = page.target_width, page.target_height
    print("At %d dpi, on a scale of 0 to 255, "
          "tell us the average intensity from (%d, %d) "
          "for width %d height %d, "
          "given an offset from the specified x of %d "
          % (const.dpi, x, y, ow, oh, self.vote_target_horiz_offset))
    intensity = ask("Intensity", IntIn(0, 255))
    lowest = ask("Lowest count", IntIn(0, 1000))
    low = ask("Low count", IntIn(0, 1000))
    high = ask("High count", IntIn(0, 1000))
    highest = ask("Highest count", IntIn(0, 1000))
    suspicious = ask("Value of suspicious", int)

    # one set of answers stands in for each of the three colour bands
    band = (intensity, lowest, low, high, highest)
    # NOTE(review): the last slot is assumed to be the "suspicious"
    # field of Ballot.IStats - confirm.  It was hard-coded to 0, which
    # discarded the operator's answer.
    stats = Ballot.IStats(band + band + band + (x, y, suspicious))

    cropx = stats.adjusted.x
    cropy = stats.adjusted.y
    crop = page.image.crop((
        cropx - margin_width,
        cropy - margin_height,
        cropx + margin_width + ow,
        cropy + margin_height + oh
    ))

    voted, ambiguous = self.extensions.IsVoted(crop, stats, choice)
    writein = False
    if voted:
        writein = self.extensions.IsWriteIn(crop, stats, choice)
    if writein:
        print("Gather information about the write-in at %s %s %s %s" % (
            cropx - margin_width,
            cropy - margin_height,
            cropx + self.writein_xoff + margin_width,
            cropy + self.writein_yoff + margin_height))
    return cropx, cropy, stats, crop, voted, writein, ambiguous
def extract_VOP(self, page, rotatefunc, scale, choice):
    """Extract a single oval, or writein box, from the specified ballot.

    After translating and rotating the template coordinates, the crop is
    recentred vertically by examining a one pixel wide strip down its
    middle for the printed oval.

    Args:
        page: page being read; supplies image, xoff/yoff, template
            offsets, margins and target size.
        rotatefunc: callable (x, y, scale) -> (x, y) applied after the
            landmark translation.
        scale: divisor applied to the page offsets before translation.
        choice: object whose coords() gives the template (x, y) of the
            vote target.

    Returns:
        (cropx, cropy, stats, crop, voted, writein, ambiguous).
    """
    iround = lambda x: int(round(x))
    adj = lambda a: int(round(const.dpi * a))
    x, y = choice.coords()
    printed_oval_height = adj(0.097)

    scaled_page_offset_x = page.xoff / scale
    scaled_page_offset_y = page.yoff / scale
    # label each pair for what it is, and include the template offsets
    self.log.debug(
        "Incoming coords (%d,%d), page offsets (%d,%d) "
        "scaled page offsets (%d,%d), template offsets (%d,%d)"
        % (x, y, page.xoff, page.yoff,
           scaled_page_offset_x, scaled_page_offset_y,
           page.template.xoff, page.template.yoff))

    # adjust x and y for the shift of landmark between template and ballot
    x = iround(x + scaled_page_offset_x - page.template.xoff)
    y = iround(y + scaled_page_offset_y - page.template.yoff)
    self.log.debug("Result of transform: (%d,%d)" % (x, y))
    x, y = rotatefunc(x, y, scale)

    def target_crop(top_y):
        """Crop the vote target plus margins with its corner at top_y."""
        return page.image.crop(
            (cropx - page.margin_width,
             top_y - page.margin_height,
             min(cropx + page.margin_width + page.target_width,
                 page.image.size[0] - 1),
             min(top_y + page.margin_height + page.target_height,
                 page.image.size[1] - 1)))

    cropx, cropy = x, y  # not adjusted like in PILB cropstats
    crop = target_crop(cropy)

    # check strip at center to look for either filled or empty oval;
    # recenter vertically
    mid_x = crop.size[0] // 2
    stripe = crop.crop((mid_x, 0, mid_x + 1, crop.size[1] - 1))
    before_oval = 0
    after_oval = 0
    stripedata = list(stripe.getdata())
    for num, p in enumerate(stripedata):
        if p[0] > 245:
            before_oval += 1
            continue
        try:
            # is there darkness within two pixels of where the bottom
            # of an oval starting here would be?
            bottom = before_oval + printed_oval_height
            if any(stripedata[bottom + delta][0] < 245
                   for delta in (-2, -1, 0, 1, 2)):
                after_oval = stripe.size[1] - (num + printed_oval_height)
                break
        except IndexError:
            # the expected bottom lies beyond the strip
            break

    afterlessbefore = (after_oval - before_oval) // 2
    if abs(afterlessbefore) > 2:
        cropy -= afterlessbefore
        crop = target_crop(cropy)

    stats = Ballot.IStats(cropstats(crop, x, y))
    voted, ambiguous = self.extensions.IsVoted(crop, stats, choice)
    writein = self.extensions.IsWriteIn(crop, stats, choice)
    if writein:
        crop = page.image.crop(
            (cropx - page.margin_width,
             cropy - page.margin_height,
             min(cropx + page.margin_width + self.writein_xoff,
                 page.image.size[0] - 1),
             min(cropy + page.margin_height + self.writein_yoff,
                 page.image.size[1] - 1)))
    return cropx, cropy, stats, crop, voted, writein, ambiguous
def build_regions(self, page, tm_list, dpi, stop=True, verbose=False):
    """Build regions returns a list of Contests found on the page.

    For every column in page.top_columns the ovals and text zones are
    located.  Text zones near an oval become the text of a Choice; runs
    of text zones not associated with any oval are OCR'd as the text of
    a new Contest, to which the following choices are appended.

    Args:
        page: page being laid out; supplies image and top_columns.
        tm_list: not used by this implementation.
        dpi: resolution used to size every search window.
        stop: not used by this implementation.
        verbose: not used by this implementation.

    Returns:
        list of Ballot.Contest.
    """
    regionlist = []
    top_columns = page.top_columns
    try:
        column_width = top_columns[1][0] - top_columns[0][0]
    except (IndexError, TypeError):
        # fewer than two columns to measure between
        column_width = 2 * dpi

    for top_xy in top_columns:
        column_x = top_xy[0]
        matched = []
        ovals = self.column_oval_search(page, column_x)
        textzones = self.column_textzone_search(
            page, column_x + (column_width / 2))
        ovals.sort()
        textzones.sort()
        zonestart = 0
        zoneend = 0
        for textzone in textzones:
            # any text beginning from 1/16" above the oval to 1/4"
            # below is associated with the oval; the last one wins
            match = 0
            for oval in ovals:
                if (oval - dpi / 16) < textzone < (oval + dpi / 4):
                    match = oval

            if match <= 0:
                if zonestart == 0:
                    zonestart = textzone
                # textzone includes both the 1/32 which may have
                # contributed a dark pixel into the triggering crop,
                # and an additional 16th inch to take into account the
                # chance there was another dark zone not quite long
                # enough to become a text zone
                zoneend = textzone + (dpi / 32) + (dpi / 16)
                continue

            if zonestart > 0 and zoneend > zonestart:
                # output last nonmatching textzone as the contest text
                croplist = (column_x - dpi / 8,
                            zonestart,
                            column_x + column_width - dpi / 4,
                            zoneend)
                crop = page.image.crop(croplist)
                # The extensions object offers the ability to provide
                # the ocr and text cleanup functions of your choice.
                text = self.extensions.ocr_engine(crop)
                text = self.extensions.ocr_cleaner(text)
                print("Contest Text: %s" % (text, ))
                # Record the contest at the zone's y before resetting
                # the zone; resetting first always stored y == 0.
                regionlist.append(
                    Ballot.Contest(column_x, zonestart,
                                   column_width, dpi, 0, text))
                zonestart = 0
                zoneend = 0

            # get text for ovals only once
            if match not in matched:
                croplist = (column_x + dpi / 4,
                            match - (dpi / 50),
                            column_x + column_width - dpi / 4,
                            match + (dpi / 3))
                crop = page.image.crop(croplist)
                text = self.extensions.ocr_engine(crop)
                text = self.extensions.ocr_cleaner(text)
                print("Oval (%d, %d): %s" % (column_x, match,
                                             text.strip()))
                if len(regionlist) > 0:
                    regionlist[-1].append(
                        #TODO add x2, y2, remove text
                        Ballot.Choice(column_x, match, text))
                # remember the oval so its text is only read once
                matched.append(match)
    return regionlist
def main():
    """Process incoming ballot images until no more are found.

    Loads the configuration named on the command line, makes sure the
    working directories exist, then walks the ballot numbers supplied by
    the next-to-process file.  For each ballot whose first page image
    exists, the pages are processed, the results are written to the
    results directory and inserted into the database, and the page
    images are renamed into the proc directory.  Ballots that raise
    BallotException are handed to mark_error and skipped.  The loop ends
    after more than ten ballots with a missing first page.

    Whatever way the loop ends, the template cache and the next-ballot
    file are saved, the database handle is closed and totals are logged.
    """
    # NOTE(review): main() is defined a second time further down in this
    # file; that later definition replaces this one at import time.
    miss_counter = 0

    # command line -> configuration file -> constants and logger
    cfg_file = get_args()
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)

    # create the top level dirs if they do not exist; the two image
    # directories carry the process id in their names
    top_level_dirs = (
        "%s" % ("templates"),
        "%s%d" % ("template_images", os.getpid()),
        "%s%d" % ("composite_images", os.getpid()),
        "results",
        "proc",
        "errors",
    )
    for dirname in top_level_dirs:
        util.mkdirp(util.root(dirname))

    next_ballot = next.File(util.root("nexttoprocess.txt"), const.num_pages)

    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        util.fatal("No such ballot type: " + const.layout_brand
                   + ": check " + cfg_file)

    # all instances share one template location, while template_images
    # and composite_images are per-pid (see the directories above)
    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # pick the database backend
    if not const.use_db:
        dbc = db.NullDB()
    else:
        try:
            dbc = db.PostgresDB(const.dbname, const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")

    total_proc = 0
    total_unproc = 0
    base = os.path.basename

    try:
        for n in next_ballot:
            gc.collect()

            # incoming file name of every page of ballot n
            unprocs = []
            for m in range(const.num_pages):
                unprocs.append(incomingn(n + m))

            if not os.path.exists(unprocs[0]):
                miss_counter += 1
                log.info(base(unprocs[0])
                         + " does not exist. No more records to process")
                if miss_counter > 10:
                    break
                continue

            page_names = "\n".join("\t%s" % base(u) for u in unprocs)
            log.info("Processing %s:\n %s" % (n, page_names))

            try:
                ballot = ballotfrom(unprocs, extensions)
                results = ballot.ProcessPages()
            except BallotException as e:
                total_unproc += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            csv_text = Ballot.results_to_CSV(results)

            # make the output directories for this ballot
            proc1d = dirn("proc", n)
            resultsd = dirn("results", n)
            resultsfilename = filen(resultsd, n)
            for outdir in (proc1d, resultsd):
                util.mkdirp(outdir)

            # a failure here is printed and otherwise ignored
            try:
                results_to_vop_files(results, resultsfilename)
            except Exception as e:
                print(e)

            # write the csv
            util.genwriteto(resultsfilename + ".txt", csv_text)

            # write to the database
            try:
                dbc.insert(ballot)
            except db.DatabaseError:
                # dbc does not commit if there is an error, so only the
                # partial files need to be removed
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                util.fatal("Could not commit vote information to database")

            # move the images from unproc to proc; all destination names
            # are computed before the first rename
            procs = []
            for m in range(const.num_pages):
                procs.append(filen(proc1d, n + m) + const.filename_extension)
            for src, dst in zip(unprocs, procs):
                try:
                    os.rename(src, dst)
                except OSError:
                    util.fatal("Could not rename %s", src)

            total_proc += const.num_pages
            log.info("%d images processed", const.num_pages)
    finally:
        cache.save_all()
        dbc.close()
        next_ballot.save()
        log.info("%d images processed", total_proc)
        if total_unproc > 0:
            log.warning("%d images NOT processed.", total_unproc)
def main():
    """Run the ballot-counting loop over the incoming image files.

    Steps, in order:
      1. read the configuration file named by the command line and set
         up logging;
      2. create the top level working directories;
      3. load the ballot type, the shared template cache and the
         database handle;
      4. for each ballot number from the next-to-process file, process
         the page images, write the results (vop files, a .txt file and
         a database insert) and rename the page images into the proc
         directory;
      5. in every case, save the cache and the next-to-process file,
         close the database handle and log the totals.

    A ballot whose first page file is missing is skipped; the loop stops
    once more than ten such ballots have been seen.  A ballot raising
    BallotException is passed to mark_error and skipped.
    """
    # NOTE(review): an earlier definition of main() exists in this file;
    # this later one is the definition left bound after import.
    skipped_missing = 0

    # get command line arguments, then read the configuration and set
    # the constants for this run
    cfg_file = get_args()
    config.get(cfg_file)
    util.mkdirp(const.root)
    log = config.logger(const.logfilename)

    # create initial top level dirs, if they do not exist
    initial_dirs = ("%s" % ("templates"),
                    "%s%d" % ("template_images", os.getpid()),
                    "%s%d" % ("composite_images", os.getpid()),
                    "results", "proc", "errors")
    for name in initial_dirs:
        rooted = util.root(name)
        util.mkdirp(rooted)

    counter_path = util.root("nexttoprocess.txt")
    next_ballot = next.File(counter_path, const.num_pages)

    try:
        ballotfrom = Ballot.LoadBallotType(const.layout_brand)
    except KeyError:
        util.fatal("No such ballot type: " + const.layout_brand +
                   ": check " + cfg_file)

    # the template location is common to all instances
    cache = Ballot.TemplateCache(util.root("templates"))
    extensions = Ballot.Extensions(template_cache=cache)

    # connect to the database, or use the null database when disabled
    if const.use_db:
        try:
            dbc = db.PostgresDB(const.dbname, const.dbuser)
        except db.DatabaseError:
            util.fatal("Could not connect to database")
    else:
        dbc = db.NullDB()

    total_proc, total_unproc = 0, 0
    base = os.path.basename

    try:
        for n in next_ballot:
            gc.collect()
            unprocs = [incomingn(n + m) for m in range(const.num_pages)]

            first_exists = os.path.exists(unprocs[0])
            if not first_exists:
                skipped_missing += 1
                log.info(
                    base(unprocs[0]) +
                    " does not exist. No more records to process")
                if skipped_missing > 10:
                    break
                continue

            listing = "\n".join("\t%s" % base(u) for u in unprocs)
            log.info("Processing %s:\n %s" % (n, listing))

            try:
                ballot = ballotfrom(unprocs, extensions)
                results = ballot.ProcessPages()
            except BallotException as e:
                total_unproc += mark_error(e, *unprocs)
                log.exception("Could not process ballot")
                continue

            csv_data = Ballot.results_to_CSV(results)

            # output locations for this ballot
            proc1d = dirn("proc", n)
            resultsd = dirn("results", n)
            resultsfilename = filen(resultsd, n)
            util.mkdirp(proc1d)
            util.mkdirp(resultsd)

            # errors writing the vop files are printed, not raised
            try:
                results_to_vop_files(results, resultsfilename)
            except Exception as e:
                print(e)

            txt_name = resultsfilename + ".txt"
            util.genwriteto(txt_name, csv_data)

            try:
                dbc.insert(ballot)
            except db.DatabaseError:
                # nothing was committed; remove the partial files
                remove_partial(resultsfilename + ".txt")
                remove_partial(resultsfilename + const.filename_extension)
                util.fatal("Could not commit vote information to database")

            # move the images from unproc to proc
            procs = [
                filen(proc1d, n + m) + const.filename_extension
                for m in range(const.num_pages)
            ]
            for old_path, new_path in zip(unprocs, procs):
                try:
                    os.rename(old_path, new_path)
                except OSError:
                    util.fatal("Could not rename %s", old_path)

            total_proc += const.num_pages
            log.info("%d images processed", const.num_pages)
    finally:
        cache.save_all()
        dbc.close()
        next_ballot.save()
        log.info("%d images processed", total_proc)
        if total_unproc > 0:
            log.warning("%d images NOT processed.", total_unproc)