def process(pdf, coarse_rotate_angle):
    """Extract page images from a PDF, straighten them, and save line data.

    Every processed image is written to ``<pdf><page number>.jpg`` (pages
    are numbered from 1) and the per-page line data is dumped as JSON to
    ``<pdf>.json.txt``.

    Args:
        pdf: Path of the source PDF; it is also the prefix of every output
            file name.
        coarse_rotate_angle: Coarse rotation handed to ``skew.coarserotate``
            for every image. Any number is accepted; it is reduced into
            ``[0, 360)`` before use.

    Returns:
        None. All results are written to disk.
    """
    ## Get images from PDF
    images = extract.extractImages(pdf)

    ## Straighten images
    # Python's % takes the sign of the divisor, so a single modulo maps
    # negative angles into [0, 360) as well (-90 -> 270). The former
    # "360 - abs(angle) % 360" branch produced 360 rather than 0 for
    # negative multiples of 360.
    coarse_rotate_angle = coarse_rotate_angle % 360
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("angle:" + str(coarse_rotate_angle))

    # NOTE(review): other code in this project unpacks skew.straighten() as
    # (image, angle); here the result is used directly as the image. Confirm
    # which return shape is current.
    images = [
        skew.straighten(skew.coarserotate(image, coarse_rotate_angle))
        for image in images
    ]
    print("images found: " + str(len(images)))

    ## Store each page's line locations keyed by 1-based page number, and
    ## write each image to file.
    ## File naming convention: <original filename><page number>.jpg
    lines = {}
    for page_number, image in enumerate(images, 1):
        image_path = pdf + str(page_number) + ".jpg"
        lines[page_number] = getlines.getLines(image)
        cv2.imwrite(image_path, image)

    ## Write line data in JSON format to a .txt file
    ## File naming convention: <original filename>.json.txt
    with open(pdf + ".json.txt", 'w') as outfile:
        json.dump(lines, outfile)
def process(pdf, coarse_rotate_angle):
    """Extract page images from a PDF, straighten them, and save line data.

    Every processed image is written to ``<pdf><page number>.jpg`` (pages
    are numbered from 1) and the per-page line data is dumped as JSON to
    ``<pdf>.json.txt``.

    Args:
        pdf: Path of the source PDF; it is also the prefix of every output
            file name.
        coarse_rotate_angle: Coarse rotation handed to ``skew.coarserotate``
            for every image. Any number is accepted; it is reduced into
            ``[0, 360)`` before use.

    Returns:
        None. All results are written to disk.
    """
    ## Get images from PDF
    images = extract.extractImages(pdf)

    ## Straighten images
    # A single modulo is enough: Python's % follows the sign of the divisor,
    # so negative angles also end up in [0, 360) (-90 -> 270). The previous
    # "360 - abs(angle) % 360" branch returned 360 instead of 0 for negative
    # multiples of 360.
    coarse_rotate_angle = coarse_rotate_angle % 360
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("angle:" + str(coarse_rotate_angle))

    # NOTE(review): other code in this project unpacks skew.straighten() as
    # (image, angle); here the result is used directly as the image. Confirm
    # which return shape is current.
    images = [
        skew.straighten(skew.coarserotate(image, coarse_rotate_angle))
        for image in images
    ]
    print("images found: " + str(len(images)))

    ## Store each page's line locations keyed by 1-based page number, and
    ## write each image to file.
    ## File naming convention: <original filename><page number>.jpg
    lines = {}
    for page_number, image in enumerate(images, 1):
        image_path = pdf + str(page_number) + ".jpg"
        lines[page_number] = getlines.getLines(image)
        cv2.imwrite(image_path, image)

    ## Write line data in JSON format to a .txt file
    ## File naming convention: <original filename>.json.txt
    with open(pdf + ".json.txt", 'w') as outfile:
        json.dump(lines, outfile)
def setUp(self):
    """Load the black-margins fixture, straighten it, and detect its lines.

    Sets ``self.image`` (the image as read from disk), ``self.img`` and
    ``self.angle`` (the pair returned by ``skew.straighten``), and
    ``self.lines`` (the result of ``getlines.getLines`` on ``self.img``).
    """
    # Text with a large amount of black space from a copier error.
    fixture_path = "testimg/blackmarginsfixed.jpg"
    self.image = cv2.imread(fixture_path)

    # skew.straighten is given a grayscale conversion of the loaded image.
    grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    straightened = skew.straighten(grayscale)
    self.img, self.angle = straightened

    self.lines = getlines.getLines(self.img)
def setUp(self):
    """Load the bent-corner noise fixture, straighten it, and detect its lines.

    Sets ``self.image`` (the image as read from disk), ``self.img`` and
    ``self.angle`` (the pair returned by ``skew.straighten``), and
    ``self.lines`` (the result of ``getlines.getLines`` on ``self.img``).
    """
    # Text with noise confined to one area (from a bent corner).
    fixture_path = "testimg/noisy2fixed.jpg"
    self.image = cv2.imread(fixture_path)

    # skew.straighten is given a grayscale conversion of the loaded image.
    grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    straightened = skew.straighten(grayscale)
    self.img, self.angle = straightened

    self.lines = getlines.getLines(self.img)
def setUp(self):
    """Load the background-noise fixture, straighten it, and detect its lines.

    Sets ``self.image`` (the image as read from disk), ``self.img`` and
    ``self.angle`` (the pair returned by ``skew.straighten``), and
    ``self.lines`` (the result of ``getlines.getLines`` on ``self.img``).
    """
    # Text with background noise (from a newspaper).
    fixture_path = "testimg/noisy1fixed.jpg"
    self.image = cv2.imread(fixture_path)

    # skew.straighten is given a grayscale conversion of the loaded image.
    grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    straightened = skew.straighten(grayscale)
    self.img, self.angle = straightened

    self.lines = getlines.getLines(self.img)
def setUp(self):
    """Load the ``rotated.jpg`` fixture, straighten it, and detect its lines.

    Sets ``self.image`` (the image as read from disk), ``self.img`` and
    ``self.angle`` (the pair returned by ``skew.straighten``), and
    ``self.lines`` (the result of ``getlines.getLines`` on ``self.img``).
    """
    # One picture image.
    fixture_path = "testimg/rotated.jpg"
    self.image = cv2.imread(fixture_path)

    # skew.straighten is given a grayscale conversion of the loaded image.
    grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    straightened = skew.straighten(grayscale)
    self.img, self.angle = straightened

    self.lines = getlines.getLines(self.img)
def test_calculate_angle(self):
    """Check that the skew angle computed for ``self.image`` is close to zero.

    The angle returned by ``skew.straighten`` must differ from the expected
    value of 0 by less than 2 (presumably degrees; confirm against
    ``skew.straighten``).
    """
    grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
    # Only the reported angle matters here; the straightened image is unused.
    _, angle = skew.straighten(grayscale)
    # assertLess reports both operands on failure, unlike assertTrue(a < b).
    self.assertLess(abs(angle), 2)