def process(pdf, coarse_rotate_angle):
    """Extract page images from a PDF, straighten them, and record line positions.

    Every image is written to ``<pdf><page number>.jpg`` (pages are numbered
    from 1) and the per-page results of ``getlines.getLines`` are dumped as
    JSON to ``<pdf>.json.txt``. There is no return value.

    Args:
        pdf: Path handed to ``extract.extractImages``; it is also the prefix
            of every output file name.
        coarse_rotate_angle: Angle passed to ``skew.coarserotate`` for every
            image. Any sign or magnitude is accepted; it is reduced modulo
            360 into the range [0, 360) first.
    """
    ## Get images from PDF
    images = extract.extractImages(pdf)

    ## Straighten images
    # Python's % already returns a non-negative result for a positive
    # modulus, so a single expression handles both signs. The former
    # `360 - (abs(angle) % 360)` branch turned exact negative multiples of
    # 360 (e.g. -360) into 360 instead of 0.
    coarse_rotate_angle = coarse_rotate_angle % 360
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("angle:" + str(coarse_rotate_angle))
    # NOTE(review): the unit tests in this file unpack skew.straighten() as
    # (image, angle), while the result is used directly as an image below --
    # confirm which return shape applies to this caller.
    images = [
        skew.straighten(skew.coarserotate(image, coarse_rotate_angle))
        for image in images
    ]
    print("images found: " + str(len(images)))

    ## Store each image's line locations in a hash table keyed by its
    ## 1-based page number, and write each image to file.
    ## File naming convention: <original filename><page number>.jpg
    lines = {}
    for page_number, image in enumerate(images, 1):
        lines[page_number] = getlines.getLines(image)
        cv2.imwrite(pdf + str(page_number) + ".jpg", image)

    ## Write line data in JSON format to a .txt file
    ## File naming convention: <original filename>.json.txt
    # json.dump serialises the integer page numbers as string keys.
    with open(pdf + ".json.txt", 'w') as outfile:
        json.dump(lines, outfile)
Example #2
0
def process(pdf, coarse_rotate_angle):
    """Extract images from a PDF, fix skew, and save images plus line data.

    Output files: one ``<pdf><page number>.jpg`` per extracted image (page
    numbers start at 1) and a single ``<pdf>.json.txt`` holding the
    ``getlines.getLines`` result for each page. There is no return value.

    Args:
        pdf: Path given to ``extract.extractImages`` and reused as the prefix
            of all output file names.
        coarse_rotate_angle: Angle forwarded to ``skew.coarserotate``. It may
            be negative or exceed 360; it is normalised into [0, 360).
    """
    ## Get images from PDF
    images = extract.extractImages(pdf)

    ## Straighten images
    # `%` with a positive modulus is never negative in Python, so no sign
    # branch is needed. The previous negative branch computed
    # `360 - (abs(angle) % 360)`, which yields 360 (not 0) for -360, -720, ...
    coarse_rotate_angle %= 360
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("angle:" + str(coarse_rotate_angle))
    # NOTE(review): tests in this file treat skew.straighten() as returning
    # an (image, angle) pair; here its result is passed on as the image
    # itself -- verify against the skew module.
    images = [
        skew.straighten(skew.coarserotate(page, coarse_rotate_angle))
        for page in images
    ]
    print("images found: " + str(len(images)))

    ## Iterate through list of images, storing lists of corresponding line
    ## locations in a hash table keyed by 1-based page number.
    ## Then write each image to file.
    ## File naming convention: <original filename><page number>.jpg
    lines = {}
    for n, page in enumerate(images, 1):
        lines[n] = getlines.getLines(page)
        cv2.imwrite(pdf + str(n) + ".jpg", page)

    ## Write line data in JSON format to a .txt file
    ## File naming convention: <original filename>.json.txt
    # Integer keys become strings in the JSON output.
    with open(pdf + ".json.txt", 'w') as outfile:
        json.dump(lines, outfile)
Example #3
0
 def setUp(self):
     """Load the fixture image and cache the straighten/getLines results.

     Sets ``self.image`` (as read by cv2), ``self.img`` and ``self.angle``
     (the pair unpacked from ``skew.straighten`` on the grayscale image),
     and ``self.lines`` (``getlines.getLines`` applied to ``self.img``).
     """
     # Fixture: text with a large amount of black space from a copier error.
     self.image = cv2.imread("testimg/blackmarginsfixed.jpg")
     grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
     straightened = skew.straighten(grayscale)
     self.img, self.angle = straightened
     self.lines = getlines.getLines(self.img)
Example #4
0
 def setUp(self):
     """Load the fixture image and cache the straighten/getLines results.

     Sets ``self.image`` (as read by cv2), ``self.img`` and ``self.angle``
     (the pair unpacked from ``skew.straighten`` on the grayscale image),
     and ``self.lines`` (``getlines.getLines`` applied to ``self.img``).
     """
     # Fixture: text with one-area noise (from a bent corner).
     self.image = cv2.imread("testimg/noisy2fixed.jpg")
     grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
     straightened = skew.straighten(grayscale)
     self.img, self.angle = straightened
     self.lines = getlines.getLines(self.img)
Example #5
0
 def setUp(self):
     """Load the fixture image and cache the straighten/getLines results.

     Sets ``self.image`` (as read by cv2), ``self.img`` and ``self.angle``
     (the pair unpacked from ``skew.straighten`` on the grayscale image),
     and ``self.lines`` (``getlines.getLines`` applied to ``self.img``).
     """
     # Fixture: text with background noise (from a newspaper).
     self.image = cv2.imread("testimg/noisy1fixed.jpg")
     grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
     straightened = skew.straighten(grayscale)
     self.img, self.angle = straightened
     self.lines = getlines.getLines(self.img)
Example #6
0
 def setUp(self):
     """Load the fixture image and cache the straighten/getLines results.

     Sets ``self.image`` (as read by cv2), ``self.img`` and ``self.angle``
     (the pair unpacked from ``skew.straighten`` on the grayscale image),
     and ``self.lines`` (``getlines.getLines`` applied to ``self.img``).
     """
     # Fixture: one picture image.
     self.image = cv2.imread("testimg/rotated.jpg")
     grayscale = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
     straightened = skew.straighten(grayscale)
     self.img, self.angle = straightened
     self.lines = getlines.getLines(self.img)
Example #7
0
 def test_calculate_angle(self):
     """Check that the angle reported by skew.straighten is within 2 of zero.

     Runs ``skew.straighten`` on the grayscale version of ``self.image`` and
     compares the magnitude of the returned angle against a tolerance of 2.
     """
     # Compares calculated angle to the observed angle (0) to make sure they
     # agree within tolerance. The image half of the returned pair is not
     # needed here.
     _, angle = skew.straighten(
         cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY))
     # assertLess includes the measured value in the failure message, which
     # assertTrue(<comparison>) does not.
     self.assertLess(abs(angle), 2)