Example #1
0
 def __init__(self, baseline, font):
     """Set up an empty container tied to a baseline and a font.

     The underlying component is initialised with the inverted box
     ``(+INF, +INF, -INF, -INF)``; the character and text lists start
     empty and ``type`` is left unset.

     :param baseline: value stored unchanged as ``self.baseline``.
     :param font: value stored unchanged as ``self.font``.
     """
     inverted_bbox = (+INF, +INF, -INF, -INF)
     LTComponent.__init__(self, inverted_bbox)
     self.baseline, self.font = baseline, font
     self.type = None
     self._chars, self._text = [], []
Example #2
0
 def __init__(self, baseline, font):
     """Initialise an empty component for the given baseline and font.

     The bounding box passed to ``LTComponent`` is the inverted box
     ``(+INF, +INF, -INF, -INF)``. ``_chars`` and ``_text`` start as
     empty lists and ``type`` starts as ``None``.

     :param baseline: kept as ``self.baseline``.
     :param font: kept as ``self.font``.
     """
     LTComponent.__init__(self, (+INF, +INF, -INF, -INF))
     # Collections filled in later by other methods of the class.
     self._chars, self._text = [], []
     self.baseline, self.font, self.type = baseline, font, None
Example #3
0
 def __init__(self, bbox):
     """Create an empty container covering ``bbox``.

     :param bbox: bounding box forwarded unchanged to ``LTComponent``.

     The character, figure and curve lists start empty; ``type`` and
     ``font`` start as ``None``.
     """
     LTComponent.__init__(self, bbox)
     self.type = self.font = None
     self._chars, self._figs, self._curves = [], [], []
Example #4
0
 def __init__(self, bbox):
     """Initialise the component with ``bbox`` and empty content lists.

     :param bbox: bounding box handed straight to ``LTComponent.__init__``.
     """
     LTComponent.__init__(self, bbox)
     # Three independent, initially empty collections.
     self._chars, self._figs, self._curves = [], [], []
     # Not known at construction time.
     self.type, self.font = None, None
Example #5
0
    def to_tables7(self):
        """Group text lines into rows and slot each row's texts into columns.

        Every line of every box in ``self.texts`` that is not listed in
        ``self.multi_col_boxes`` is compared with the rows built so far.
        A line joins a row when its vertical overlap with the row's
        container is strictly between 90% and 110% of the container
        height; the container is then widened horizontally to include the
        line. The search does not stop at the first match, so a line can
        join several rows. A line that joins no row starts a new one.

        Rows are sorted by the ``y0`` of their container. Each row then
        gets a ``'cells'`` list with one slot per entry of
        ``self.finalcols2``; every text goes into the first horizontally
        overlapping column whose slot is still empty, and is dropped when
        no such slot exists.

        The result is stored in ``self.lines``.
        """
        rows = []
        for box in self.texts:
            if box in self.multi_col_boxes:
                continue
            for text_line in box:
                matched = False
                for row in rows:
                    frame = row['contain']
                    ratio = frame.voverlap(text_line) / frame.height
                    if 0.9 < ratio < 1.1:
                        row['texts'].append(text_line)
                        # Only the horizontal extent grows; y0/y1 stay put.
                        widened = (min(frame.x0, text_line.x0), frame.y0,
                                   max(frame.x1, text_line.x1), frame.y1)
                        frame.set_bbox(widened)
                        matched = True
                if matched:
                    continue
                rows.append({
                    'contain': LTComponent(text_line.bbox),
                    'texts': [text_line]
                })
        rows.sort(key=lambda row: row['contain'].y0)

        for row in rows:
            cells = [None] * len(self.finalcols2)
            row['cells'] = cells
            for text in row['texts']:
                for idx, column in enumerate(self.finalcols2):
                    # An occupied slot is skipped: the parser library seems
                    # to duplicate text sometimes (possible upstream bug).
                    if column['contain'].hoverlap(text) and cells[idx] is None:
                        cells[idx] = text
                        break
        self.lines = rows
Example #6
0
    def __init__(self, network):
        """Build the table described by ``network``.

        :param network: object supporting ``len()`` and exposing ``points``,
            an iterable of objects with ``x`` and ``y`` attributes.
        :raises EmptyTableError: when ``len(network)`` is 2 or less.

        The distinct ``y`` and ``x`` values of the points become the sorted
        row and column borders; the first and last of each give the
        bounding box passed to ``LTComponent``. Cells and elements are then
        derived through ``_create_cells`` and ``_build_elements``.
        """
        if len(network) < 3:
            raise self.EmptyTableError

        # Borders are the distinct coordinates, in ascending order.
        distinct_ys = set(pt.y for pt in network.points)
        self._rows_borders = sorted(distinct_ys)
        distinct_xs = set(pt.x for pt in network.points)
        self._columns_borders = sorted(distinct_xs)

        x_min, x_max = self._columns_borders[0], self._columns_borders[-1]
        y_min, y_max = self._rows_borders[0], self._rows_borders[-1]
        LTComponent.__init__(self, (x_min, y_min, x_max, y_max))

        cells = self._create_cells(network)
        self._cells = cells
        self._elements = self._build_elements(cells)
Example #7
0
	def newLTCharInit(self, matrix, font, fontsize, scaling, rise,
				text, textwidth, textdisp):
		"""Initialise a character item, also keeping its font and font size.

		NOTE(review): presumably installed in place of pdfminer's
		``LTChar.__init__`` -- confirm at the patch site.

		Besides the text, matrix, font name and advance, the ``font`` object
		and ``fontsize`` are stored on the instance. The glyph rectangle is
		computed in text space (vertical or horizontal writing, as reported
		by ``font.is_vertical()``), mapped through ``matrix`` and normalised
		so that ``x0 <= x1`` and ``y0 <= y1`` before being passed to
		``LTComponent``. ``size`` is the width for vertical fonts and the
		height otherwise.
		"""
		LTText.__init__(self)
		self._text = text
		self.matrix = matrix
		# Patched-in attributes.
		self.font = font
		self.fontsize = fontsize
		self.fontname = font.fontname
		self.adv = textwidth * fontsize * scaling
		vertical = font.is_vertical()
		# Boundary rectangle in text space.
		if vertical:
			glyph_width = font.get_width() * fontsize
			disp_x, disp_y = textdisp
			if disp_x is None:
				disp_x = glyph_width//2
			else:
				disp_x = disp_x * fontsize * .001
			disp_y = (1000 - disp_y) * fontsize * .001
			left = -disp_x
			base = disp_y + rise
			lower_left = (left, base+self.adv)
			upper_right = (left+glyph_width, base)
		else:
			glyph_height = font.get_height() * fontsize
			glyph_descent = font.get_descent() * fontsize
			base = glyph_descent + rise
			lower_left = (0, base)
			upper_right = (self.adv, base+glyph_height)
		(a, b, c, d, e, f) = self.matrix
		self.upright = (0 < a*d*scaling and b*c <= 0)
		(x0, y0) = apply_matrix_pt(self.matrix, lower_left)
		(x1, y1) = apply_matrix_pt(self.matrix, upper_right)
		# The transform may flip the corners; restore min/max ordering.
		if x0 > x1:
			x0, x1 = x1, x0
		if y0 > y1:
			y0, y1 = y1, y0
		LTComponent.__init__(self, (x0, y0, x1, y1))
		self.size = self.width if vertical else self.height
Example #8
0
 def __init__(self, annoObj, uri, pos, pageid):
     """Record one annotation object, its target and its first position.

     :param annoObj: stored as the single initial entry of ``origObjs``.
     :param uri: stored as ``gotoLoc``.
     :param pos: bounding box wrapped in an ``LTComponent``.
     :param pageid: stored next to that component in ``positions``.

     All remaining fields start empty: ``None`` for objects, ``""`` for
     strings, and ``assocText`` holds one empty string at index
     ``assocTextIn`` (0).
     """
     self.origObjs = [annoObj]
     self.positions = [[LTComponent(pos), pageid]]
     self.gotoLoc = uri

     # Associated text starts as a single empty string.
     self.assocText = [""]
     self.assocTextIn = 0

     # Fields with no value yet.
     self.destPage = self.papObj = None
     self.unparseCite = self.finalCiteStr = ""
     self.papLink = self.author = self.year = ""
Example #9
0
 def to_tables3(self):
     """Cluster ``self.columns`` into groups of mutually overlapping columns.

     A column joins the first existing group in which it horizontally
     overlaps (``is_hoverlap``) every member; otherwise it starts a new
     group whose container is a copy of the column's bounding box. The
     number of groups is logged and the list is stored in
     ``self.colgroups``.
     """
     groups = []
     for column in self.columns:
         box = column['contain']
         for group in groups:
             if column is group:
                 continue
             members = group['cols']
             if all(box.is_hoverlap(member['contain'])
                    for member in members):
                 members.append(column)
                 break
         else:
             # No group accepted the column: open a new one.
             groups.append({
                 'contain': LTComponent(box.bbox),
                 'cols': [column]
             })
     logger.info('{x} colgroups'.format(x=len(groups)))
     self.colgroups = groups
Example #10
0
 def given_plane_with_one_object(object_size=50, gridsize=50):
     """Return a 100x100 ``Plane`` holding one square object at the origin.

     :param object_size: side length of the square ``LTComponent``.
     :param gridsize: grid size passed to ``Plane``.
     :return: ``(plane, obj)`` tuple.
     """
     obj = LTComponent((0, 0, object_size, object_size))
     plane = Plane((0, 0, 100, 100), gridsize)
     plane.add(obj)
     return plane, obj
Example #11
0
 def __init__(self, bbox):
     """Create a component covering ``bbox`` with no text lines yet.

     :param bbox: bounding box forwarded to ``LTComponent.__init__``.
     """
     LTComponent.__init__(self, bbox)
     # Starts empty.
     self.text_lines = list()
Example #12
0
 def __init__(self, bbox, text):
     """Create a component covering ``bbox`` that carries ``text``.

     :param bbox: bounding box forwarded to ``LTComponent.__init__``.
     :param text: value stored unchanged as ``self.text``.
     """
     LTComponent.__init__(self, bbox)
     self.text = text
Example #13
0
 def to_tables2(self):
     """Assign each horizontal text box of ``self.layout`` to a column.

     Columns are dicts with three keys: ``'contain'`` (an ``LTComponent``
     bounding the column), ``'boxes'`` (the text boxes assigned to it) and
     ``'color'`` (a random PIL colour name).

     For every ``LTTextBoxHorizontal`` the existing columns are scanned in
     order of increasing width, and the scan stops at the first column
     that either

     * strictly contains the box horizontally -- the box joins it unless
       it is narrower than 80% of the column, in which case no column is
       chosen; or
     * overlaps the box horizontally by more than 90% and less than 110%
       of the column width -- the box joins it and the column grows to
       include the box.

     A box left without a column starts a new one, after which the column
     list is re-sorted by width. The result is stored in ``self.columns``.

     A new column is seeded with ``[e]`` (the box itself). The previous
     ``list(e)`` stored the box's child items instead, so ``'boxes'``
     mixed two kinds of objects.
     """
     columns = []
     for e in self.layout:
         if not isinstance(e, LTTextBoxHorizontal):
             continue
         logger.info('Finding a column for box {i}'.format(i=e.index))
         col = None
         for c in columns:
             contain = c['contain']
             if (e.x1 < contain.x1) and (e.x0 > contain.x0):
                 if (e.width / contain.width) < 0.8:
                     # Deliberately not assigned: a new column is created.
                     logger.info(
                         'Item too small, column may be several columns wide'
                     )
                 else:
                     logger.info('Item totally contained in column')
                     logger.info(
                         '{ex1} < {cx1} and {ex0} > {cx0}'.format(
                             ex1=e.x1,
                             cx1=contain.x1,
                             ex0=e.x0,
                             cx0=contain.x0))
                     col = c
                     col['boxes'].append(e)
                     # Contained horizontally, so only y can grow.
                     contain.set_bbox(
                         (contain.x0, min(contain.y0, e.y0),
                          contain.x1, max(contain.y1, e.y1)))
                 break
             overlap = contain.hoverlap(e)
             if 0.9 < (overlap / contain.width) < 1.1:
                 logger.info('Item is within 10% of current col width')
                 logger.info(
                     'Overlap of {hdist}, column width of {width}'.
                     format(hdist=overlap, width=contain.width))
                 col = c
                 col['boxes'].append(e)
                 contain.set_bbox(
                     (min(contain.x0, e.x0), min(contain.y0, e.y0),
                      max(contain.x1, e.x1), max(contain.y1, e.y1)))
                 break
         if not col:
             logger.info('Creating new column')
             col = {
                 'contain': LTComponent(e.bbox),
                 'boxes': [e],
                 'color':
                 random.choice(list(PIL.ImageColor.colormap.keys())),
             }
             columns.append(col)
             # Keep narrow columns first so they are tried first.
             columns.sort(key=lambda x: x['contain'].width)
     self.columns = columns
Example #14
0
 def addPosition(self, pos, pageid):
     """Append another ``[LTComponent(pos), pageid]`` pair to ``positions``.

     :param pos: bounding box wrapped in a new ``LTComponent``.
     :param pageid: page identifier stored next to the component.
     """
     entry = [LTComponent(pos), pageid]
     self.positions.append(entry)
Example #15
0
 def __init__(self, ltimage):
     """Wrap an ``LTImage``, copying its bounding box, name and stream.

     :param ltimage: the image to wrap; asserted to be an ``LTImage``.
     """
     assert isinstance(ltimage, LTImage)
     LTComponent.__init__(self, ltimage.bbox)
     self._name, self._stream = ltimage.name, ltimage.stream
Example #16
0
    def _create_cells(network):
        """
        Creates cells from the network and returns then
        as LTComponents.
        """
        squares_taken = defaultdict(set)
        cells = set()

        def city_distance(point, point_prime):
            return abs(point.x - point_prime.x) + abs(point.y - point_prime.y)

        def is_perpendicular(v1_x, v1_y, v2_x, v2_y):
            return v1_x*v2_x + v1_y*v2_y == 0

        for point in sorted(network, key=lambda p: (p.x, p.y)):
            for l1 in sorted(network.links[point],
                             key=lambda p: city_distance(p, point)):
                valid_links = [
                    link for link in network.links[point] if link != l1 and
                    is_perpendicular(link.x - point.x, link.y - point.y,
                                     l1.x - point.x, l1.y - point.y)]

                for l2 in sorted(valid_links,
                                 key=lambda p: city_distance(p, point)):
                    inter = network.links[l2].intersection(network.links[l1])
                    intersection = list(inter)

                    # remove initial point
                    intersection.remove(point)

                    if len(intersection) == 0:
                        continue

                    # sort by areas: smallest area first
                    area = lambda p: (p.x - point.x)*(p.y - point.y)
                    intersection.sort(key=area)

                    # square is formed by [point, l1, l2, last_point], in this
                    # order.
                    points = [point, l1, l2, intersection[0]]

                    # compute middle position of the square
                    middle_x = sum(point.x for point in points)/4.
                    middle_y = sum(point.y for point in points)/4.

                    # check if any point already has one of its squares
                    # (at most 4) used.
                    is_taken = False
                    square = range(4)
                    for i in range(4):
                        # compute the position of the point in relation to the
                        # middle corresponding to one of the following squares
                        # position: [(1,1), (-1,1), (1,-1), (-1,-1)]
                        vx = middle_x - points[i].x
                        vy = middle_y - points[i].y

                        square[i] = (int(vx/abs(vx)), int(vy/abs(vy)))

                        belongs = square[i] in squares_taken[points[i]]

                        is_taken = is_taken or belongs

                    if not is_taken:

                        cell = LTComponent((point.x, point.y,
                                            intersection[0].x, intersection[0].y))

                        cells.add(cell)

                        for i in range(4):
                            squares_taken[points[i]].add(square[i])
                        break

        return cells