Exemple #1
0
def left_aligned_table(source):
    """Build an implicit table from a single cell of left-aligned items.

    The items of the only cell of ``source`` are copied and sorted with the
    ``sort_topdown_ltr`` comparator.  The ``x1`` of every leading item that
    shares the first item's ``y1`` defines one column.  Items are then
    grouped into rows by ``y1`` and stored in the column whose position
    matches their ``x1``.  A row that leaves any column empty is treated as a
    continuation of the previous row and is folded into it cell by cell.

    Args:
        source: Table-like object that must have exactly one row and one
            column (checked with ``assert``).

    Returns:
        ``pdftable.ImplicitTable(bounds, table)`` where ``table`` is a list
        of rows and every row holds one list of items per detected column.

    Raises:
        Exception: If an item's ``x1`` matches none of the detected columns.
    """
    assert source.rows() == 1 and source.columns() == 1
    bounds = source.bounds()
    contents = source.get_at(0, 0)[:]
    contents.sort(key=cmp_to_key(sort_topdown_ltr))

    # The leading run of items on the first line defines the column positions.
    first_y = contents[0].bounds().y1()
    columns = []
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), first_y):
            break
        columns.append(item_bounds.x1())

    table = []

    def flush(cells):
        """Append a complete row, or merge a partial one into the last row."""
        if all(cells):
            table.append(cells)
        else:
            for i, cell in enumerate(cells):
                table[-1][i] += cell

    last_y = first_y
    # Independent lists per cell, so no two cells ever share one object.
    row = [[] for _ in columns]
    for item in contents:
        item_bounds = item.bounds()
        if not pdftable.pretty_much_equal(item_bounds.y1(), last_y):
            flush(row)
            row = [[] for _ in columns]
            last_y = item_bounds.y1()

        # The ``else`` belongs to the ``for``: it runs only when no column
        # matched at all, instead of once per non-matching column.
        for i, column_x in enumerate(columns):
            if pdftable.pretty_much_equal(item_bounds.x1(), column_x):
                col_index = i
                break
        else:
            print(columns)
            print(contents)
            raise Exception("No matching column!")

        row[col_index] = [item]

    flush(row)
    return pdftable.ImplicitTable(bounds, table)
Exemple #2
0
    def __merge_text(self, lines):
        """Merge consecutive text lines that look like one wrapped paragraph.

        ``lines`` is sorted in place with the ``sort_topdown_ltr`` comparator.
        A line is appended to the previously kept line when both start at
        (pretty much) the same ``rect.x1()``, report the same ``font_size()``
        and the gap between the previous line's ``approx_rect.y2()`` and this
        line's ``approx_rect.y1()`` is below 1.2.  A single space is inserted
        before the appended text unless the previous line ends in ``-`` or
        ``/``.

        Args:
            lines: List of text-line objects; reordered in place.

        Returns:
            The list of merged lines, or ``None`` when ``lines`` is empty.
        """
        if not lines:
            return None

        lines.sort(key=cmp_to_key(sort_topdown_ltr))
        merged = [lines[0]]
        for line in lines[1:]:
            last = merged[-1]
            same_x = pdftable.pretty_much_equal(line.rect.x1(), last.rect.x1())
            same_size = last.font_size() == line.font_size()
            decent_descent = (
                line.approx_rect.y1() - last.approx_rect.y2() < 1.2)
            if same_x and same_size and decent_descent:
                last_char = last.chars[-1].get_text()[-1]
                # A trailing hyphen or slash means the text continues
                # without a separating space.
                if last_char not in ("-", "/"):
                    last.append_char(" ")
                last.append(line)
            else:
                merged.append(line)
        return merged
Exemple #3
0
	def __merge_text(self, lines):
		"""Merge consecutive text lines that look like one wrapped paragraph.

		``lines`` is sorted in place with the ``sort_topdown_ltr`` comparator.
		A line is appended to the previously kept line when both start at
		(pretty much) the same ``rect.x1()``, report the same ``font_size()``
		and the gap between the previous line's ``approx_rect.y2()`` and this
		line's ``approx_rect.y1()`` is below 1.2.  A single space is inserted
		before the appended text unless the previous line ends in ``-`` or
		``/``.

		Args:
			lines: List of text-line objects; reordered in place.

		Returns:
			The list of merged lines, or ``None`` when ``lines`` is empty.
		"""
		if not lines:
			return None

		lines.sort(cmp=sort_topdown_ltr)
		merged = [lines[0]]
		for line in lines[1:]:
			last = merged[-1]
			same_x = pdftable.pretty_much_equal(line.rect.x1(), last.rect.x1())
			same_size = last.font_size() == line.font_size()
			decent_descent = line.approx_rect.y1() - last.approx_rect.y2() < 1.2
			if same_x and same_size and decent_descent:
				last_char = last.chars[-1].get_text()[-1]
				# A trailing hyphen or slash means the text continues
				# without a separating space.
				if last_char not in ("-", "/"):
					last.append_char(" ")
				last.append(line)
			else:
				merged.append(line)
		return merged
Exemple #4
0
def left_aligned_table(source):
	"""Build an implicit table from a single cell of left-aligned items.

	The items of the only cell of ``source`` are copied and sorted with the
	``sort_topdown_ltr`` comparator.  The ``x1`` of each leading item sharing
	the first item's ``y1`` becomes a column position.  Items are grouped
	into rows by ``y1`` and stored in the column matching their ``x1``; a row
	with any empty column is merged into the previous row.

	Returns ``pdftable.ImplicitTable(bounds, table)``.  Raises ``Exception``
	when an item matches no column.
	"""
	assert source.rows() == 1 and source.columns() == 1
	bounds = source.bounds()
	items = source.get_at(0, 0)[:]
	items.sort(cmp=sort_topdown_ltr)

	# Column positions come from the leading run of items on the first line.
	top_y = items[0].bounds().y1()
	column_xs = []
	for entry in items:
		box = entry.bounds()
		if not pdftable.pretty_much_equal(box.y1(), top_y):
			break
		column_xs.append(box.x1())

	table = []

	def flush(pending):
		# Complete rows are appended; partial rows extend the previous row.
		if all(pending):
			table.append(pending)
			return
		for idx, cell in enumerate(pending):
			table[-1][idx] += cell

	current_y = top_y
	cells = [[] for _ in column_xs]
	for entry in items:
		box = entry.bounds()
		if not pdftable.pretty_much_equal(box.y1(), current_y):
			flush(cells)
			cells = [[] for _ in column_xs]
			current_y = box.y1()

		col_index = None
		for idx, column_x in enumerate(column_xs):
			if pdftable.pretty_much_equal(box.x1(), column_x):
				col_index = idx
				break
		if col_index is None:
			print(column_xs)
			print(items)
			raise Exception("No matching column!")

		cells[col_index] = [entry]

	flush(cells)
	return pdftable.ImplicitTable(bounds, table)
Exemple #5
0
 def sort_text(a, b):
     """Compare two text items by ``rect.x1()``, then by ``rect.y1()``.

     Items whose ``x1`` values are pretty much equal are ordered by ``y1``;
     otherwise the item with the smaller ``x1`` comes first.

     Returns -1 if ``a`` sorts before ``b``, 0 if they tie and 1 otherwise.
     """
     a_x, b_x = a.rect.x1(), b.rect.x1()
     if pdftable.pretty_much_equal(a_x, b_x):
         a_y, b_y = a.rect.y1(), b.rect.y1()
         if a_y < b_y:
             return -1
         # Equal keys must compare as 0 and a greater key as 1.
         if a_y == b_y:
             return 0
         return 1
     if a_x < b_x:
         return -1
     return 1
Exemple #6
0
		def sort_text(a, b):
			"""Compare two text items by ``rect.x1()``, then by ``rect.y1()``.

			Items whose ``x1`` values are pretty much equal are ordered by
			``y1``; otherwise the item with the smaller ``x1`` comes first.

			Returns -1 if ``a`` sorts before ``b``, 0 if they tie and 1
			otherwise.
			"""
			a_x, b_x = a.rect.x1(), b.rect.x1()
			if pdftable.pretty_much_equal(a_x, b_x):
				a_y, b_y = a.rect.y1(), b.rect.y1()
				if a_y < b_y:
					return -1
				# Equal keys must compare as 0 and a greater key as 1.
				if a_y == b_y:
					return 0
				return 1
			if a_x < b_x:
				return -1
			return 1
Exemple #7
0
def center_aligned_table(source):
    """Build an implicit table from a single cell of center-aligned items.

    The items of the only cell of ``source`` are copied and sorted with the
    ``sort_topdown_ltr`` comparator.  The ``xmid`` of each leading item that
    shares the first item's ``y1`` becomes a column center.  Items are
    grouped into rows by ``y1`` and stored in the column whose center is
    closest to their own ``xmid``; a row with any empty column is merged
    into the previous row.

    Returns ``pdftable.ImplicitTable(bounds, table)``.
    """
    assert source.rows() == 1 and source.columns() == 1
    bounds = source.bounds()
    items = source.get_at(0, 0)[:]
    items.sort(key=cmp_to_key(sort_topdown_ltr))

    # Column centers come from the leading run of items on the first line.
    current_y = items[0].bounds().y1()
    centers = []
    for entry in items:
        box = entry.bounds()
        if not pdftable.pretty_much_equal(current_y, box.y1()):
            break
        centers.append(box.xmid())

    table = []

    def flush(pending):
        # Complete rows are appended; partial rows extend the previous row.
        if all(pending):
            table.append(pending)
            return
        for idx, cell in enumerate(pending):
            table[-1][idx] += cell

    cells = [[] for _ in centers]
    for entry in items:
        box = entry.bounds()
        if not pdftable.pretty_much_equal(box.y1(), current_y):
            flush(cells)
            cells = [[] for _ in centers]
            current_y = box.y1()

        # Pick the first column whose center is strictly closest.
        nearest = None
        best = float("inf")
        for idx, center in enumerate(centers):
            gap = abs(box.xmid() - center)
            if gap < best:
                best, nearest = gap, idx

        cells[nearest] = [entry]

    flush(cells)
    return pdftable.ImplicitTable(bounds, table)
Exemple #8
0
def center_aligned_table(source):
	"""Build an implicit table from a single cell of center-aligned items.

	The items of the only cell of ``source`` are copied and sorted with the
	``sort_topdown_ltr`` comparator.  The ``xmid`` of each leading item that
	shares the first item's ``y1`` becomes a column center.  Items are
	grouped into rows by ``y1`` and stored in the column whose center is
	closest to their own ``xmid``; a row with any empty column is merged
	into the previous row.

	Returns ``pdftable.ImplicitTable(bounds, table)``.
	"""
	assert source.rows() == 1 and source.columns() == 1
	bounds = source.bounds()
	items = source.get_at(0, 0)[:]
	items.sort(cmp=sort_topdown_ltr)

	# Column centers come from the leading run of items on the first line.
	current_y = items[0].bounds().y1()
	centers = []
	for entry in items:
		box = entry.bounds()
		if not pdftable.pretty_much_equal(current_y, box.y1()):
			break
		centers.append(box.xmid())

	table = []

	def flush(pending):
		# Complete rows are appended; partial rows extend the previous row.
		if all(pending):
			table.append(pending)
			return
		for idx, cell in enumerate(pending):
			table[-1][idx] += cell

	cells = [[] for _ in centers]
	for entry in items:
		box = entry.bounds()
		if not pdftable.pretty_much_equal(box.y1(), current_y):
			flush(cells)
			cells = [[] for _ in centers]
			current_y = box.y1()

		# Pick the first column whose center is strictly closest.
		nearest = None
		best = float("inf")
		for idx, center in enumerate(centers):
			gap = abs(box.xmid() - center)
			if gap < best:
				best, nearest = gap, idx

		cells[nearest] = [entry]

	flush(cells)
	return pdftable.ImplicitTable(bounds, table)