forked from ciju/split-pdf-slides
-
Notifications
You must be signed in to change notification settings - Fork 0
/
splitpdf.py
executable file
·137 lines (104 loc) · 4.01 KB
/
splitpdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/python
import copy, sys, pprint
import argparse
__author__ = "ciju.ch3rian@gmail.com (ciju cherian)"
from pypdf2 import PdfFileWriter, PdfFileReader, PageObject, ContentStream, DictionaryObject, DecodedStreamObject, NameObject
def page4eachXobj(spage):
    """Build one page per XObject referenced by *spage*.

    Every generated page is a shallow copy of *spage* whose ``/Resources``
    hold exactly one XObject and whose ``/Contents`` only draws that XObject
    (``q <name> Do Q``).  If the XObject has a ``/BBox`` entry, it is used as
    the media box of the generated page.

    Args:
        spage: source page.  It must provide ``/Resources`` containing an
            ``/XObject`` dictionary, and a single ``/Contents`` stream.

    Returns:
        A list of pages ordered by where the XObject names occur among the
        whitespace-separated tokens of the original ``/Contents`` data.
        XObjects whose name does not occur there are left out; a name that
        occurs several times yields its page several times.

    Raises:
        KeyError, AttributeError: propagated from the lookups on *spage* when
            the expected entries are missing or have a different shape
            (``split_pdf`` catches these to fall back to media-box splitting).
    """
    # The three dictionary keys never change, so build them once.
    res_name, xobj_name, contents_name = [
        NameObject(n) for n in ("/Resources", "/XObject", "/Contents")
    ]
    xobjects = spage["/Resources"].getObject()["/XObject"]

    pages_by_name = {}
    for key in xobjects:
        page = copy.copy(spage)
        page[res_name] = DictionaryObject()
        page[res_name][xobj_name] = DictionaryObject()
        # dict.__getitem__ is used instead of xobjects[key]; presumably to
        # copy the stored entry as-is (e.g. still indirect) -- TODO confirm.
        page[res_name][xobj_name][key] = dict.__getitem__(xobjects, key)
        try:
            page.mediaBox = page[res_name][xobj_name][key]["/BBox"]
        except (KeyError, TypeError):
            # No usable /BBox on this XObject: keep the copied media box.
            pass
        stream = DecodedStreamObject()
        stream.setData("q\n" + key + " Do\nQ")
        page[contents_name] = ContentStream(stream, page.pdf)
        pages_by_name[key] = page

    # return pages in the order of original /Contents description
    ordered = []
    for token in spage["/Contents"].getObject().getData().split():
        # The stream data may come back as bytes (e.g. on Python 3) while the
        # dictionary keys are text; normalise so the lookup can match.
        if not isinstance(token, str):
            token = token.decode("latin-1")
        if token in pages_by_name:
            ordered.append(pages_by_name[token])
    return ordered
def copy_page(page):
    """Return a new ``PageObject`` carrying the entries of *page*.

    The dictionary entries are copied with ``update`` and the media box is
    replaced by a ``copy.copy`` of the original one, so the duplicate's box
    can be changed without touching *page*'s box.
    """
    # NOTE(review): the page itself is handed to the PageObject constructor;
    # verify against the PyPDF2 signature that this is the intended argument.
    duplicate = PageObject(page)
    duplicate.update(page)
    own_box = copy.copy(page.mediaBox)
    duplicate.mediaBox = own_box
    return duplicate
def make_page(lx, ly, ux, uy, page, o):
    """Return a copy of *page* whose media box is the given rectangle.

    Args:
        lx, ly: lower-left corner of the new media box.
        ux, uy: upper-right corner of the new media box.
        page: page to copy (via ``copy_page``); it is not modified here.
        o: unused; kept so existing callers keep working.
    """
    cropped = copy_page(page)
    box = cropped.mediaBox
    box.lowerLeft = (lx, ly)
    box.upperRight = (ux, uy)
    return cropped
def get_points(l, u, s):
    """Return the ``s + 1`` boundaries that cut the interval [l, u] into *s* parts.

    Args:
        l: lower bound of the interval.
        u: upper bound of the interval.
        s: number of equally sized parts.

    Returns:
        A list starting with *l*, followed by ``l + i * (u - l) / s`` for
        ``i = 1 .. s``.  With ``s == 0`` the list is just ``[l]``.
    """
    span = u - l
    # Every boundary is offset by the lower bound; without the offset the
    # result is only correct for intervals that start at 0.
    return [l] + [l + i * span / s for i in range(1, s + 1)]
def split_list(alist, wanted_parts=1):
    """Split *alist* into *wanted_parts* consecutive slices.

    Slice ``k`` covers ``alist[k*n // parts : (k+1)*n // parts]`` where ``n``
    is ``len(alist)``, so the slice lengths differ by at most one element.
    """
    total = len(alist)
    chunks = []
    for part in range(wanted_parts):
        start = part * total // wanted_parts
        stop = (part + 1) * total // wanted_parts
        chunks.append(alist[start:stop])
    return chunks
def rotateListOfList(ll):
    """Transpose *ll*, reverse the order of the resulting columns and flatten.

    Args:
        ll: a sequence of rows (each row itself a sequence).

    Returns:
        A flat list: the items of the last column first, then the previous
        column, and so on.  Rows of unequal length are truncated to the
        shortest one, as ``zip`` does.
    """
    # zip() only returns a list on Python 2; materialise it so the columns
    # can be walked in reverse on any Python version.
    columns = list(zip(*ll))
    return [item for column in reversed(columns) for item in column]
def mediabox_slide_split(page, output, x, y, lst):
    """Cut *page* into a grid of media-box crops and add them to *output*.

    The page's media box is divided into ``x`` parts along the x axis
    (columns) and ``y`` parts along the y axis (rows).  One cropped copy of
    the page is created per cell, row by row starting with the row that has
    the highest y values, and within a row from the lowest x upwards.

    Args:
        page: source page; its ``mediaBox`` defines the area to divide.
        output: writer object; selected crops are passed to ``addPage``.
        x: number of columns (anything ``int()`` accepts).
        y: number of rows (anything ``int()`` accepts).
        lst: 1-based cell numbers, in the order the crops are to be added.
            If the page has ``/Rotate`` equal to 90, the sequence is first
            split into ``y`` chunks and reordered with ``rotateListOfList``.

    Raises:
        IndexError: if an entry of *lst* is not in ``1 .. x*y``.
    """
    cols = int(x)
    rows = int(y)

    minx, miny = page.mediaBox.lowerLeft
    maxx, maxy = page.mediaBox.upperRight
    xs = get_points(minx, maxx, cols)  # columns
    ys = get_points(miny, maxy, rows)  # rows
    ys.reverse()  # highest y first

    tiles = []
    for top, bottom in zip(ys, ys[1:]):
        for left, right in zip(xs, xs[1:]):
            tiles.append(make_page(left, bottom, right, top, page, output))

    if "/Rotate" in page and page["/Rotate"] == 90:
        # Use the same integer row count as the grid above.
        lst = rotateListOfList(split_list(lst, rows))

    for entry in lst:
        number = int(entry)
        # Reject 0 and negatives explicitly; plain indexing would silently
        # wrap around and pick a tile from the end of the list.
        if not 1 <= number <= len(tiles):
            raise IndexError(
                "slide number %s is outside 1..%d" % (entry, len(tiles))
            )
        output.addPage(tiles[number - 1])
def try_xobject_slide_split(p, o):
    """Add one page per XObject of page *p* to the writer *o*.

    The pages come from ``page4eachXobj``; any exception it raises
    propagates to the caller.
    """
    xobject_pages = page4eachXobj(p)
    for xobject_page in xobject_pages:
        o.addPage(xobject_page)
def ssplit(readf, writef, fn, *arg):
    """Run *fn* over every page of the input PDF and write the result.

    Args:
        readf: file object the input PDF is read from.
        writef: file object the output PDF is written to.
        fn: callable invoked as ``fn(page, writer, *arg)`` for each input
            page; it is expected to add pages to ``writer``.
        *arg: extra positional arguments forwarded to *fn*.
    """
    writer = PdfFileWriter()
    # Second positional argument is False (the "strict" flag in PyPDF2 --
    # TODO confirm against the installed version).
    reader = PdfFileReader(readf, False)
    page_count = reader.getNumPages()
    for index in range(page_count):
        fn(reader.getPage(index), writer, *arg)
    # Nothing is written until every page has been processed.
    writer.write(writef)
def mediabox_pdf_split(readf, writef, x, y, lst):
    """Split every page of the input PDF with ``mediabox_slide_split``.

    *x*, *y* and *lst* are forwarded unchanged for each page; *readf* and
    *writef* are the input and output file objects handed to ``ssplit``.
    """
    per_page_args = (x, y, lst)
    ssplit(readf, writef, mediabox_slide_split, *per_page_args)
def xobject_pdf_split(readf, writef):
    """Split every page of the input PDF with ``try_xobject_slide_split``.

    *readf* and *writef* are the input and output file objects handed to
    ``ssplit``; exceptions raised while splitting propagate to the caller.
    """
    per_page = try_xobject_slide_split
    ssplit(readf, writef, per_page)
def split_pdf(readf, writef, r, c, lst):
    """Split by XObjects, falling back to a media-box grid split.

    The XObject-based split is tried first.  If it raises ``KeyError`` or
    ``AttributeError``, the pages are split with ``mediabox_pdf_split``
    using *r* and *c*; when *lst* does not contain exactly ``r * c`` entries
    it is replaced by the natural order ``1 .. r*c``.
    """
    try:
        xobject_pdf_split(readf, writef)
    except (KeyError, AttributeError):
        cells = r * c
        order = lst if len(lst) == cells else range(1, cells + 1)
        mediabox_pdf_split(readf, writef, r, c, order)
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate them, then split.
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', default='out.pdf', help='Output file')
    parser.add_argument('-i', required=True, help='Input file')
    parser.add_argument('-m', '--mediabox', action='store_true', help='Split pdf using mediabox')
    parser.add_argument('-s', '--size', type=int, nargs=2, help='Row and column size of the split')
    parser.add_argument('--seq', type=int, nargs=argparse.REMAINDER, help='Sequence of the slides, in the input file')
    args = parser.parse_args()

    # -s is not declared as required, so check it before unpacking; a missing
    # value would otherwise surface as a TypeError on the unpacking below.
    if args.size is None:
        parser.error("-s/--size needs two integers")
    r, c = args.size
    seq = args.seq or []

    # Validate before opening anything so a bad sequence does not leave an
    # empty output file behind.
    if args.mediabox:
        if not seq:
            seq = list(range(1, r * c + 1))
        if len(seq) != r * c:
            print("Sequence doesn't span the rows and columns")
            sys.exit(2)

    # The with-statement closes both files even if splitting fails.
    with open(args.i, "rb") as inFile, open(args.o, "wb") as outFile:
        if args.mediabox:
            print("%s %s %s %s %s" % (args.i, args.o, r, c, seq))
            mediabox_pdf_split(inFile, outFile, r, c, seq)
        else:
            split_pdf(inFile, outFile, r, c, seq)