forked from tuxor1337/springerdownload
/
springer_download.py
executable file
·157 lines (138 loc) · 5.82 KB
/
springer_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python
import sys, string, shutil, os
from gettext import gettext as _
from springerdl import meta, util, download, merge
from springerdl.pyPdf_ext import PdfFileReader_ext as PdfFileReader
from springerdl.const import *
def main(argv=sys.argv):
    """Select a front end and run ``springer_fetch`` through it.

    The command-line front end is chosen when file descriptor 0 (standard
    input) is a terminal or at least one argument was given, provided that
    ``--gui`` is not among the arguments.  In every other case the graphical
    front end is chosen.  The front-end module is imported lazily so only the
    selected one is loaded.

    Always returns 0.
    """
    gui_requested = "--gui" in argv[1:]
    terminal_or_args = os.isatty(0) or len(argv) > 1
    if gui_requested or not terminal_or_args:
        from springerdl.interface.gui import gui_main as run_interface
    else:
        from springerdl.interface.cli import cli_main as run_interface
    run_interface(springer_fetch)
    return 0
def _sanitize_filename(name, valid_chars):
    """Return *name* with every character not in *valid_chars* replaced by "_"."""
    return "".join(c if c in valid_chars else "_" for c in name)


def springer_fetch(interface):
    """Fetch a Springer book and write it out as one or more PDF files.

    ``interface`` is the front-end object handed in by the CLI/GUI layer.
    This function reads the options "proxy", "user-agent", "springer_name",
    "verbose", "force-full-access", "output-file", "autotitle", "ignore-full",
    "sorted", "use-pdfs", "pause", "cover" and "download-only" from it via
    ``interface.option(...)`` and reports progress through ``doing``,
    ``done``, ``out``, ``err`` and ``progress``.

    Returns:
        0 on success, 1 when the number of files passed via "use-pdfs" does
        not match the number of chapters that need one, and ``False`` when
        the book page could not be loaded.

    The process is terminated with ``sys.exit()`` when the book info is
    flagged "noaccess" and the "force-full-access" option is set.
    """
    util.setupOpener(interface.option("proxy"), interface.option("user-agent"))
    springer_key = util.parseSpringerURL(interface.option("springer_name"))
    book_url = '%s/book/10.1007/%s' % (SPRINGER_URL, springer_key)

    if interface.option('verbose'):
        print("ImageMagick: %s" % IM_BIN)
        print("Ghostscript: %s" % GS_BIN)
        print("PDF Toolkit: %s" % PDFTK_BIN)

    # --- Book metadata -----------------------------------------------------
    interface.doing(_("Fetching book info"))
    root = util.getElementTree(book_url)
    if root is None:
        interface.err(_("The specified identifier doesn't point to an existing Springer book resource"))
        # NOTE(review): False compares equal to 0, which is the success value
        # used below; kept as-is because callers are not visible from here.
        return False
    info = meta.fetchBookInfo(root)
    interface.done()

    interface.out(", ".join(info['authors']))
    bookinfo = info['title']
    if info['subtitle'] is not None:
        bookinfo += ": %s" % (info['subtitle'])
    bookinfo += " ("
    bookinfo += _("%d chapters") % (info['chapter_cnt'])
    if info['full_pdf'] is not None:
        bookinfo += _(", full book PDF available")
    bookinfo += ")"
    interface.out(bookinfo)

    if info['noaccess'] and interface.option('force-full-access'):
        sys.exit()

    # --- Output path -------------------------------------------------------
    outf = interface.option('output-file')
    valid_chars = "-_.,() %s%s" % (string.ascii_letters, string.digits)
    if outf is None:
        if interface.option('autotitle'):
            outf = "%s - %s.pdf" % (", ".join(info['authors']), info['title'])
        else:
            outf = info['online_isbn'] + ".pdf"
        # Sanitize the whole generated name so that path separators coming
        # from the title/authors cannot be mistaken for directories.
        outf = _sanitize_filename(outf, valid_chars)
    basename = os.path.basename(outf)
    target_dir = os.path.dirname(outf)
    if target_dir == "":
        target_dir = os.getcwd()
    # Only the file name is sanitized here; a user-supplied directory part
    # is kept untouched.
    basename = _sanitize_filename(basename, valid_chars)
    outf = os.path.join(target_dir, basename)

    # --- Shortcut: the publisher offers the whole book as one PDF ----------
    if info['full_pdf'] is not None and not interface.option('ignore-full'):
        pgs = interface.progress(_("Downloading %d/%d kB"))
        pdf = download.fetch_pdf_with_pgs(info['full_pdf'], pgs)
        pgs.destroy()
        interface.doing(_("Moving downloaded file to %s") % (target_dir))
        shutil.move(pdf.name, outf)
        interface.done()
        return 0

    # --- Table of contents -------------------------------------------------
    interface.doing(_("Fetching chapter data"))
    toc = meta.fetchToc(root, book_url)
    if interface.option('sorted'):
        toc = sorted(toc, key=lambda el: el['page_range'][0])
    interface.done()

    if interface.option('use-pdfs') is not None:
        # data[0] counts the chapters that need a file, data[1] is a copy of
        # the user-supplied list of local PDF paths.
        data = [0, interface.option('use-pdfs')[:]]
        opened = []

        def count_pdfs(el, lvl, d):
            if el['noaccess'] is not None or el['pdf_url'] != "":
                # Keep counting even when the supplied list is too short, so
                # the mismatch is reported below instead of an IndexError.
                if d[0] < len(d[1]):
                    interface.out("%s = %s" % (d[1][d[0]], el['title']))
                    el['pdf_file'] = open(d[1][d[0]], "rb")
                    opened.append(el['pdf_file'])
                d[0] += 1

        util.tocIterateRec(toc, count_pdfs, data)
        if interface.option('verbose'):
            print(util.printToc(toc))
        if data[0] != len(data[1]):
            # Do not leak the handles opened above on the error path.
            for pdf_file in opened:
                pdf_file.close()
            interface.err(_("Expected %d pdf files, got %s!") %
                          (data[0], len(data[1])))
            return 1
    else:
        toc = util.getAccessibleToc(toc)
        if interface.option('verbose'):
            print(util.printToc(toc))
        download.pdf_files(toc, interface.progress(""),
                           interface.option('pause'))

    # --- Page size, taken from the first chapter PDF found in the TOC ------
    first_pdf_file = []

    def get_first_pdf_file_from_toc(el, lvl, data):
        if not data and "pdf_file" in el:
            data.append(el['pdf_file'])

    util.tocIterateRec(toc, get_first_pdf_file_from_toc, first_pdf_file)
    if first_pdf_file:
        inputPDF = PdfFileReader(first_pdf_file[0])
        tmp_box = inputPDF.pages[0].mediaBox
        info['pagesize'] = (tmp_box[2], tmp_box[3])
    else:
        info['pagesize'] = (0, 0)

    # --- Optional cover page ----------------------------------------------
    if interface.option('cover'):
        if IM_BIN is None:
            interface.err(_("Skipping cover due to missing ImageMagick binary."))
        else:
            interface.doing(_("Fetching book cover"))
            cover = meta.fetchCover(info['print_isbn'], info['pagesize'])
            if cover:
                toc.insert(0, {
                    'pdf_file': cover,
                    'children': [],
                    'page_range': [0, 0],
                    'title': "Cover",
                })
                interface.done()
            else:
                interface.done(_("not available"))

    # --- Output: separate chapter files or one merged PDF ------------------
    if interface.option('download-only'):
        interface.doing(_("Moving downloaded files to %s") % (target_dir))
        file_list = []

        def append_to_list(el, lvl, flist):
            if 'pdf_file' in el and el['pdf_file'] is not None:
                flist.append([el['title'], el['pdf_file']])

        util.tocIterateRec(toc, append_to_list, file_list)
        for i, (title, pdf_file) in enumerate(file_list):
            if interface.option('autotitle'):
                chpt = _sanitize_filename(title, valid_chars) + ".pdf"
            else:
                chpt = basename
            target_base = "%02d-%s" % (i, chpt)
            # Close the handle before moving the file it refers to.
            pdf_file.close()
            shutil.move(pdf_file.name, os.path.join(target_dir, target_base))
        interface.done()
    else:
        merge.merge_by_toc(toc, info, outf, interface)
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)