-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_paper_titles.py
50 lines (37 loc) · 1.45 KB
/
extract_paper_titles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
"""extract_paper_titles.py: extract title from pdf journals/papers and
rename the pdf file itself
"""
import argparse
import os
from PyPDF2 import PdfFileWriter, PdfFileReader
def do_cmd_args_line():
    """Parse the process command line and return the argparse namespace.

    The module docstring is used as the usage text.  The returned namespace
    carries three attributes:

    * ``path``   -- value of the required ``-p``/``--path`` option
    * ``dest``   -- value of the required ``-d``/``--dest`` option
    * ``dryrun`` -- True when ``--dryrun`` was given, otherwise False
    """
    cli = argparse.ArgumentParser(usage=__doc__)
    # (flags, keyword settings) for every supported option, in help order.
    option_table = (
        (('-p', '--path'), {'required': True, 'help': 'target path'}),
        (('-d', '--dest'), {'required': True, 'help': 'dest path'}),
        (('--dryrun',), {'action': 'store_true', 'help': 'dryrun'}),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
def _clean_name_part(text):
    """Return *text* made safe to embed in a single file name ('' if empty)."""
    if not text:
        return ''
    # PDF metadata is free-form; a path separator in it (e.g. the title
    # "TCP/IP") would be read as a directory boundary by os.path.join and
    # os.rename, sending the file to an unintended location.
    for sep in (os.sep, os.altsep):
        if sep:
            text = text.replace(sep, '_')
    return text.strip()


def _read_author_and_title(fname):
    """Return the ``(author, title)`` metadata of the PDF stored at *fname*.

    Both elements are None when the PDF has no document-information
    dictionary; an individual element is None when that field is absent.
    """
    # The reader pulls data from the stream on demand, so the metadata is
    # extracted while the file is still open; the ``with`` block then closes
    # the handle before the caller tries to rename the file.
    with open(fname, 'rb') as stream:
        info = PdfFileReader(stream).getDocumentInfo()
        if info is None:
            return None, None
        return info.author, info.title


def main():
    """Rename every ``.pdf`` file found in ``--path`` after its metadata.

    Each file is moved to ``<dest>/[<author>] <title>.pdf``.  A missing or
    empty author becomes ``Unknown``; a missing or empty title falls back to
    the original file name without its extension.  Every planned rename is
    printed; with ``--dryrun`` nothing is actually renamed.  A failing
    rename is reported and the remaining files are still processed.
    """
    args = do_cmd_args_line()
    for f in os.listdir(args.path):
        if not f.endswith('.pdf'):
            continue
        fname = os.path.join(args.path, f)
        author, title = _read_author_and_title(fname)
        author = _clean_name_part(author) or 'Unknown'
        title = _clean_name_part(title) or os.path.splitext(f)[0]
        tgtfname = '[{0}] {1}.pdf'.format(author, title)
        ftgtname = os.path.join(args.dest, tgtfname)
        print('renaming {0} -> {1}'.format(fname, ftgtname))
        if args.dryrun:
            continue
        try:
            os.rename(fname, ftgtname)
        except (OSError, ValueError) as e:
            # OSError: the file system refused the rename.  ValueError: the
            # metadata produced a name the OS cannot represent (embedded
            # NUL, unencodable characters).  Report it and keep going.
            print(e)
# Run the renamer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()