-
Notifications
You must be signed in to change notification settings - Fork 0
/
mobi_renamer.py
140 lines (121 loc) · 5.71 KB
/
mobi_renamer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# -*- coding: windows-1252 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
#
"""Given a single (unencrypted) MOBI format file rename it to:
author - title.FILE_EXTENSION
TODO:
* Author name may be surname first or first name first depending on the metadata, compare:
* Koontz, Dean - Odd Thomas - You Are Destined to Be Together Forever (Short Story) (Kindle Single).azw3
* Richard Kadrey - Hollywood Dead.mobi
* directories/multiple files
* allow control over the format of the new filename
* other meta data, series information?
* work out what's the best library for this - if in doubt raw format details https://wiki.mobileread.com/wiki/MOBI
* https://github.com/kroo/mobi-python
* KindleNamer
* https://www.mobileread.com/forums/showthread.php?t=197168
* KindleUnpack (MobiUnpack)
* https://www.mobileread.com/forums/showthread.php?t=61986
* https://wiki.mobileread.com/wiki/KindleUnpack
* https://wiki.mobileread.com/wiki/MOBI
* https://github.com/booktype/ebooklib/blob/mobi/ebooklib/mobi.py - not sure this has it but might be worth adding
* https://sourceforge.net/projects/pythonpalmdb/
* See https://stackoverflow.com/questions/9465158/how-to-get-isbn-number-from-mobi-file-with-python
* Last time I checked Calibre it was not usable as a library
* other file formats; epub
"""
import os
import re
import sys
from string import Template
from mobi import Mobi # https://github.com/kroo/mobi-python Python 2.x only :-(
def safe_filename(in_filename):
    """Return a version of in_filename that is safe to use as a filename.

    Inspired by KindleNamer.

    Processing steps, in order:
      1. replace characters that are not valid in filenames on different
         Operating Systems (path separators included, so pass a bare name,
         not a path)
      2. remove control characters (code points below 32)
      3. collapse every run of white space into a single space
      4. remove leading dots, then trailing dots (trailing dots are a
         problem under Microsoft Windows)
      5. strip leading and trailing white space

    Args:
        in_filename: proposed filename, ideally a Unicode string.

    Returns:
        The cleaned-up name. May be empty if nothing safe remains.
    """
    # Ordered (unsafe, replacement) pairs. Order matters: the longer colon
    # patterns must be applied before the bare ':' so 'Title: Subtitle'
    # becomes 'Title - Subtitle' instead of picking up the em dash. A tuple
    # (rather than a dict) guarantees that order on every Python version.
    # TODO \u2013 to '-'?
    # TODO \u2014 to '-'?
    replacements = (
        ('<', u'['),
        ('>', u']'),
        (' : ', u' - '),
        (': ', u' - '),
        (':', u'\u2014'),  # em dash, escaped so the file encoding is irrelevant
        ('/', u'_'),
        ('\\', u'_'),
        ('|', u'_'),
        ('"', u'\''),
        ('*', u'_'),
        ('?', u''),
    )
    name = in_filename
    for unsafe_text, replacement in replacements:
        name = name.replace(unsafe_text, replacement)

    # strip control characters
    name = u"".join(char for char in name if ord(char) >= 32)

    # condense white space: every run of white space becomes one space
    name = re.sub(r"\s+", u" ", name.strip()).strip()

    # remove leading dots, then trailing dots
    name = name.lstrip(u'.').rstrip(u'.')

    # second cleanup just in case removing dots exposed white space
    return name.strip()
def generate_mobi_name(in_filename, template=Template(u'$author - $title.$extn')):
    """Build a filesystem-safe filename for a MOBI book from its metadata.

    The author is read from EXTH record 100. The title is read from EXTH
    record 503 when present, otherwise from the MOBI header 'Full Name'.
    The title is sometimes different compared with the "503" entry, e.g.
    compare 'Broken Homes' and 'Broken Homes (PC Peter Grant Book 4)' for
    https://www.amazon.com/Broken-Homes-Peter-Grant-Book-ebook/dp/B00DYX9OPC/
    NOTE if both exist, may want the longest one. So far all books from
    Amazon seen have consistently been 503 (e.g. "SERIES Book X").

    Args:
        in_filename: path of the (unencrypted) MOBI file. Assumed to be in
            the correct encoding (ideally a Unicode string).
        template: string.Template with $author, $title and $extn
            placeholders describing the new filename.

    Returns:
        The substituted template after being passed through safe_filename().

    Raises:
        KeyError: if the author or title metadata is missing, or the book
            declares a text encoding code page that is not supported.
    """
    # MOBI header "text Encoding" code page -> Python codec name
    codepage_to_encoding = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }

    book = Mobi(in_filename)
    book.parse()

    mobi_header = book.config.get('mobi', {})
    try:
        exth_records = book.config['exth']['records']
    except KeyError:
        exth_records = {}  # no EXTH section; reported below as missing author

    # not sure if this is text encoding for content or all meta data (e.g. titles)
    book_codepage = mobi_header.get('text Encoding', 1252)
    try:
        encoding = codepage_to_encoding[book_codepage]
    except KeyError:
        raise KeyError('unsupported MOBI text encoding code page: %r' % (book_codepage,))

    try:
        author = exth_records[100]
    except KeyError:
        raise KeyError('no author (EXTH record 100) in %r' % (in_filename,))

    title = exth_records.get(503) or mobi_header.get('Full Name')
    if title is None:
        raise KeyError('no title (EXTH record 503 or MOBI "Full Name") in %r' % (in_filename,))

    # Metadata normally arrives as bytes; only decode when that is the case
    # so values that are already text are left untouched.
    if isinstance(author, bytes):
        author = author.decode(encoding)
    if isinstance(title, bytes):
        title = title.decode(encoding)

    extn = os.path.splitext(in_filename)[1][1:]  # extension without leading period

    new_filename = template.substitute(author=author, title=title, extn=extn)  # TODO use a dict?
    return safe_filename(new_filename)
# Alias: generate_filename is the same function object as generate_mobi_name.
# NOTE(review): presumably a format-neutral name for callers - confirm.
generate_filename = generate_mobi_name
def main(argv=None):
    """Command-line entry point: print the suggested new name for one MOBI file.

    The generated name is printed twice, first as its repr() and then as
    plain text. The input file itself is not modified.

    Args:
        argv: argument list laid out like sys.argv (program name first,
            then the MOBI filename). Defaults to sys.argv when None.

    Returns:
        0 on success, 1 if no input filename was supplied.
    """
    if argv is None:
        argv = sys.argv

    # dumb argv processing for simplicity - TODO update and use a library
    if len(argv) < 2:
        prog_name = argv[0] if argv else 'mobi_renamer.py'
        sys.stderr.write('Usage: %s MOBI_FILENAME\n' % (prog_name,))
        return 1
    in_filename = argv[1]

    # Parse the book once and reuse the result for both output forms.
    new_filename = generate_mobi_name(in_filename)
    print(repr(new_filename))
    print(new_filename)
    return 0


if __name__ == "__main__":
    sys.exit(main())