def saveLog():
    """Build a redacted, zipped copy of the most recent debug log.

    Walks lazylibrarian.log, lazylibrarian.log.1, ... in LOGDIR, newest
    line first, until a line containing "Debug log ON" has been copied or a
    logfile in the sequence does not exist. Values following any of the
    known password/apikey markers are replaced with '<redacted>'. The
    result is written back in chronological order after logHeader(), and
    stored as 'debug.log' inside LOGDIR/debug.zip.

    Returns a status message string: either 'LOGDIR does not exist' or
    'Debug log saved as <path>'.
    """
    if not os.path.exists(lazylibrarian.CONFIG['LOGDIR']):
        return 'LOGDIR does not exist'

    basename = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'lazylibrarian.log')
    outfile = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'debug')
    passchars = string.ascii_letters + string.digits + '_/'  # _/ used by slack and googlebooks
    # markers that are followed by a secret in the log text.
    # The last two entries used to be fused into one string literal, which
    # meant neither of them could ever match.
    redactlist = ['api -> ', 'apikey -> ', 'pass -> ', 'password -> ', 'token -> ', 'using api [',
                  'apikey=', 'key=', 'apikey%3D', "apikey': u'", "apikey': '", 'keys ->']
    mask = '<redacted>'

    def redact(text):
        """Return (text with every marked secret masked, number of masks applied)."""
        hits = 0
        for item in redactlist:
            pos = text.find(item)
            while pos >= 0:  # handle every occurrence, not just the first
                startpos = pos + len(item)
                if text.startswith(mask, startpos):
                    # already masked via an overlapping marker ('apikey=' vs 'key=')
                    pos = text.find(item, startpos)
                    continue
                endpos = startpos
                # skip any quoting/punctuation in front of the secret...
                while endpos < len(text) and text[endpos] not in passchars:
                    endpos += 1
                # ...then swallow the secret itself
                while endpos < len(text) and text[endpos] in passchars:
                    endpos += 1
                if endpos != startpos:
                    text = text[:startpos] + mask + text[endpos:]
                    hits += 1
                    pos = text.find(item, startpos + len(mask))
                else:
                    pos = text.find(item, startpos)
        return text, hits

    with open(outfile + '.tmp', 'w') as out:
        nextfile = True
        extn = 0
        redacts = 0
        while nextfile:
            fname = basename
            if extn > 0:
                fname = fname + '.' + str(extn)
            if not os.path.exists(fname):
                logger.debug("logfile [%s] does not exist" % fname)
                nextfile = False
            else:
                logger.debug('Processing logfile [%s]' % fname)
                linecount = 0
                for line in reverse_readline(fname):
                    line, hits = redact(line)
                    redacts += hits
                    out.write("%s\n" % line)
                    if "Debug log ON" in line:
                        # reached the start of the current debug session
                        logger.debug('Found "Debug log ON" line %s in %s' % (linecount, fname))
                        nextfile = False
                        break
                    linecount += 1
                extn += 1

    # the temp file is newest-first, reverse it again to get chronological order
    with open(outfile + '.log', 'w') as logfile:
        logfile.write(logHeader())
        lines = 0
        for line in reverse_readline(outfile + '.tmp'):
            logfile.write("%s\n" % line)
            lines += 1
    os.remove(outfile + '.tmp')
    logger.debug("Redacted %s passwords/apikeys" % redacts)
    logger.debug("%s log lines written to %s" % (lines, outfile + '.log'))
    with zipfile.ZipFile(outfile + '.zip', 'w') as myzip:
        myzip.write(outfile + '.log', 'debug.log')
    os.remove(outfile + '.log')
    return "Debug log saved as %s" % (outfile + '.zip')
def get_book_info(fname):
    """Extract basic metadata from an ebook or opf file.

    Only handles epub, mobi and opf for now. The type is chosen from the
    text after the last '.' in fname (case-insensitive).

    Returns a dict that may contain 'type', 'title', 'creator', 'language'
    and 'identifier'. The dict is empty when fname has no '.', when the
    mobi parser fails, or when an epub names no content file. For any other
    extension the historical return value "" is kept so existing callers
    see no change.

    Errors opening or parsing an epub/opf file are not caught here.
    """
    res = {}
    if '.' not in fname:
        return res
    extn = fname.split('.')[-1].lower()

    if extn == "mobi":
        try:
            book = Mobi(fname)
            book.parse()
        except Exception:
            # best effort: an unreadable mobi simply yields no metadata
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        res['type'] = "mobi"
        return res

    # pdf is deliberately not handled:
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs

    if extn == "epub":
        # the container file tells us where the contents metafile lives
        with zipfile.ZipFile(fname) as zipdata:
            tree = ElementTree.fromstring(zipdata.read('META-INF/container.xml'))
            if not len(tree):
                return res
            cfname = ""
            for rootfile in tree[0]:
                if 'full-path' in rootfile.attrib:
                    cfname = rootfile.attrib['full-path']
                    break  # the first rootfile is the default rendition
            if not cfname:
                return res
            # grab the metadata block from the contents metafile
            txt = zipdata.read(cfname)
        tree = ElementTree.fromstring(txt)
        res['type'] = "epub"
    elif extn == "opf":
        # read bytes so the parser can honour any encoding declaration
        with open(fname, 'rb') as opf:
            txt = opf.read()
        tree = ElementTree.fromstring(txt)
        res['type'] = "opf"
    else:
        return ""

    # repackage the data, the first child of the root is taken as the metadata block
    if not len(tree):
        return res
    for elem in tree[0]:
        # strip the '{namespace}' prefix if there is one
        tag = str(elem.tag).split('}')[-1].lower()
        txt = elem.text
        attrib = str(elem.attrib).lower()
        if 'title' in tag:
            res['title'] = txt
        elif 'language' in tag:
            res['language'] = txt
        elif 'creator' in tag:
            res['creator'] = txt
        elif 'identifier' in tag and 'isbn' in attrib:
            if formatter.is_valid_isbn(txt):
                res['identifier'] = txt
    return res
def create_cover(issuefile=None, refresh=False):
    """Create a jpg cover image next to an issue file.

    The cover is written to the issue's path with the extension replaced
    by '.jpg'. For .cbz/.epub/.cbr the first archive member that looks like
    a cover (name contains '-00.', '000.' or 'cover.' and ends in
    .jpg/.jpeg) is extracted. For .pdf the first page is rendered using, in
    order of preference: the external program named in IMP_CONVERT,
    ghostscript on Windows, or wand / PythonMagick / ghostscript elsewhere.
    If no cover could be produced, data/images/nocover.jpg is copied into
    place instead.

    issuefile: path of the issue; nothing is done if it is None or missing.
    refresh:   when True an existing cover is deleted and recreated,
               otherwise an existing cover is left alone.

    Returns None. Failures are logged at debug level rather than raised.
    """
    if lazylibrarian.CONFIG['IMP_CONVERT'] == 'None':  # special flag to say "no covers required"
        return
    if issuefile is None or not os.path.isfile(issuefile):
        logger.debug('No issuefile %s' % issuefile)
        return

    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' % issuefile)
        return

    coverfile = base + '.jpg'

    if os.path.isfile(coverfile):
        if refresh:
            os.remove(coverfile)
        else:
            logger.debug('Cover for %s exists' % issuefile)
            return  # quit if cover already exists and we didn't want to refresh

    logger.debug('Creating cover for %s' % issuefile)
    data = ''  # result from unzip or unrar
    extn = extn.lower()
    if extn in ['.cbz', '.epub']:
        try:
            data = zipfile.ZipFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read zip file %s, %s %s" % (issuefile, type(why).__name__, str(why)))
            data = ''
    elif extn in ['.cbr']:
        try:
            # unrar will complain if the library isn't installed, needs to be compiled separately
            # see https://pypi.python.org/pypi/unrar/ for instructions
            # Download source from http://www.rarlab.com/rar_add.htm
            # note we need LIBRARY SOURCE not a binary package
            # make lib; sudo make install-lib; sudo ldconfig
            # lib.unrar should then be able to find libunrar.so
            from lib.unrar import rarfile
            data = rarfile.RarFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read rar file %s, %s %s" % (issuefile, type(why).__name__, str(why)))
            data = ''

    if data:
        img = ''
        try:
            for member in data.namelist():
                memlow = member.lower()
                if '-00.' in memlow or '000.' in memlow or 'cover.' in memlow:
                    if memlow.endswith(('.jpg', '.jpeg')):
                        img = data.read(member)
                        break
            if img:
                with open(coverfile, 'wb') as f:
                    f.write(img)
                return
            else:
                logger.debug("Failed to find image in %s" % issuefile)
        except Exception as why:
            logger.debug("Failed to extract image from %s, %s %s" % (issuefile, type(why).__name__, str(why)))
        finally:
            # release the archive handle; not every archive class offers close()
            closer = getattr(data, 'close', None)
            if closer:
                closer()

    elif extn == '.pdf':
        generator = ""
        if len(lazylibrarian.CONFIG['IMP_CONVERT']):  # allow external convert to override libraries
            generator = "external program: %s" % lazylibrarian.CONFIG['IMP_CONVERT']
            if "gsconvert.py" in lazylibrarian.CONFIG['IMP_CONVERT']:
                msg = "Use of gsconvert.py is deprecated, equivalent functionality is now built in. "
                msg += "Support for gsconvert.py may be removed in a future release. See wiki for details."
                logger.warn(msg)
            converter = lazylibrarian.CONFIG['IMP_CONVERT']
            postfix = ''
            if 'convert' in converter and 'gs' not in converter:
                # tell imagemagick to only convert first page
                postfix = '[0]'
            try:
                params = [converter, '%s%s' % (issuefile, postfix), '%s' % coverfile]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                if res:
                    logger.debug('%s reports: %s' % (lazylibrarian.CONFIG['IMP_CONVERT'], res))
            except Exception as e:
                logger.debug('External "convert" failed %s %s' % (type(e).__name__, str(e)))

        elif platform.system() == "Windows":
            # look for ghostscript in the current directory first, then via "where"
            GS = os.path.join(os.getcwd(), "gswin64c.exe")
            generator = "local gswin64c"
            if not os.path.isfile(GS):
                GS = os.path.join(os.getcwd(), "gswin32c.exe")
                generator = "local gswin32c"
            if not os.path.isfile(GS):
                params = ["where", "gswin64c"]
                try:
                    GS = subprocess.check_output(params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin64c"
                except Exception as e:
                    logger.debug("where gswin64c failed: %s %s" % (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                params = ["where", "gswin32c"]
                try:
                    GS = subprocess.check_output(params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin32c"
                except Exception as e:
                    logger.debug("where gswin32c failed: %s %s" % (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                logger.debug("No gswin found")
                generator = "(no windows ghostscript found)"
            else:
                # noinspection PyBroadException
                try:
                    params = [GS, "--version"]
                    res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    logger.debug("Found %s [%s] version %s" % (generator, GS, res))
                    generator = "%s version %s" % (generator, res)
                    # issuefile is passed unchanged: it was verified to exist above,
                    # so cutting it at '[' could only break names containing '['
                    params = [GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                              "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                              "-sOutputFile=%s" % coverfile, issuefile]
                    res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                    if not os.path.isfile(coverfile):
                        logger.debug("Failed to create jpg: %s" % res)
                except Exception:
                    logger.debug("Failed to create jpg for %s" % issuefile)
                    logger.debug('Exception in gswin create_cover: %s' % traceback.format_exc())
        else:  # not windows
            try:
                # noinspection PyUnresolvedReferences
                from wand.image import Image
                interface = "wand"
            except ImportError:
                try:
                    # No PythonMagick in python3
                    # noinspection PyUnresolvedReferences
                    import PythonMagick
                    interface = "pythonmagick"
                except ImportError:
                    interface = ""
            try:
                if interface == 'wand':
                    generator = "wand interface"
                    # '[0]' selects the first page only
                    with Image(filename=issuefile + '[0]') as img:
                        img.save(filename=coverfile)

                elif interface == 'pythonmagick':
                    generator = "pythonmagick interface"
                    img = PythonMagick.Image()
                    # PythonMagick requires filenames to be str, not unicode
                    if type(issuefile) is unicode:
                        issuefile = unaccented_str(issuefile)
                    if type(coverfile) is unicode:
                        coverfile = unaccented_str(coverfile)
                    img.read(issuefile + '[0]')
                    img.write(coverfile)

                else:
                    # no imaging library, fall back to ghostscript:
                    # a copy in the current directory wins over one on the PATH
                    GS = os.path.join(os.getcwd(), "gs")
                    generator = "local gs"
                    if not os.path.isfile(GS):
                        GS = ""
                        params = ["which", "gs"]
                        try:
                            GS = subprocess.check_output(params, stderr=subprocess.STDOUT).strip()
                            generator = GS
                        except Exception as e:
                            logger.debug("which gs failed: %s %s" % (type(e).__name__, str(e)))
                    if not os.path.isfile(GS):
                        logger.debug("Cannot find gs")
                        generator = "(no gs found)"
                    else:
                        params = [GS, "--version"]
                        res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                        logger.debug("Found gs [%s] version %s" % (GS, res))
                        generator = "%s version %s" % (generator, res)
                        # issuefile is passed unchanged, see the windows branch
                        params = [GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                                  "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                                  "-sOutputFile=%s" % coverfile, issuefile]
                        res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                        if not os.path.isfile(coverfile):
                            logger.debug("Failed to create jpg: %s" % res)
            except Exception as e:
                logger.debug("Unable to create cover for %s using %s %s" % (issuefile, type(e).__name__, generator))
                logger.debug('Exception in create_cover: %s' % traceback.format_exc())

        if os.path.isfile(coverfile):
            setperm(coverfile)
            logger.debug("Created cover for %s using %s" % (issuefile, generator))
            return

    # if not recognised extension or cover creation failed
    try:
        shutil.copyfile(os.path.join(lazylibrarian.PROG_DIR, 'data/images/nocover.jpg'), coverfile)
        setperm(coverfile)
    except Exception as why:
        logger.debug("Failed to copy nocover file, %s %s" % (type(why).__name__, str(why)))
    return
def saveLog():
    """Build a redacted, zipped copy of the most recent debug log.

    A header describing the startup command, interface, loglevel, the
    CONFIG_GIT settings and the host platform is written first. Then
    lazylibrarian.log, lazylibrarian.log.1, ... in LOGDIR are walked newest
    line first until a line containing "Debug log ON" has been copied or a
    logfile in the sequence does not exist. Values following any of the
    known password/apikey markers are replaced with '<redacted>'. The
    lines are stored in chronological order as 'debug.log' inside
    LOGDIR/debug.zip.

    Returns a status message string: either 'LOGDIR does not exist' or
    'Debug log saved as <path>'.
    """
    if not os.path.exists(lazylibrarian.CONFIG['LOGDIR']):
        return 'LOGDIR does not exist'

    popen_list = [sys.executable, lazylibrarian.FULL_PATH]
    popen_list += lazylibrarian.ARGS
    header = "Startup cmd: %s\n" % str(popen_list)
    header += 'Interface: %s\n' % lazylibrarian.CONFIG['HTTP_LOOK']
    header += 'Loglevel: %s\n' % lazylibrarian.LOGLEVEL
    for item in lazylibrarian.CONFIG_GIT:
        header += '%s: %s\n' % (item.lower(), lazylibrarian.CONFIG[item])
    header += "Python version: %s\n" % sys.version.split('\n')
    if hasattr(platform, 'dist'):
        # platform.dist() no longer exists from Python 3.8 on, don't let that abort the export
        header += "Distribution: %s\n" % str(platform.dist())
    header += "System: %s\n" % str(platform.system())
    header += "Machine: %s\n" % str(platform.machine())
    header += "Platform: %s\n" % str(platform.platform())
    header += "uname: %s\n" % str(platform.uname())
    header += "version: %s\n" % str(platform.version())
    header += "mac_ver: %s\n" % str(platform.mac_ver())

    basename = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'lazylibrarian.log')
    outfile = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'debug')
    passchars = string.ascii_letters + string.digits + '_/'  # _/ used by slack and googlebooks
    # markers that are followed by a secret in the log text
    redactlist = ['api -> ', 'apikey -> ', 'pass -> ', 'password -> ', 'token -> ', 'using api [',
                  'apikey=', 'key=', 'apikey%3D', "apikey': u'", "apikey': '"]
    mask = '<redacted>'

    def redact(text):
        """Return (text with every marked secret masked, number of masks applied)."""
        hits = 0
        for marker in redactlist:
            pos = text.find(marker)
            while pos >= 0:  # handle every occurrence, not just the first
                startpos = pos + len(marker)
                if text.startswith(mask, startpos):
                    # already masked via an overlapping marker ('apikey=' vs 'key=')
                    pos = text.find(marker, startpos)
                    continue
                endpos = startpos
                # skip any quoting/punctuation in front of the secret...
                while endpos < len(text) and text[endpos] not in passchars:
                    endpos += 1
                # ...then swallow the secret itself
                while endpos < len(text) and text[endpos] in passchars:
                    endpos += 1
                if endpos != startpos:
                    text = text[:startpos] + mask + text[endpos:]
                    hits += 1
                    pos = text.find(marker, startpos + len(mask))
                else:
                    pos = text.find(marker, startpos)
        return text, hits

    with open(outfile + '.tmp', 'w') as out:
        nextfile = True
        extn = 0
        redacts = 0
        while nextfile:
            fname = basename
            if extn > 0:
                fname = fname + '.' + str(extn)
            if not os.path.exists(fname):
                logger.debug("logfile [%s] does not exist" % fname)
                nextfile = False
            else:
                logger.debug('Processing logfile [%s]' % fname)
                linecount = 0
                for line in reverse_readline(fname):
                    line, hits = redact(line)
                    redacts += hits
                    out.write("%s\n" % line)
                    if "Debug log ON" in line:
                        # reached the start of the current debug session
                        logger.debug('Found "Debug log ON" line %s in %s' % (linecount, fname))
                        nextfile = False
                        break
                    linecount += 1
                extn += 1

    # the temp file is newest-first, reverse it again to get chronological order
    with open(outfile + '.log', 'w') as logfile:
        logfile.write(header)
        lines = 0  # header lines are not counted
        for line in reverse_readline(outfile + '.tmp'):
            logfile.write("%s\n" % line)
            lines += 1
    os.remove(outfile + '.tmp')
    logger.debug("Redacted %s passwords/apikeys" % redacts)
    logger.debug("%s log lines written to %s" % (lines, outfile + '.log'))
    with zipfile.ZipFile(outfile + '.zip', 'w') as myzip:
        myzip.write(outfile + '.log', 'debug.log')
    os.remove(outfile + '.log')
    return "Debug log saved as %s" % (outfile + '.zip')
def get_book_info(fname):
    """Extract basic metadata from an ebook or opf file.

    Only handles epub, mobi, azw3 and opf for now, selected by the file
    extension (case-insensitive).

    Returns a dict that may contain 'type', 'title', 'creator', 'language'
    and 'identifier'. The dict is empty for a missing or unsupported
    extension, and holds only 'type' when the file could not be parsed.
    Parse failures are logged, not raised; a failure to open an opf file
    still propagates to the caller.
    """
    res = {}
    extn = os.path.splitext(fname)[1].lower()
    if not extn:
        return res

    if extn in ('.mobi', '.azw3'):
        res['type'] = extn[1:]
        try:
            book = Mobi(fname)
            book.parse()
        except Exception as e:
            logger.debug('Unable to parse mobi in %s, %s' % (fname, str(e)))
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        return res

    # pdf is deliberately not handled:
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs

    if extn == '.epub':
        res['type'] = "epub"
        try:
            with zipfile.ZipFile(fname) as zipdata:
                # the container file tells us where the contents metafile lives
                container = ElementTree.fromstring(zipdata.read('META-INF/container.xml'))
                if not len(container):
                    return res
                cfname = ""
                for rootfile in container[0]:
                    if 'full-path' in rootfile.attrib:
                        cfname = rootfile.attrib['full-path']
                        break
                # grab the metadata block from the contents metafile
                txt = zipdata.read(cfname)
        except Exception as e:
            logger.debug('Unable to parse zipfile %s, %s' % (fname, str(e)))
            return res
    elif extn == '.opf':
        res['type'] = "opf"
        with open(fname) as opf:
            txt = opf.read()
        # sanitize any unmatched html tags or ElementTree won't parse
        dic = {'<br>': '', '</br>': ''}
        txt = replace_all(txt, dic)
    else:
        # unsupported type: nothing to parse (previously this fell through
        # to the parser with no data and logged a misleading parse error)
        return res

    # repackage epub or opf metadata
    try:
        tree = ElementTree.fromstring(txt)
    except Exception as e:
        logger.error("Error parsing metadata from %s, %s" % (fname, str(e)))
        return res

    if not len(tree):
        return res
    # the first child of the root is taken as the metadata block
    for elem in tree[0]:
        tag = str(elem.tag).lower()
        if '}' in tag:
            tag = tag.split('}')[1]  # drop the '{namespace}' prefix
        txt = elem.text
        attrib = str(elem.attrib).lower()
        if 'title' in tag:
            res['title'] = txt
        elif 'language' in tag:
            res['language'] = txt
        elif 'creator' in tag:
            res['creator'] = txt
        elif 'identifier' in tag and 'isbn' in attrib:
            if is_valid_isbn(txt):
                res['identifier'] = txt
    return res