Esempio n. 1
0
def saveLog():
    if not os.path.exists(lazylibrarian.CONFIG['LOGDIR']):
        return 'LOGDIR does not exist'

    basename = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'lazylibrarian.log')
    outfile = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'debug')
    passchars = string.ascii_letters + string.digits + '_/'  # _/ used by slack and googlebooks
    redactlist = ['api -> ', 'apikey -> ', 'pass -> ', 'password -> ', 'token -> ', 'using api [',
                  'apikey=', 'key=', 'apikey%3D', "apikey': u'", "apikey': ', 'keys ->'"]
    with open(outfile + '.tmp', 'w') as out:
        nextfile = True
        extn = 0
        redacts = 0
        while nextfile:
            fname = basename
            if extn > 0:
                fname = fname + '.' + str(extn)
            if not os.path.exists(fname):
                logger.debug("logfile [%s] does not exist" % fname)
                nextfile = False
            else:
                logger.debug('Processing logfile [%s]' % fname)
                linecount = 0
                for line in reverse_readline(fname):
                    for item in redactlist:
                        startpos = line.find(item)
                        if startpos >= 0:
                            startpos += len(item)
                            endpos = startpos
                            while endpos < len(line) and not line[endpos] in passchars:
                                endpos += 1
                            while endpos < len(line) and line[endpos] in passchars:
                                endpos += 1
                            if endpos != startpos:
                                line = line[:startpos] + '<redacted>' + line[endpos:]
                                redacts += 1

                    out.write("%s\n" % line)
                    if "Debug log ON" in line:
                        logger.debug('Found "Debug log ON" line %s in %s' % (linecount, fname))
                        nextfile = False
                        break
                    linecount += 1
                extn += 1

    with open(outfile + '.log', 'w') as logfile:
        logfile.write(logHeader())
        lines = 0
        for line in reverse_readline(outfile + '.tmp'):
            logfile.write("%s\n" % line)
            lines += 1
    os.remove(outfile + '.tmp')
    logger.debug("Redacted %s passwords/apikeys" % redacts)
    logger.debug("%s log lines written to %s" % (lines, outfile + '.log'))
    with zipfile.ZipFile(outfile + '.zip', 'w') as myzip:
        myzip.write(outfile + '.log', 'debug.log')
    os.remove(outfile + '.log')
    return "Debug log saved as %s" % (outfile + '.zip')
Esempio n. 2
0
def get_book_info(fname):
    # only handles epub, mobi and opf for now,
    # for pdf see below
    res = {}
    if '.' not in fname:
        return res
    words = fname.split('.')
    extn = words[len(words) - 1]

    if extn == "mobi":
        try:
            book = Mobi(fname)
            book.parse()
        except:
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        res['type'] = "mobi"
        return res

    """
    # none of the pdfs in my library had language,isbn
    # most didn't have author, or had the wrong author
    # (author set to publisher, or software used)
    # so probably not much point in looking at pdfs
    #
    # if (extn == "pdf"):
    #	  pdf = PdfFileReader(open(fname, "rb"))
    #	  txt = pdf.getDocumentInfo()
          # repackage the data here to get components we need
    #     res = {}
    #     for s in ['title','language','creator']:
    #         res[s] = txt[s]
    #	  res['identifier'] = txt['isbn']
    #     res['type'] = "pdf"
    #	  return res
    """

    if extn == "epub":
        # prepare to read from the .epub file
        zipdata = zipfile.ZipFile(fname)
        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res

        while n < len(tree[0]):
            att = tree[0][n].attrib
            if 'full-path' in att:
                cfname = att['full-path']
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)
        tree = ElementTree.fromstring(txt)
        res['type'] = "epub"
    else:
        if extn == "opf":
            txt = open(fname).read()
            tree = ElementTree.fromstring(txt)
            res['type'] = "opf"
        else:
            return ""

    # repackage the data
    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).split('}')[1]
        txt = tree[0][n].text
        attrib = str(tree[0][n].attrib)
        isbn = ""
        if 'title' in tag.lower():
            res['title'] = txt
        elif 'language' in tag.lower():
            res['language'] = txt
        elif 'creator' in tag.lower():
            res['creator'] = txt
        elif 'identifier' in tag.lower() and 'isbn' in attrib.lower():
            if formatter.is_valid_isbn(txt):
                res['identifier'] = txt
        n = n + 1
    return res
Esempio n. 3
0
def create_cover(issuefile=None, refresh=False):
    if lazylibrarian.CONFIG[
            'IMP_CONVERT'] == 'None':  # special flag to say "no covers required"
        return
    if issuefile is None or not os.path.isfile(issuefile):
        logger.debug('No issuefile %s' % issuefile)
        return

    base, extn = os.path.splitext(issuefile)
    if not extn:
        logger.debug('Unable to create cover for %s, no extension?' %
                     issuefile)
        return

    coverfile = base + '.jpg'

    if os.path.isfile(coverfile):
        if refresh:
            os.remove(coverfile)
        else:
            logger.debug('Cover for %s exists' % issuefile)
            return  # quit if cover already exists and we didn't want to refresh

    logger.debug('Creating cover for %s' % issuefile)
    data = ''  # result from unzip or unrar
    extn = extn.lower()
    if extn in ['.cbz', '.epub']:
        try:
            data = zipfile.ZipFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read zip file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    elif extn in ['.cbr']:
        try:
            # unrar will complain if the library isn't installed, needs to be compiled separately
            # see https://pypi.python.org/pypi/unrar/ for instructions
            # Download source from http://www.rarlab.com/rar_add.htm
            # note we need LIBRARY SOURCE not a binary package
            # make lib; sudo make install-lib; sudo ldconfig
            # lib.unrar should then be able to find libunrar.so
            from lib.unrar import rarfile
            data = rarfile.RarFile(issuefile)
        except Exception as why:
            logger.debug("Failed to read rar file %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))
            data = ''
    if data:
        img = ''
        try:
            for member in data.namelist():
                memlow = member.lower()
                if '-00.' in memlow or '000.' in memlow or 'cover.' in memlow:
                    if memlow.endswith('.jpg') or memlow.endswith('.jpeg'):
                        img = data.read(member)
                        break
            if img:
                with open(coverfile, 'wb') as f:
                    f.write(img)
                return
            else:
                logger.debug("Failed to find image in %s" % issuefile)
        except Exception as why:
            logger.debug("Failed to extract image from %s, %s %s" %
                         (issuefile, type(why).__name__, str(why)))

    elif extn == '.pdf':
        generator = ""
        if len(lazylibrarian.CONFIG['IMP_CONVERT']
               ):  # allow external convert to override libraries
            generator = "external program: %s" % lazylibrarian.CONFIG[
                'IMP_CONVERT']
            if "gsconvert.py" in lazylibrarian.CONFIG['IMP_CONVERT']:
                msg = "Use of gsconvert.py is deprecated, equivalent functionality is now built in. "
                msg += "Support for gsconvert.py may be removed in a future release. See wiki for details."
                logger.warn(msg)
            converter = lazylibrarian.CONFIG['IMP_CONVERT']
            postfix = ''
            # if not os.path.isfile(converter):  # full path given, or just program_name?
            #     converter = os.path.join(os.getcwd(), lazylibrarian.CONFIG['IMP_CONVERT'])
            if 'convert' in converter and 'gs' not in converter:
                # tell imagemagick to only convert first page
                postfix = '[0]'
            try:
                params = [
                    converter,
                    '%s%s' % (issuefile, postfix),
                    '%s' % coverfile
                ]
                res = subprocess.check_output(params, stderr=subprocess.STDOUT)
                if res:
                    logger.debug('%s reports: %s' %
                                 (lazylibrarian.CONFIG['IMP_CONVERT'], res))
            except Exception as e:
                # logger.debug(params)
                logger.debug('External "convert" failed %s %s' %
                             (type(e).__name__, str(e)))

        elif platform.system() == "Windows":
            GS = os.path.join(os.getcwd(), "gswin64c.exe")
            generator = "local gswin64c"
            if not os.path.isfile(GS):
                GS = os.path.join(os.getcwd(), "gswin32c.exe")
                generator = "local gswin32c"
            if not os.path.isfile(GS):
                params = ["where", "gswin64c"]
                try:
                    GS = subprocess.check_output(
                        params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin64c"
                except Exception as e:
                    logger.debug("where gswin64c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                params = ["where", "gswin32c"]
                try:
                    GS = subprocess.check_output(
                        params, stderr=subprocess.STDOUT).strip()
                    generator = "gswin32c"
                except Exception as e:
                    logger.debug("where gswin32c failed: %s %s" %
                                 (type(e).__name__, str(e)))
            if not os.path.isfile(GS):
                logger.debug("No gswin found")
                generator = "(no windows ghostscript found)"
            else:
                # noinspection PyBroadException
                try:
                    params = [GS, "--version"]
                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    logger.debug("Found %s [%s] version %s" %
                                 (generator, GS, res))
                    generator = "%s version %s" % (generator, res)
                    issuefile = issuefile.split('[')[0]
                    params = [
                        GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH", "-dSAFER",
                        "-dFirstPage=1", "-dLastPage=1", "-dUseCropBox",
                        "-sOutputFile=%s" % coverfile, issuefile
                    ]
                    res = subprocess.check_output(params,
                                                  stderr=subprocess.STDOUT)
                    if not os.path.isfile(coverfile):
                        logger.debug("Failed to create jpg: %s" % res)
                except Exception:  # as why:
                    logger.debug("Failed to create jpg for %s" % issuefile)
                    logger.debug('Exception in gswin create_cover: %s' %
                                 traceback.format_exc())
        else:  # not windows
            try:
                # noinspection PyUnresolvedReferences
                from wand.image import Image
                interface = "wand"
            except ImportError:
                try:
                    # No PythonMagick in python3
                    # noinspection PyUnresolvedReferences
                    import PythonMagick
                    interface = "pythonmagick"
                except ImportError:
                    interface = ""
            try:
                if interface == 'wand':
                    generator = "wand interface"
                    with Image(filename=issuefile + '[0]') as img:
                        img.save(filename=coverfile)

                elif interface == 'pythonmagick':
                    generator = "pythonmagick interface"
                    img = PythonMagick.Image()
                    # PythonMagick requires filenames to be str, not unicode
                    if type(issuefile) is unicode:
                        issuefile = unaccented_str(issuefile)
                    if type(coverfile) is unicode:
                        coverfile = unaccented_str(coverfile)
                    img.read(issuefile + '[0]')
                    img.write(coverfile)

                else:
                    GS = os.path.join(os.getcwd(), "gs")
                    generator = "local gs"
                    if not os.path.isfile(GS):
                        GS = ""
                        params = ["which", "gs"]
                        try:
                            GS = subprocess.check_output(
                                params, stderr=subprocess.STDOUT).strip()
                            generator = GS
                        except Exception as e:
                            logger.debug("which gs failed: %s %s" %
                                         (type(e).__name__, str(e)))
                        if not os.path.isfile(GS):
                            logger.debug("Cannot find gs")
                            generator = "(no gs found)"
                        else:
                            params = [GS, "--version"]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            logger.debug("Found gs [%s] version %s" %
                                         (GS, res))
                            generator = "%s version %s" % (generator, res)
                            issuefile = issuefile.split('[')[0]
                            params = [
                                GS, "-sDEVICE=jpeg", "-dNOPAUSE", "-dBATCH",
                                "-dSAFER", "-dFirstPage=1", "-dLastPage=1",
                                "-dUseCropBox",
                                "-sOutputFile=%s" % coverfile, issuefile
                            ]
                            res = subprocess.check_output(
                                params, stderr=subprocess.STDOUT)
                            if not os.path.isfile(coverfile):
                                logger.debug("Failed to create jpg: %s" % res)
            except Exception as e:
                logger.debug("Unable to create cover for %s using %s %s" %
                             (issuefile, type(e).__name__, generator))
                logger.debug('Exception in create_cover: %s' %
                             traceback.format_exc())

        if os.path.isfile(coverfile):
            setperm(coverfile)
            logger.debug("Created cover for %s using %s" %
                         (issuefile, generator))
            return

    # if not recognised extension or cover creation failed
    try:
        shutil.copyfile(
            os.path.join(lazylibrarian.PROG_DIR, 'data/images/nocover.jpg'),
            coverfile)
        setperm(coverfile)
    except Exception as why:
        logger.debug("Failed to copy nocover file, %s %s" %
                     (type(why).__name__, str(why)))
    return
Esempio n. 4
0
def saveLog():
    if not os.path.exists(lazylibrarian.CONFIG['LOGDIR']):
        return 'LOGDIR does not exist'

    popen_list = [sys.executable, lazylibrarian.FULL_PATH]
    popen_list += lazylibrarian.ARGS
    header = "Startup cmd: %s\n" % str(popen_list)
    header += 'Interface: %s\n' % lazylibrarian.CONFIG['HTTP_LOOK']
    header += 'Loglevel: %s\n' % lazylibrarian.LOGLEVEL
    for item in lazylibrarian.CONFIG_GIT:
        header += '%s: %s\n' % (item.lower(), lazylibrarian.CONFIG[item])
    header += "Python version: %s\n" % sys.version.split('\n')
    header += "Distribution: %s\n" % str(platform.dist())
    header += "System: %s\n" % str(platform.system())
    header += "Machine: %s\n" % str(platform.machine())
    header += "Platform: %s\n" % str(platform.platform())
    header += "uname: %s\n" % str(platform.uname())
    header += "version: %s\n" % str(platform.version())
    header += "mac_ver: %s\n" % str(platform.mac_ver())

    basename = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'lazylibrarian.log')
    outfile = os.path.join(lazylibrarian.CONFIG['LOGDIR'], 'debug')
    passchars = string.ascii_letters + string.digits + '_/'  # _/ used by slack and googlebooks
    redactlist = ['api -> ', 'apikey -> ', 'pass -> ', 'password -> ', 'token -> ', 'using api [',
                    'apikey=', 'key=', 'apikey%3D', "apikey': u'", "apikey': '"]
    with open(outfile + '.tmp', 'w') as out:
        nextfile = True
        extn = 0
        redacts = 0
        while nextfile:
            fname = basename
            if extn > 0:
                fname = fname + '.' + str(extn)
            if not os.path.exists(fname):
                logger.debug("logfile [%s] does not exist" % fname)
                nextfile = False
            else:
                logger.debug('Processing logfile [%s]' % fname)
                linecount = 0
                for line in reverse_readline(fname):
                    for item in redactlist:
                        startpos = line.find(item)
                        if startpos >= 0:
                            startpos += len(item)
                            endpos = startpos
                            while endpos < len(line) and not line[endpos] in passchars:
                                endpos += 1
                            while endpos < len(line) and line[endpos] in passchars:
                                endpos += 1
                            if endpos != startpos:
                                line = line[:startpos] + '<redacted>' + line[endpos:]
                                redacts += 1

                    out.write("%s\n" % line)
                    if "Debug log ON" in line:
                        logger.debug('Found "Debug log ON" line %s in %s' % (linecount, fname))
                        nextfile = False
                        break
                    linecount += 1
                extn += 1

    with open(outfile + '.log', 'w') as logfile:
        logfile.write(header)
        lines = 0  # len(header.split('\n'))
        for line in reverse_readline(outfile + '.tmp'):
            logfile.write("%s\n" % line)
            lines += 1
    os.remove(outfile + '.tmp')
    logger.debug("Redacted %s passwords/apikeys" % redacts)
    logger.debug("%s log lines written to %s" % (lines, outfile + '.log'))
    with zipfile.ZipFile(outfile + '.zip', 'w') as myzip:
        myzip.write(outfile + '.log', 'debug.log')
    os.remove(outfile + '.log')
    return "Debug log saved as %s" % (outfile + '.zip')
Esempio n. 5
0
def get_book_info(fname):
    # only handles epub, mobi, azw3 and opf for now,
    # for pdf see notes below
    res = {}
    extn = os.path.splitext(fname)[1]
    if not extn:
        return res
    if extn == ".mobi" or extn == ".azw3":
        res['type'] = extn[1:]
        try:
            book = Mobi(fname)
            book.parse()
        except Exception as e:
            logger.debug('Unable to parse mobi in %s, %s' % (fname, str(e)))
            return res
        res['creator'] = book.author()
        res['title'] = book.title()
        res['language'] = book.language()
        res['identifier'] = book.isbn()
        return res
        """
        # none of the pdfs in my library had language,isbn
        # most didn't have author, or had the wrong author
        # (author set to publisher, or software used)
        # so probably not much point in looking at pdfs
        #
        if (extn == ".pdf"):
            pdf = PdfFileReader(open(fname, "rb"))
            txt = pdf.getDocumentInfo()
            # repackage the data here to get components we need
            res = {}
            for s in ['title','language','creator']:
                res[s] = txt[s]
            res['identifier'] = txt['isbn']
            res['type'] = "pdf"
            return res
        """
    elif extn == ".epub":
        res['type'] = "epub"

        # prepare to read from the .epub file
        try:
            zipdata = zipfile.ZipFile(fname)
        except Exception as e:
            logger.debug('Unable to parse zipfile %s, %s' % (fname, str(e)))
            return res

        # find the contents metafile
        txt = zipdata.read('META-INF/container.xml')
        tree = ElementTree.fromstring(txt)
        n = 0
        cfname = ""
        if not len(tree):
            return res

        while n < len(tree[0]):
            att = tree[0][n].attrib
            if 'full-path' in att:
                cfname = att['full-path']
                break
            n = n + 1

        # grab the metadata block from the contents metafile
        txt = zipdata.read(cfname)

    elif extn == ".opf":
        res['type'] = "opf"
        txt = open(fname).read()
        # sanitize any unmatched html tags or ElementTree won't parse
        dic = {'<br>': '', '</br>': ''}
        txt = replace_all(txt, dic)

    # repackage epub or opf metadata
    try:
        tree = ElementTree.fromstring(txt)
    except Exception as e:
        logger.error("Error parsing metadata from %s, %s" % (fname, str(e)))
        return res

    if not len(tree):
        return res
    n = 0
    while n < len(tree[0]):
        tag = str(tree[0][n].tag).lower()
        if '}' in tag:
            tag = tag.split('}')[1]
            txt = tree[0][n].text
            attrib = str(tree[0][n].attrib).lower()
            if 'title' in tag:
                res['title'] = txt
            elif 'language' in tag:
                res['language'] = txt
            elif 'creator' in tag:
                res['creator'] = txt
            elif 'identifier' in tag and 'isbn' in attrib:
                if is_valid_isbn(txt):
                    res['identifier'] = txt
        n = n + 1
    return res