コード例 #1
0
ファイル: logger.py プロジェクト: AndyHazz/LazyLibrarian
    def log(self, message, level):
        """ Keep a message in the in-memory log list and pass it to the 'lazylibrarian' logger.

            message: text to log, converted with formatter.safe_unicode and encoded as utf-8
            level:   'DEBUG', 'INFO' or 'WARNING', any other value is logged as an error """
        logger = logging.getLogger('lazylibrarian')

        # current_thread().name is the supported spelling of currentThread().getName()
        threadname = threading.current_thread().name

        # Ensure messages are utf-8 as some author names contain accents and the web page doesn't like them
        message = formatter.safe_unicode(message).encode('utf-8')

        # DEBUG messages are only kept in memory when LOGFULL is set
        if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
            # Newest entry goes first. Limit the size of the "in-memory" log, as it gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
                del lazylibrarian.LOGLIST[-1]

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            # Logger.warn is a deprecated alias, use Logger.warning
            logger.warning(message)
        else:
            logger.error(message)
コード例 #2
0
ファイル: logger.py プロジェクト: forge33/LazyLibrarian
    def log(self, message, level):
        """ Keep a message in the in-memory log list and pass it to the 'lazylibrarian' logger.

            message: text to log, converted with formatter.safe_unicode and encoded
                     with lazylibrarian.SYS_ENCODING
            level:   'DEBUG', 'INFO' or 'WARNING', any other value is logged as an error """
        logger = logging.getLogger('lazylibrarian')

        # current_thread().name is the supported spelling of currentThread().getName()
        threadname = threading.current_thread().name

        # Ensure messages are correctly encoded as some author names contain accents
        # and the web page doesn't like them
        message = formatter.safe_unicode(message).encode(lazylibrarian.SYS_ENCODING)

        # DEBUG messages are only kept in memory when LOGFULL is set
        if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
            # Newest entry goes first. Limit the size of the "in-memory" log, as it gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
                del lazylibrarian.LOGLIST[-1]

        message = threadname + ' : ' + message

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            # Logger.warn is a deprecated alias, use Logger.warning
            logger.warning(message)
        else:
            logger.error(message)
コード例 #3
0
ファイル: images.py プロジェクト: kuuratsanik/LazyLibrarian-1
def getAuthorImage(authorid=None):
    """ Return a link to a locally cached image for the given author, or None if no image is available.
        An existing <CACHEDIR>/author/<authorid>.jpg is used if present, otherwise the author's name
        is read from the database, a google image search is made for it and the image found in the
        result page is stored with cache_img """
    # tbm=isch      search images
    # tbs=ift:jpg  jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    coverfile = os.path.join(lazylibrarian.CACHEDIR, "author", authorid + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        return 'cache/author/' + authorid + '.jpg'

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if not author:
        logger.debug("No author found for %s" % authorid)
        return None

    authorname = safe_unicode(author['AuthorName'])
    if PY2:
        authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
    # The search terms already start with the word "author". Appending the bare literal 'author'
    # to the quoted string, as was done before, glued it onto the last word of the name
    safeparams = quote_plus("author %s" % authorname)
    URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams
    result, success = fetchURL(URL)
    if not success:
        logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
        return None

    # pick the src attribute of the image following the first 'url?q=' link in the page
    try:
        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
    except IndexError:
        img = None
    if not (img and img.startswith('http')):
        logger.debug("No image found in google page for %s" % authorname)
        return None

    coverlink, success, was_in_cache = cache_img("author", authorid, img)
    if not success:
        # on failure the first value returned by cache_img is logged in place of a link
        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
        return None

    if was_in_cache:
        logger.debug("Returning cached google image for %s" % authorname)
    else:
        logger.debug("Cached google image for %s" % authorname)
    return coverlink
コード例 #4
0
def getAuthorImage(authorid=None):
    """ Return a link to a locally cached image for the given author, or None if no image is available.
        An existing data/images/cache/<authorid>.jpg below PROG_DIR is used if present, otherwise the
        author's name is read from the database, a google image search is made for it and the image
        found in the result page is stored with cache_cover """
    # tbm=isch      search images
    # tbs=ift:jpg  jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data', 'images', 'cache')
    coverfile = os.path.join(cachedir, authorid + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getAuthorImage: Returning Cached response for %s" %
                     coverfile)
        coverlink = 'images/cache/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    # Bind authorid as a query parameter instead of formatting it into the SQL text,
    # so quote characters in the id cannot alter the statement
    authors = myDB.select(
        'select AuthorName from authors where AuthorID=?', (authorid,))
    if authors:
        authorname = safe_unicode(authors[0][0]).encode(
            lazylibrarian.SYS_ENCODING)
        safeparams = urllib.quote_plus("%s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg&as_q=" + safeparams
        result, success = fetchURL(URL)
        if success:
            # pick the src attribute of the image following the first 'url?q=' link in the page
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(authorid, img)
                if coverlink is not None:
                    logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" %
                                 (img, result))
            else:
                logger.debug("No image found in google page for %s" %
                             authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" %
                         (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
コード例 #5
0
ファイル: images.py プロジェクト: knobunc/LazyLibrarian
def getAuthorImage(authorid=None):
    """ Return a link to a locally cached image for the given author, or None if no image is available.
        An existing <CACHEDIR>/author/<authorid>.jpg is used if present, otherwise the author's name
        is read from the database, a google image search is made for it and the image found in the
        result page is stored with cache_img """
    # tbm=isch      search images
    # tbs=ift:jpg  jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "author", authorid + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/author/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if author:
        authorname = safe_unicode(author['AuthorName'])
        if PY2:
            authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
        # "author" is already the first search term. The literal 'author' that used to be appended
        # to the quoted string had no separator, so it ran into the last word of the name
        safeparams = quote_plus("author %s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams
        result, success = fetchURL(URL)
        if success:
            # pick the src attribute of the image following the first 'url?q=' link in the page
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success, was_in_cache = cache_img("author", authorid, img)
                if success:
                    if was_in_cache:
                        logger.debug("Returning cached google image for %s" % authorname)
                    else:
                        logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    # on failure the first value returned by cache_img is logged in place of a link
                    logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
コード例 #6
0
ファイル: bookwork.py プロジェクト: Noppadet/LazyLibrarian
def getWorkSeries(bookID=None):
    """ Return (series name, number in series) taken from the first series link in the page
        that getBookWork returns for bookID.
        Returns (None, None) if there is no bookID, no page or no series link.
        The number is None when the link text has no bracketed part """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None

    page = getBookWork(bookID)
    if not page:
        return None, None

    try:
        anchor = page.split('<a href="/series/')[1]
        name = anchor.split('">')[1].split('</a>')[0]
    except IndexError:
        return None, None

    name = formatter.safe_unicode(name).encode('utf-8')
    if not name or '(' not in name:
        return name, None

    # link text looks like "Series Name (number)"
    number = name.split('(')[1].split(')')[0]
    return name.split(' (')[0], number
コード例 #7
0
ファイル: logger.py プロジェクト: kuuratsanik/LazyLibrarian-1
    def log(message, level):
        """ Keep a message in the in-memory log list and pass it to the 'lazylibrarian' logger,
            tagged with the thread name and the file, function and line of the original caller.

            message: text to log
            level:   'DEBUG', 'INFO' or 'WARNING', any other value is logged as an error """
        logger = logging.getLogger('lazylibrarian')

        # current_thread().name is the supported spelling of currentThread().getName()
        threadname = threading.current_thread().name

        # Get the frame data of the method that made the original logger call.
        # inspect.stack() walks every frame of the call stack, so capture it once and reuse it
        stack = inspect.stack()
        if len(stack) > 2:
            frame = inspect.getframeinfo(stack[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""
        # drop the frame references straight away so they do not linger in a reference cycle
        del stack

        if 'windows' in platform.system().lower():  # windows cp1252 can't handle some accents
            message = formatter.unaccented(message)
        elif PY2:
            message = formatter.safe_unicode(message)
            message = message.encode(lazylibrarian.SYS_ENCODING)

        # DEBUG messages are only kept in memory when LOGLEVEL is 2 or more
        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Newest entry goes first. Limit the size of the "in-memory" log, as it gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname,
                                             program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
コード例 #8
0
ファイル: bookwork.py プロジェクト: NVRemoteDev/LazyLibrarian
def getWorkSeries(bookID=None):
    """ Return (series name, number in series) taken from the first series link in the page
        that getBookWork returns for bookID.
        Returns (None, None) if there is no bookID, no page or no series link.
        The number is None when the link text has no bracketed part """
    if bookID:
        work = getBookWork(bookID)
    else:
        logger.error("getWorkSeries - No bookID")
        work = None

    series = seriesnum = None
    if work:
        pieces = work.split('<a href="/series/')
        try:
            series = pieces[1].split('">')[1].split('</a>')[0]
        except IndexError:
            return None, None
        series = formatter.safe_unicode(series).encode('utf-8')
        if series and '(' in series:
            # link text looks like "Series Name (number)"
            seriesnum = series.split('(')[1].split(')')[0]
            series = series.split(' (')[0]
    return series, seriesnum
コード例 #9
0
def getWorkSeries(bookID=None):
    """ Look up the series for the given bookid in the page returned by getBookWork
        Returns a (series name, number in series) pair, (None, None) if there is no bookid,
        no page or no series link, and a number of None if the link text has no bracketed part """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None

    workpage = getBookWork(bookID, "Series")
    if not workpage:
        return None, None

    try:
        after_link = workpage.split('<a href="/series/')[1]
        after_tag = after_link.split('">')[1]
    except IndexError:
        return None, None

    title = after_tag.split('</a>')[0]
    title = safe_unicode(title).encode(lazylibrarian.SYS_ENCODING)

    # link text looks like "Series Name (number)"
    has_number = bool(title) and '(' in title
    seriesnum = title.split('(')[1].split(')')[0] if has_number else None
    series = title.split(' (')[0] if has_number else title
    return series, seriesnum
コード例 #10
0
def getWorkSeries(bookID=None):
    """ Find the series name and the number in series for the given bookid, using the first
        series link in the page returned by getBookWork
        Returns (None, None) if there is no bookid, no page or no series link;
        the number is None if the link text has no bracketed part """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None

    work = getBookWork(bookID, "Series")
    if work:
        try:
            link = work.split('<a href="/series/')[1]
            series = link.split('">')[1].split('</a>')[0]
        except IndexError:
            return None, None

        series = safe_unicode(series).encode(lazylibrarian.SYS_ENCODING)
        seriesnum = None
        if series:
            parts = series.split('(')
            if len(parts) > 1:
                # text after the first bracket, up to its closing bracket
                seriesnum = parts[1].split(')')[0]
                series = series.split(' (')[0]
        return series, seriesnum

    return None, None
コード例 #11
0
    def log(message, level):
        """ Keep a message in the in-memory log list and pass it to the 'lazylibrarian' logger,
            tagged with the thread name and the file, function and line of the original caller.

            message: text to log, converted with formatter.safe_unicode and encoded
                     with lazylibrarian.SYS_ENCODING
            level:   'DEBUG', 'INFO' or 'WARNING', any other value is logged as an error """
        logger = logging.getLogger('lazylibrarian')

        # current_thread().name is the supported spelling of currentThread().getName()
        threadname = threading.current_thread().name

        # Get the frame data of the method that made the original logger call.
        # inspect.stack() walks every frame of the call stack, so capture it once and reuse it
        stack = inspect.stack()
        if len(stack) > 2:
            frame = inspect.getframeinfo(stack[2][0])
            program = os.path.basename(frame.filename)
            method = frame.function
            lineno = frame.lineno
        else:
            program = ""
            method = ""
            lineno = ""
        # drop the frame references straight away so they do not linger in a reference cycle
        del stack

        # Ensure messages are correctly encoded as some author names contain accents
        # and the web page doesn't like them
        message = formatter.safe_unicode(message)
        message = message.encode(lazylibrarian.SYS_ENCODING)
        # DEBUG messages are only kept in memory when LOGLEVEL is 2 or more
        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Newest entry goes first. Limit the size of the "in-memory" log, as it gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname,
                                             program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > lazylibrarian.CONFIG['LOGLIMIT']:
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
コード例 #12
0
ファイル: logger.py プロジェクト: DobyTang/LazyLibrarian
    def log(message, level):
        """ Keep a message in the in-memory log list and pass it to the 'lazylibrarian' logger,
            tagged with the thread name and the file, function and line of the original caller.

            message: text to log
            level:   'DEBUG', 'INFO' or 'WARNING', any other value is logged as an error """
        logger = logging.getLogger('lazylibrarian')

        # current_thread().name is the supported spelling of currentThread().getName()
        threadname = threading.current_thread().name

        # Get the frame data of the method that made the original logger call.
        # inspect.stack() walks every frame of the call stack, so capture it once and reuse it
        stack = inspect.stack()
        caller = inspect.getframeinfo(stack[2][0]) if len(stack) > 2 else None
        # drop the frame references straight away so they do not linger in a reference cycle
        del stack

        if caller is not None:
            program = os.path.basename(caller.filename)
            method = caller.function
            lineno = caller.lineno
        else:
            program = ""
            method = ""
            lineno = ""

        if 'windows' in platform.system().lower():  # windows cp1252 can't handle some accents
            message = formatter.unaccented(message)
        elif PY2:
            message = formatter.safe_unicode(message)
            message = message.encode(lazylibrarian.SYS_ENCODING)

        # DEBUG messages are only kept in memory when LOGLEVEL is 2 or more
        if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
            # Newest entry goes first. Limit the size of the "in-memory" log, as it gets slow if too long
            lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message))
            if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
                del lazylibrarian.LOGLIST[-1]

        message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

        if level == 'DEBUG':
            logger.debug(message)
        elif level == 'INFO':
            logger.info(message)
        elif level == 'WARNING':
            logger.warning(message)
        else:
            logger.error(message)
コード例 #13
0
ファイル: images.py プロジェクト: DarkSir23/LazyLibrarian
def _cached_image_size(coverlink):
    """ Return the size in bytes of the file coverlink refers to below lazylibrarian.DATADIR,
        or 0 if there is no such file """
    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
    if os.path.isfile(coverfile):
        with open(coverfile, 'rb') as f:
            return len(f.read())
    return 0


def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. OpenLibrary image
            7. Google isbn search (if google has a link to book for sale)
            8. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, openlibrary, googleimage
        If src is given only that source is tried and the image is cached under a source specific
        name, if src is empty each source is tried in turn until one provides a usable image.
        Images smaller than 50 bytes are treated as empty placeholders.
        Returns a (coverlink, source) tuple, coverlink is None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything  has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    # if librarything has no image they return a 1x1 gif
                    if _cached_image_size(coverlink) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                # look for the workCoverImage entry first, then for the og:image one
                for marker in ('workCoverImage', 'og:image'):
                    try:
                        img = work.split(marker)[1].split('="')[1].split('"')[0]
                        if img and img.startswith('http'):
                            if src:
                                coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                            else:
                                coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                            # if librarything has no image they return a 1x1 gif
                            # (an empty image must never be returned as a cover, hence elif below)
                            if _cached_image_size(coverlink) < 50:
                                logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                            elif success:
                                logger.debug("Caching whatwork cover for %s" % bookID)
                                return coverlink, 'whatwork'
                            else:
                                logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                        else:
                            logger.debug("No image found in work page for %s" % bookID)
                    except IndexError:
                        logger.debug('%s not found in work page for %s' % (marker, bookID))
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        if _cached_image_size(coverlink) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        # try to get a cover from openlibrary
        if not src or src == 'openlibrary':
            # item is None when the book is not in the database, so check it before using it
            if item and item['BookISBN']:
                baseurl = 'https://openlibrary.org/api/books?format=json&jscmd=data&bibkeys=ISBN:'
                result, success = fetchURL(baseurl + item['BookISBN'])
                if success:
                    try:
                        source = json.loads(result)  # type: dict
                    except Exception as e:
                        logger.debug("OpenLibrary json error: %s" % e)
                        source = {}

                    img = ''
                    if source:
                        # first key of the reply, dict.keys() cannot be indexed on python 3
                        k = next(iter(source))
                        try:
                            img = source[k]['cover']['medium']
                        except KeyError:
                            try:
                                img = source[k]['cover']['large']
                            except KeyError:
                                logger.debug("No openlibrary image for %s" % item['BookISBN'])

                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'openlibrary':
                            coverlink, success, _ = cache_img("book", bookID + '_ol', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        if _cached_image_size(coverlink) < 50:
                            logger.debug('Got an empty openlibrary image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching openlibrary cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'openlibrary'
                else:
                    logger.debug("OpenLibrary error: %s" % result)
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        if _cached_image_size(coverlink) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or (not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']):
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None

                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    if _cached_image_size(coverlink) < 50:
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        # top level boundary: log the traceback and report "no cover" rather than raising to the caller
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
    return None, src
コード例 #14
0
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ Fetch the LibraryThing workpage for bookID, or the seriespage when only seriesID is given.
        A cached copy is returned when one exists, otherwise the page is downloaded and cached.
        reason is only used in the "cached entry" debug message.
        Returns the page source, or None if no workpage/seriespage could be obtained """
    global ALLOW_NEW, LAST_NEW

    def _error_text(page):
        # text of the <error> element in a whatwork reply, or a placeholder if there is none
        try:
            return page.split('<error>')[1].split('</error>')[0]
        except IndexError:
            return "Unknown Error"

    if not (bookID or seriesID):
        logger.error("getBookWork - No bookID or seriesID")
        return None

    reason = reason or ""

    myDB = database.DBConnection()
    if bookID:
        query = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?' \
                ' and books.AuthorID = authors.AuthorID'
        cache_name = "WorkCache"
        lookup_id = bookID
    else:
        query = 'select SeriesName from series where SeriesID=?'
        cache_name = "SeriesCache"
        lookup_id = seriesID
    item = myDB.match(query, (lookup_id, ))

    if not item:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None

    workfile = os.path.join(lazylibrarian.CACHEDIR, cache_name, str(lookup_id) + '.html')

    # Only error pages (small files) and series pages are ever expired, as librarything
    # might get better info over time, more series members etc
    if os.path.isfile(workfile) and (seriesID or os.path.getsize(workfile) < 500):
        max_age = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # days -> seconds
        oldest_allowed = time.time() - max_age
        # a stale entry is only dropped if we are allowed to fetch a replacement
        if os.stat(workfile).st_mtime < oldest_allowed and ALLOW_NEW:
            os.remove(workfile)

    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        if not bookID:
            hit_msg = "getBookWork: Returning Cached seriespage for %s" % item['seriesName']
        elif reason:
            hit_msg = "getBookWork: Returning Cached entry for %s %s" % (bookID, reason)
        else:
            hit_msg = "getBookWork: Returning Cached workpage for %s" % bookID
        logger.debug(hit_msg)

        # python 2 open() has no errors= argument
        read_opts = {} if PY2 else {"errors": "backslashreplace"}
        # noinspection PyArgumentList
        with open(workfile, "r", **read_opts) as cachefile:
            return cachefile.read()

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    if not ALLOW_NEW:
        # don't nag. Show message no more than every 12 hrs
        timenow = int(time.time())
        if check_int(LAST_NEW, 0) + 43200 < timenow:
            logger.warn("New WhatWork is disabled")
            LAST_NEW = timenow
        return None

    if bookID:
        title = safe_unicode(item['BookName'])
        author = safe_unicode(item['AuthorName'])
        if PY2:
            title = title.encode(lazylibrarian.SYS_ENCODING)
            author = author.encode(lazylibrarian.SYS_ENCODING)
        url = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
              (quote_plus(author), quote_plus(title))
    else:
        seriesname = safe_unicode(item['seriesName'])
        if PY2:
            seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
        url = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

    librarything_wait()
    result, success = fetchURL(url)

    if bookID and success:
        # whatwork replies with a <link> to the real workpage, follow it
        # noinspection PyBroadException
        try:
            work_url = result.split('<link>')[1].split('</link>')[0]
            librarything_wait()
            result, success = fetchURL(work_url)
        except Exception:
            errmsg = _error_text(result)
            if not item['BookISBN']:
                # still cache if whatwork returned a result without a link, so we don't keep retrying
                prefix = "Librarything: [" + errmsg + "] for "
                logger.debug(prefix + item['AuthorName'] + ' ' + item['BookName'])
                success = True
            else:
                # if no workpage link, try isbn instead
                url = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                librarything_wait()
                result, success = fetchURL(url)
                if success:
                    # noinspection PyBroadException
                    try:
                        work_url = result.split('<link>')[1].split('</link>')[0]
                        librarything_wait()
                        result, success = fetchURL(work_url)
                    except Exception:
                        # no workpage link found by isbn
                        errmsg = _error_text(result)
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                        success = True

    if not success:
        if bookID:
            fail_msg = "getBookWork: Unable to cache workpage, got %s"
        else:
            fail_msg = "getBookWork: Unable to cache series page, got %s"
        logger.debug(fail_msg % result)
        return None

    with open(workfile, "w") as cachefile:
        cachefile.write(result)
        if bookID:
            cached_msg = "getBookWork: Caching workpage for %s"
        else:
            cached_msg = "getBookWork: Caching series page for %s"
        logger.debug(cached_msg % workfile)
        # the error page is cached too, but callers get None for it
        if '</request><error>' in result:
            return None
    return result
コード例 #15
0
ファイル: bookwork.py プロジェクト: Noppadet/LazyLibrarian
def getBookCover(bookID=None):
    """Return a link to a locally cached cover image for bookID, or None.

    The book is looked up in the books table, then these sources are tried in order:
      1. an image already present in the image cache directory
      2. the og:image of the book's goodreads page (only when BookLink is a goodreads link)
      3. the first hit of a google image search for "<author> <title> ebook"
    A downloaded image is written into the cache directory as <bookID>.jpg.

    bookID: id of the book in the books table.
    Returns the relative link images/cache/<bookID>.jpg (joined with os.sep) when a
    cover is available, None if bookID is empty/unknown or no cover could be fetched.
    """
    if not bookID:
        logger.error("getBookCover - No bookID")
        return None

    myDB = database.DBConnection()

    logger.debug("getBookCover: Fetching book cover for %s" % bookID)
    # bind bookID as a query parameter rather than formatting it into the SQL text
    item = myDB.action('select BookName,AuthorName,BookLink from books where bookID=?', (bookID,)).fetchone()
    if not item:
        return None

    title = formatter.safe_unicode(item['BookName']).encode('utf-8')
    author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
    booklink = item['BookLink']
    safeparams = urllib.quote_plus("%s %s" % (author, title))

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data', 'images', 'cache')
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    coverlink = os.path.join('images', 'cache', bookID + '.jpg')
    covertype = ""
    if os.path.isfile(coverfile):
        # use cached image if possible to speed up refreshactiveauthors and librarysync re-runs
        covertype = "cached"

    # booklink may be empty/None for books without a link, so test it before searching it
    if not covertype and booklink and 'goodreads' in booklink:
        # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
        # and scrape the page for og:image
        # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
        # 1388267702i/16304._UY475_SS475_.jpg"/>
        # to get the cover

        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:  # called within the last second?
            time.sleep(1)
        # always record the request time, otherwise the check above can never trigger
        lazylibrarian.LAST_GOODREADS = time_now
        result, success = fetchURL(booklink)
        if success:
            try:
                img = result.split('og:image')[1].split('content="')[1].split('"/>')[0]
            except IndexError:
                img = None
            if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                time_now = int(time.time())
                if time_now <= lazylibrarian.LAST_GOODREADS:
                    time.sleep(1)
                lazylibrarian.LAST_GOODREADS = time_now
                result, success = fetchURL(img)
                if success:
                    with open(coverfile, 'wb') as imgfile:
                        imgfile.write(result)
                    covertype = "goodreads"
                else:
                    logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

    # if this failed, try a google image search...
    if not covertype:
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                result, success = fetchURL(img)
                if success:
                    with open(coverfile, 'wb') as imgfile:
                        imgfile.write(result)
                    covertype = "google"
                else:
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))

    if covertype:
        # image downloaded, or was already there, now return link to file in cache
        logger.debug("getBookCover: Found %s cover for %s %s" % (covertype, author, title))
        return coverlink
    return None
コード例 #16
0
ファイル: images.py プロジェクト: knobunc/LazyLibrarian
def _cache_cover_image(bookID, img, src, suffix):
    """ Cache the image at url img for bookID and check what ended up on disk.
        When src is set the image is cached as bookID + suffix, otherwise it is cached
        as bookID with refresh=True.
        Return (coverlink, success, empty): coverlink and success as returned by cache_img,
        empty is True if the cached file is missing or smaller than 50 bytes """
    if src:
        coverlink, success, _ = cache_img("book", bookID + suffix, img)
    else:
        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
    size = 0
    if os.path.isfile(coverfile):
        size = os.path.getsize(coverfile)
    # e.g. if librarything has no image they return a 1x1 gif
    return coverlink, success, size < 50


def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)

        src = cache, current, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        If src is given only that source is tried, if not they are tried in the order above.
        Always returns a tuple: (coverlink, source) when a cover was found,
        (None, src) if no cover available or an error occurred. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything  has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                           lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    coverlink, success, empty = _cache_cover_image(bookID, img, src, '_lt')
                    if empty:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                # the page can name a cover in two places, try both in turn
                for marker in ('workCoverImage', 'og:image'):
                    try:
                        img = work.split(marker)[1].split('="')[1].split('"')[0]
                    except IndexError:
                        logger.debug('%s not found in work page for %s' % (marker, bookID))
                        continue

                    if img and img.startswith('http'):
                        coverlink, success, empty = _cache_cover_image(bookID, img, src, '_ww')
                        if empty:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            # only accept the image if it is not an empty placeholder
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        coverlink, success, empty = _cache_cover_image(bookID, img, src, '_gr')
                        if empty:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        coverlink, success, empty = _cache_cover_image(bookID, img, src, '_gi')
                        if empty:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        # an explicit request always runs, the automatic fallback only if config allows it
        if src == 'googleimage' or (not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']):
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None

                if img and img.startswith('http'):
                    coverlink, success, empty = _cache_cover_image(bookID, img, src, '_gb')
                    if empty:
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
    return None, src
コード例 #17
0
ファイル: bookwork.py プロジェクト: Noppadet/LazyLibrarian
def getBookWork(bookID=None):
    """Return the LibraryThing work page for bookID, or None.

    A copy cached in <CACHEDIR>/WorkCache/<bookID>.html is returned if present.
    Otherwise the whatwork api is queried (by isbn if the book has one, else by
    author and title), the <link> in its reply is followed, and the resulting
    page is cached and returned. A whatwork reply that holds an <error> instead
    of a <link> is cached and returned as it is, so we don't keep retrying.

    bookID: id of the book in the books table.
    Returns the page source as read/fetched, None if bookID is empty/unknown
    or nothing could be fetched.
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None

    myDB = database.DBConnection()

    # bind bookID as a query parameter rather than formatting it into the SQL text
    item = myDB.action('select BookName,AuthorName,BookISBN from books where bookID=?', (bookID,)).fetchone()
    if not item:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None

    # cached workpages are never expired here
    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    workfile = os.path.join(cacheLocation, str(bookID) + '.html')

    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached response for %s" % workfile)
        with open(workfile, "r") as cachefile:
            return cachefile.read()

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    bookisbn = item['BookISBN']
    if bookisbn:
        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
    else:
        title = formatter.safe_unicode(item['BookName']).encode('utf-8')
        author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams

    time_now = int(time.time())
    if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
        time.sleep(1)  # sleep 1 second to respect librarything api terms
    lazylibrarian.LAST_LIBRARYTHING = time_now
    result, success = fetchURL(URL)
    if not success:
        logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
        return None

    # noinspection PyBroadException
    try:
        workpage = result.split('<link>')[1].split('</link>')[0]
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
            time.sleep(1)  # sleep 1 second to respect librarything api terms
        lazylibrarian.LAST_LIBRARYTHING = time_now
        result, success = fetchURL(workpage)
    except Exception:
        # no usable link. success is still True from the whatwork request, so if the
        # reply holds an <error> it falls through and gets cached below
        # noinspection PyBroadException
        try:
            errmsg = result.split('<error>')[1].split('</error>')[0]
            # still cache if whatwork returned a result without a link, so we don't keep retrying
            logger.debug(u"getBookWork: Got librarything error page: [%s] %s" % (errmsg, URL.split('?')[1]))
        except Exception:
            logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
            return None

    if success:
        logger.debug(u"getBookWork: Caching response for %s" % workfile)
        with open(workfile, "w") as cachefile:
            cachefile.write(result)
        return result

    # only reached when the workpage fetch itself failed, so workpage is set
    logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (workpage, result))
    return None
コード例 #18
0
ファイル: bookwork.py プロジェクト: NVRemoteDev/LazyLibrarian
def getBookCover(bookID=None):
    """ Return a link to a cached cover image for the given bookID.
        Try 1. Image already present in the local cache directory
            2. og:image scraped from the book's goodreads page, if BookLink is a goodreads url
            3. First hit of a google image search for "author title ebook"
        Return None if bookID is empty, is not in the books table,
        or no cover could be fetched. """
    if not bookID:
        logger.error("getBookCover - No bookID")
        return None

    myDB = database.DBConnection()

    logger.debug("getBookCover: Fetching book cover for %s" % bookID)
    # NOTE(review): bookID is formatted straight into the SQL text. If it can come from
    # a web request, switch to a bound parameter once DBConnection.action is confirmed
    # to accept one.
    item = myDB.action(
        'select BookName,AuthorName,BookLink from books where bookID="%s"' %
        bookID).fetchone()
    if not item:
        return None

    title = formatter.safe_unicode(item['BookName']).encode('utf-8')
    author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
    booklink = item['BookLink']
    safeparams = urllib.quote_plus("%s %s" % (author, title))

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                            'data' + os.sep + 'images' + os.sep + 'cache')
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    coverlink = os.path.join('images' + os.sep + 'cache', bookID + '.jpg')
    covertype = ""
    if os.path.isfile(coverfile):
        # use cached image if possible to speed up refreshactiveauthors and librarysync re-runs
        covertype = "cached"

    # booklink may be empty/NULL in the database, so test it before the substring check
    if not covertype and booklink and 'goodreads' in booklink:
        # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
        # and scrape the page for og:image
        # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
        # to get the cover

        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:  # called within the last second?
            time.sleep(1)
        # always record the call time, otherwise the throttle above can never trigger
        lazylibrarian.LAST_GOODREADS = time_now
        result, success = fetchURL(booklink)
        if success:
            try:
                img = result.split('og:image')[1].split(
                    'content="')[1].split('"/>')[0]
            except IndexError:
                img = None
            if img and img.startswith('http') \
                    and 'nocover' not in img and 'nophoto' not in img:
                time_now = int(time.time())
                if time_now <= lazylibrarian.LAST_GOODREADS:
                    time.sleep(1)
                lazylibrarian.LAST_GOODREADS = time_now
                result, success = fetchURL(img)
                if success:
                    with open(coverfile, 'wb') as imgfile:
                        imgfile.write(result)
                    covertype = "goodreads"
                else:
                    logger.debug(
                        "getBookCover: Error getting goodreads image for %s, [%s]"
                        % (img, result))
            else:
                logger.debug(
                    "getBookCover: No image found in goodreads page for %s"
                    % bookID)
        else:
            logger.debug("getBookCover: Error getting page %s, [%s]" %
                         (booklink, result))

    # if this failed, try a google image search...

    if not covertype:
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                result, success = fetchURL(img)
                if success:
                    with open(coverfile, 'wb') as imgfile:
                        imgfile.write(result)
                    covertype = "google"
                else:
                    logger.debug(
                        "getBookCover: Error getting google image %s, [%s]"
                        % (img, result))
            else:
                logger.debug(
                    "getBookCover: No image found in google page for %s" %
                    bookID)
        else:
            logger.debug(
                "getBookCover: Error getting google page for %s, [%s]" %
                (safeparams, result))

    if covertype:
        # image downloaded, or was already there, now return link to file in cache
        logger.debug("getBookCover: Found %s cover for %s %s" %
                     (covertype, author, title))
        return coverlink
    return None
コード例 #19
0
def getBookWork(bookID=None, reason=None):
    """ Fetch the LibraryThing workpage for a bookID, using the on-disk WorkCache
        when a copy is already there and populating the cache otherwise.
        reason is only used to annotate the cache-hit log line.
        Return the page contents, or None when the bookID is empty or unknown
        or no page could be retrieved. """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None

    reason = reason or ""

    myDB = database.DBConnection()
    item = myDB.match('select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID)
    if not item:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None

    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    workfile = os.path.join(cacheLocation, bookID + '.html')

    # cached workpages currently never expire

    if os.path.isfile(workfile):
        # serve from cache to keep refreshactiveauthors and librarysync re-runs fast
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" % (bookID, reason))
        with open(workfile, "r") as cachefile:
            return cachefile.read()

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1

    # look the work up by isbn when we have one, otherwise by "author title"
    isbn = item['BookISBN']
    if isbn:
        lookup = 'http://www.librarything.com/api/whatwork.php?isbn=' + isbn
    else:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        lookup = 'http://www.librarything.com/api/whatwork.php?title=' + \
            urllib.quote_plus("%s %s" % (author, title))

    librarything_wait()
    page, ok = fetchURL(lookup)
    if not ok:
        logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (lookup, page))
        return None

    try:
        # whatwork answers with a <link> to the real workpage; follow it
        workpage = page.split('<link>')[1].split('</link>')[0]
        librarything_wait()
        page, ok = fetchURL(workpage)
    except Exception:
        try:
            errmsg = page.split('<error>')[1].split('</error>')[0]
            # an error page without a link is still cached below, so we don't keep retrying
            logger.debug(u"getBookWork: Got librarything error page: [%s] %s" % (errmsg, lookup.split('?')[1]))
        except Exception:
            logger.debug(u"getBookWork: Unable to find workpage link for %s" % lookup.split('?')[1])
            return None

    if not ok:
        logger.debug(u"getBookWork: Unable to cache workpage for %s, got %s" % (workpage, page))
        return None

    logger.debug(u"getBookWork: Caching workpage for %s" % workfile)
    with open(workfile, "w") as cachefile:
        cachefile.write(page)
    return page
コード例 #20
0
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file already in the cache directory
            2. cover.jpg in the same folder as the book file, if we have the book
            3. og:image from the LibraryThing workpage
            4. og:image from the goodreads page, if BookLink is a goodreads url
            5. Google images search
        Return None if bookID is empty or no cover is available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, bookID + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1

    myDB = database.DBConnection()
    item = myDB.match('select BookFile from books where bookID="%s"' % bookID)
    if item:
        bookfile = item['BookFile']
        if bookfile:  # we may have a cover.jpg in the same folder
            bookdir = os.path.dirname(bookfile)
            coverimg = os.path.join(bookdir, "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" % coverfile)
                shutil.copyfile(coverimg, coverfile)
                coverlink = 'cache/' + bookID + '.jpg'
                return coverlink

    # if no cover.jpg, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

    # not found in librarything work page, try to get a cover from goodreads or google instead

    item = myDB.match('select BookName,AuthorName,BookLink from books where bookID="%s"' % bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))

        # booklink may be empty/NULL in the database, so test it before the substring check
        if booklink and 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover

            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:  # called within the last second?
                time.sleep(1)
            # always record the call time, otherwise the throttle above can never trigger
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split('"')[0]
                except IndexError:
                    img = None
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" % (author, title))
                        return coverlink
                    else:
                        # NOTE(review): result here is the goodreads page body fetched above,
                        # not an error message from cache_cover
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

        # if this failed, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug("getBookCover: Caching google cover for %s %s" % (author, title))
                    return coverlink
                else:
                    # NOTE(review): result here is the google results page body,
                    # not an error message from cache_cover
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None
コード例 #21
0
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file already in the image cache directory
            2. og:image from the LibraryThing workpage
            3. og:image from the goodreads page, if BookLink is a goodreads url
            4. Google images search
        Return None if bookID is empty or no cover is available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                            'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, bookID + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" %
                     coverfile)
        coverlink = 'images/cache/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug(
                        u"getBookCover: Caching librarything cover for %s" %
                        bookID)
                    return coverlink
            else:
                logger.debug(
                    "getBookCover: No image found in work page for %s" %
                    bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' %
                         bookID)

    # not found in librarything work page, try to get a cover from goodreads or google instead

    myDB = database.DBConnection()

    item = myDB.match(
        'select BookName,AuthorName,BookLink from books where bookID="%s"' %
        bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(
            lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(
            lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))

        # booklink may be empty/NULL in the database, so test it before the substring check
        if booklink and 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover

            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:  # called within the last second?
                time.sleep(1)
            # always record the call time, otherwise the throttle above can never trigger
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split(
                        '"')[0]
                except IndexError:
                    img = None
                if img and img.startswith(
                        'http'
                ) and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink is not None:
                        logger.debug(
                            "getBookCover: Caching goodreads cover for %s %s" %
                            (author, title))
                        return coverlink
                    else:
                        # NOTE(review): result here is the goodreads page body fetched above,
                        # not an error message from cache_cover
                        logger.debug(
                            "getBookCover: Error getting goodreads image for %s, [%s]"
                            % (img, result))
                else:
                    logger.debug(
                        "getBookCover: No image found in goodreads page for %s"
                        % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" %
                             (booklink, result))

        # if this failed, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug(
                        "getBookCover: Caching google cover for %s %s" %
                        (author, title))
                    return coverlink
                else:
                    # NOTE(review): result here is the google results page body,
                    # not an error message from cache_cover
                    logger.debug(
                        "getBookCover: Error getting google image %s, [%s]" %
                        (img, result))
            else:
                logger.debug(
                    "getBookCover: No image found in google page for %s" %
                    bookID)
        else:
            logger.debug(
                "getBookCover: Error getting google page for %s, [%s]" %
                (safeparams, result))
    return None
コード例 #22
0
ファイル: bookwork.py プロジェクト: knobunc/LazyLibrarian
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid, or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        bookID takes precedence when both bookID and seriesID are supplied.
        reason is only used to annotate the cache-hit log line for a bookID.
        Return None if no workpage/seriespage available, if new lookups are disabled (ALLOW_NEW false),
        or if the page that was fetched (and cached) is a librarything error page """
    # module-level switches: ALLOW_NEW gates new lookups, LAST_NEW is when we last warned about that
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    # (but keep the stale copy if we are not allowed to fetch a replacement)
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # on python3 replace undecodable bytes with backslash escapes instead of raising
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                # first attempt: look the work up by author and title
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # the whatwork reply should contain a <link> to the real workpage; follow it
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True
            if success:
                # result is either the real page or a whatwork reply without a link; both get cached
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
コード例 #23
0
ファイル: bookwork.py プロジェクト: steflavoie/LazyLibrarian
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file already in the "book" cache directory
            2. cover.jpg in the same folder as the book file, if we have the book
            3. Goodreads page (coverImage, then og:image) if BookLink is a goodreads url
            4. LibraryThing workpage (workCoverImage, then og:image)
            5. Google images search
        Return None if bookID is empty or no cover is available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "book", bookID + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" %
                     coverfile)
        coverlink = 'cache/book/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1

    myDB = database.DBConnection()
    item = myDB.match('select BookFile from books where bookID=?', (bookID, ))
    if item:
        bookfile = item['BookFile']
        if bookfile:  # we may have a cover.jpg in the same folder
            bookdir = os.path.dirname(bookfile)
            coverimg = os.path.join(bookdir, "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" %
                             coverfile)
                shutil.copyfile(coverimg, coverfile)
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink

    # no cover.jpg, try to get a cover from goodreads
    cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
    cmd += ' and books.AuthorID = authors.AuthorID'
    item = myDB.match(cmd, (bookID, ))
    safeparams = ''  # stays empty when the book is unknown, which also disables the google search
    if item:
        title = safe_unicode(item['BookName'])
        title = title.encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName'])
        author = author.encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        # booklink may be empty/NULL in the database, so test it before the substring check
        if booklink and 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
            # 1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover

            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:  # called within the last second?
                time.sleep(1)
            # always record the call time, otherwise the throttle above can never trigger
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('id="coverImage"')[1].split(
                        'src="')[1].split('"')[0]
                except IndexError:
                    try:
                        img = result.split('og:image')[1].split('="')[1].split(
                            '"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith(
                        'http'
                ) and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink, success = cache_img("book", bookID, img)
                    if success:
                        logger.debug(
                            "getBookCover: Caching goodreads cover for %s %s" %
                            (item['AuthorName'], item['BookName']))
                        return coverlink
                    else:
                        logger.debug(
                            "getBookCover: Error getting goodreads image for %s, [%s]"
                            % (img, coverlink))
                else:
                    logger.debug(
                        "getBookCover: No image found in goodreads page for %s"
                        % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" %
                             (booklink, result))

    # nothing from goodreads, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        # the work page can carry the cover under either marker; try each in turn
        for marker in ('workCoverImage', 'og:image'):
            try:
                img = work.split(marker)[1].split('="')[1].split('"')[0]
                if img and img.startswith('http'):
                    coverlink, success = cache_img("book", bookID, img)
                    if success:
                        logger.debug(
                            u"getBookCover: Caching librarything cover for %s" %
                            bookID)
                        return coverlink
                    else:
                        logger.debug(
                            'getBookCover: Failed to cache image for %s [%s]' %
                            (img, coverlink))
                else:
                    logger.debug(
                        "getBookCover: No image found in work page for %s" %
                        bookID)
            except IndexError:
                logger.debug('getBookCover: Image not found in work page for %s' %
                             bookID)

    if safeparams:
        # if all else fails, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug(
                        "getBookCover: Caching google cover for %s %s" %
                        (item['AuthorName'], item['BookName']))
                    return coverlink
                else:
                    logger.debug(
                        "getBookCover: Error getting google image %s, [%s]" %
                        (img, coverlink))
            else:
                logger.debug(
                    "getBookCover: No image found in google page for %s" %
                    bookID)
        else:
            logger.debug(
                "getBookCover: Error getting google page for %s, [%s]" %
                (safeparams, result))
    return None
コード例 #24
0
ファイル: bookwork.py プロジェクト: NVRemoteDev/LazyLibrarian
def _librarything_throttle():
    """Pause if needed so two LibraryThing requests do not share a second.

    Compares the current whole-second timestamp with
    lazylibrarian.LAST_LIBRARYTHING and sleeps for one second when they
    collide. The timestamp stored afterwards is taken *after* any sleep, so
    that it reflects when the request is really about to be made; storing the
    pre-sleep value would let an immediately following call skip its wait.
    """
    time_now = int(time.time())
    if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
        time.sleep(1)  # sleep 1 second to respect librarything api terms
        time_now = int(time.time())
    lazylibrarian.LAST_LIBRARYTHING = time_now


def getBookWork(bookID=None):
    """Return the LibraryThing work page for a book, using the cache if possible.

    The book is looked up in the ``books`` table. If a cached copy exists under
    ``<CACHEDIR>/WorkCache/<bookID>.html`` it is returned directly. Otherwise
    the LibraryThing ``whatwork`` API is queried (by ISBN when the book has one,
    else by "author title"), the ``<link>`` it returns is fetched, and the
    response is written to the cache.

    When ``whatwork`` answers with an ``<error>`` element instead of a link,
    that answer is cached and returned as well, so the lookup is not retried
    on every run.

    :param bookID: identifier of the book in the ``books`` table.
    :return: the page contents as returned by fetchURL / read from the cache,
             or None if bookID is missing or unknown, or nothing usable could
             be fetched.
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None

    myDB = database.DBConnection()

    # NOTE(review): bookID is interpolated directly into the SQL text. If the
    # DB wrapper supports bound parameters, prefer those - confirm its API.
    item = myDB.action(
        'select BookName,AuthorName,BookISBN from books where bookID="%s"' %
        bookID).fetchone()
    if not item:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None

    # does the workpage need to expire?
    # expireafter = lazylibrarian.CACHE_AGE
    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    # %-formatting rather than "+" so a numeric bookID does not raise TypeError
    workfile = os.path.join(cacheLocation, '%s.html' % bookID)

    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached response for %s" %
                     workfile)
        with open(workfile, "r") as cachefile:
            return cachefile.read()

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    bookisbn = item['BookISBN']
    if bookisbn:
        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
    else:
        title = formatter.safe_unicode(item['BookName']).encode('utf-8')
        author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams

    _librarything_throttle()
    result, success = fetchURL(URL)
    if not success:
        logger.debug(
            u"getBookWork: Unable to cache response for %s, got %s" %
            (URL, result))
        return None

    try:
        workpage = result.split('<link>')[1].split('</link>')[0]
        _librarything_throttle()
        result, success = fetchURL(workpage)
    except Exception:
        # No <link> in the whatwork answer (or the fetch blew up). "except
        # Exception" rather than a bare except so Ctrl-C / SystemExit still
        # propagate.
        try:
            errmsg = result.split('<error>')[1].split('</error>')[0]
            # still cache if whatwork returned a result without a link, so we don't keep retrying
            # (success is still True from the whatwork fetch at this point)
            logger.debug(
                u"getBookWork: Got librarything error page: [%s] %s"
                % (errmsg, URL.split('?')[1]))
        except Exception:
            logger.debug(
                u"getBookWork: Unable to find workpage link for %s"
                % URL.split('?')[1])
            return None

    if not success:
        # only reachable when the workpage fetch itself reported failure,
        # so workpage is always bound here
        logger.debug(
            u"getBookWork: Unable to cache response for %s, got %s"
            % (workpage, result))
        return None

    logger.debug(u"getBookWork: Caching response for %s" % workfile)
    with open(workfile, "w") as cachefile:
        cachefile.write(result)
    return result
コード例 #25
0
def getBookWork(bookID=None, reason=None):
    """ Fetch the LibraryThing workpage for bookID, serving it from the
        WorkCache directory when a cached copy exists and caching it otherwise.
        reason is only used to annotate the cache-hit log line.
        Returns the page contents, or None when no workpage is available """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None

    reason = reason or ""

    db = database.DBConnection()
    query = 'select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID
    row = db.match(query)
    if not row:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None

    # does the workpage need to expire?
    # expireafter = lazylibrarian.CACHE_AGE
    cache_dir = os.path.join(lazylibrarian.CACHEDIR, "WorkCache")
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)
    workfile = os.path.join(cache_dir, bookID + '.html')

    if os.path.isfile(workfile):
        # cache hit: avoids a network round trip on refreshactiveauthors / librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" %
                     (bookID, reason))
        with open(workfile, "r") as cached:
            return cached.read()

    # cache miss: ask the whatwork API, by ISBN if we have one, else by "author title"
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    isbn = row['BookISBN']
    if isbn:
        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + isbn
    else:
        title = safe_unicode(row['BookName']).encode(
            lazylibrarian.SYS_ENCODING)
        author = safe_unicode(row['AuthorName']).encode(
            lazylibrarian.SYS_ENCODING)
        URL = ('http://www.librarything.com/api/whatwork.php?title=' +
               urllib.quote_plus("%s %s" % (author, title)))

    librarything_wait()
    result, success = fetchURL(URL)
    if not success:
        logger.debug(
            u"getBookWork: Unable to cache response for %s, got %s" %
            (URL, result))
        return None

    try:
        workpage = result.split('<link>')[1].split('</link>')[0]
        librarything_wait()
        result, success = fetchURL(workpage)
    except Exception:
        # no usable <link>; see whether whatwork sent an <error> instead
        try:
            errmsg = result.split('<error>')[1].split('</error>')[0]
            # the error answer is still cached below, so we don't keep retrying
            logger.debug(
                u"getBookWork: Got librarything error page: [%s] %s"
                % (errmsg, URL.split('?')[1]))
        except Exception:
            logger.debug(
                u"getBookWork: Unable to find workpage link for %s"
                % URL.split('?')[1])
            return None

    if not success:
        logger.debug(
            u"getBookWork: Unable to cache response for %s, got %s"
            % (workpage, result))
        return None

    logger.debug(u"getBookWork: Caching response for %s" % workfile)
    with open(workfile, "w") as cachefile:
        cachefile.write(result)
    return result
コード例 #26
0
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ Return the contents of the LibraryThing workpage for the given bookID,
        or of the LibraryThing series page if only seriesID is given,
        preferably from the cache. If not already cached, cache the result.

        bookID   -- book to look up; takes precedence when both IDs are passed
        reason   -- optional text, only appended to the cache-hit log message
        seriesID -- series to look up when no bookID is given

        Workpages are cached under CACHEDIR/WorkCache, series pages under
        CACHEDIR/SeriesCache. A cached file is only expired (after CACHE_AGE
        days) if it is a series page or is smaller than 500 bytes.

        For a book, whatwork is queried by author+title first; if that gives
        no workpage link and the book has an ISBN, whatwork is queried again
        by ISBN. A whatwork answer without a link is still cached so it is not
        retried each run, but None is returned for it.

        Return None if no workpage/seriespage is available """
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    # NOTE(review): both queries below interpolate the ID directly into the
    # SQL text - confirm callers never pass untrusted values
    if bookID:
        # need to specify authors.AuthorName here as function is called during dbupgrade v15 to v16
        # while books.authorname column is still present
        cmd = 'select BookName,authors.AuthorName,BookISBN from books,authors where bookID="%s"' % bookID
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
    else:
        cmd = 'select SeriesName from series where SeriesID="%s"' % seriesID
        cacheLocation = "SeriesCache"
    # presumably match() returns a single row, or something falsy if none - verify
    item = myDB.match(cmd)
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if not os.path.exists(cacheLocation):
            os.mkdir(cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                # CACHE_AGE is multiplied up from days to seconds here
                expiry = lazylibrarian.CONFIG[
                    'CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    os.remove(workfile)

            # os.remove(workfile)  # uncomment to ignore the cache while testing

        # re-check: the file may just have been removed by the expiry step above
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug(
                        u"getBookWork: Returning Cached entry for %s %s" %
                        (bookID, reason))
                else:
                    logger.debug(
                        u"getBookWork: Returning Cached workpage for %s" %
                        bookID)
            else:
                logger.debug(
                    u"getBookWork: Returning Cached seriespage for %s" %
                    item['seriesName'])

            with open(workfile, "r") as cachefile:
                source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if bookID:
                # first attempt: whatwork lookup by author and title
                title = safe_unicode(item['BookName']).encode(
                    lazylibrarian.SYS_ENCODING)
                author = safe_unicode(item['AuthorName']).encode(
                    lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                        (urllib.quote_plus(author), urllib.quote_plus(title))
            else:
                # series pages are fetched directly by name, no whatwork step
                seriesname = safe_unicode(item['seriesName']).encode(
                    lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % urllib.quote_plus(
                    seriesname)

            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # result is the whatwork answer; follow its <link> to the real workpage
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    # no <link> found: pull out the <error> text if there is one
                    try:
                        errmsg = result.split('<error>')[1].split(
                            '</error>')[0]
                    except Exception:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item[
                            'BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        # if this fetch fails, success is False and we fall
                        # through to the "Unable to cache" branch below
                        if success:
                            try:
                                workpage = result.split('<link>')[1].split(
                                    '</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split(
                                        '</error>')[0]
                                except Exception:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug(
                                    "getBookWork: Librarything: [%s] for ISBN %s"
                                    % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "getBookWork: Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' +
                                     item['BookName'])
                        success = True
            if success:
                # result may be a real page or a link-less whatwork answer;
                # either way it is written to the cache
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug(u"getBookWork: Caching workpage for %s" %
                                     workfile)
                    else:
                        logger.debug(
                            u"getBookWork: Caching series page for %s" %
                            workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug(
                        u"getBookWork: Unable to cache workpage, got %s" %
                        result)
                else:
                    logger.debug(
                        u"getBookWork: Unable to cache series page, got %s" %
                        result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None