def log(self, message, level):
    """Log *message* at *level* and mirror it into the bounded in-memory log list.

    DEBUG messages are only kept in memory when full logging (LOGFULL) is enabled.
    """
    logger = logging.getLogger('lazylibrarian')
    threadname = threading.currentThread().getName()

    # Ensure messages are utf-8 as some author names contain accents and the web page doesnt like them
    message = formatter.safe_unicode(message).encode('utf-8')

    if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
        # Limit the size of the "in-memory" log, as gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
        if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
            del lazylibrarian.LOGLIST[-1]

    message = threadname + ' : ' + message

    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        # logger.warn() is a deprecated alias (removed in Python 3.13); use warning()
        logger.warning(message)
    else:
        logger.error(message)
def log(self, message, level):
    """Log *message* at *level* and mirror it into the bounded in-memory log list.

    DEBUG messages are only kept in memory when full logging (LOGFULL) is enabled.
    """
    logger = logging.getLogger('lazylibrarian')
    threadname = threading.currentThread().getName()

    # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them
    message = formatter.safe_unicode(message).encode(lazylibrarian.SYS_ENCODING)

    if level != 'DEBUG' or lazylibrarian.LOGFULL is True:
        # Limit the size of the "in-memory" log, as gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, message))
        if len(lazylibrarian.LOGLIST) > lazylibrarian.LOGLIMIT:
            del lazylibrarian.LOGLIST[-1]

    message = threadname + ' : ' + message

    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        # logger.warn() is a deprecated alias (removed in Python 3.13); use warning()
        logger.warning(message)
    else:
        logger.error(message)
def getAuthorImage(authorid=None):
    # tbm=isch search images
    # tbs=ift:jpg jpeg file type
    """Return a cache-relative link to a portrait image for *authorid*.

    Uses a previously cached image when available, otherwise scrapes a
    google image search for one and caches it.  Returns None on failure.
    """
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    coverfile = os.path.join(lazylibrarian.CACHEDIR, "author", authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        return 'cache/author/' + authorid + '.jpg'

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if not author:
        logger.debug("No author found for %s" % authorid)
        return None

    authorname = safe_unicode(author['AuthorName'])
    if PY2:
        authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
    safeparams = quote_plus("author %s" % authorname)
    URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
    result, success = fetchURL(URL)
    if not success:
        logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
        return None

    try:
        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
    except IndexError:
        img = None
    if not img or not img.startswith('http'):
        logger.debug("No image found in google page for %s" % authorname)
        return None

    coverlink, success, was_in_cache = cache_img("author", authorid, img)
    if success:
        if was_in_cache:
            logger.debug("Returning cached google image for %s" % authorname)
        else:
            logger.debug("Cached google image for %s" % authorname)
        return coverlink
    logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
    return None
def getAuthorImage(authorid=None):
    # tbm=isch search images
    # tbs=ift:jpg jpeg file type
    """Return a link to a cached portrait image for *authorid*, scraping a
    google image search for one if not already cached.

    Returns None if the author is unknown or no image could be fetched.
    """
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'images/cache/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    # Parameterised query: interpolating authorid into the sql string with %
    # was vulnerable to sql injection
    authors = myDB.select('select AuthorName from authors where AuthorID=?', (authorid,))
    if authors:
        authorname = safe_unicode(authors[0][0]).encode(lazylibrarian.SYS_ENCODING)
        safeparams = urllib.quote_plus("%s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg&as_q=" + safeparams
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(authorid, img)
                if coverlink is not None:
                    logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
def getAuthorImage(authorid=None):
    # tbm=isch search images
    # tbs=ift:jpg jpeg file type
    """Fetch (or reuse a cached) author portrait for *authorid*.

    Returns a cache-relative link, or None if nothing could be found.
    """
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None
    img_path = os.path.join(lazylibrarian.CACHEDIR, "author", authorid + '.jpg')
    if os.path.isfile(img_path):  # serve the previously cached image
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % img_path)
        return 'cache/author/' + authorid + '.jpg'
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    db = database.DBConnection()
    row = db.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if row:
        name = safe_unicode(row['AuthorName'])
        if PY2:
            name = name.encode(lazylibrarian.SYS_ENCODING)
        params = quote_plus("author %s" % name)
        search_url = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + params + 'author'
        page, ok = fetchURL(search_url)
        if ok:
            try:
                found = page.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                found = None
            if found and found.startswith('http'):
                link, ok, already_cached = cache_img("author", authorid, found)
                if ok:
                    if already_cached:
                        logger.debug("Returning cached google image for %s" % name)
                    else:
                        logger.debug("Cached google image for %s" % name)
                    return link
                logger.debug("Error getting google image %s, [%s]" % (found, link))
            else:
                logger.debug("No image found in google page for %s" % name)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (params, page))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
def getWorkSeries(bookID=None):
    """Scrape the series name and number-in-series from the librarything
    work page for *bookID*.

    Returns (series, seriesnum); seriesnum is None when the page gives no
    position, and (None, None) when no series information is available.
    """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None
    work = getBookWork(bookID)
    if not work:
        return None, None
    try:
        series = work.split('<a href="/series/')[1].split('">')[1].split('</a>')[0]
    except IndexError:
        return None, None
    series = formatter.safe_unicode(series).encode('utf-8')
    seriesnum = None
    if series and '(' in series:
        # e.g. "Discworld (1)" -> name "Discworld", number "1"
        seriesnum = series.split('(')[1].split(')')[0]
        series = series.split(' (')[0]
    return series, seriesnum
def log(message, level):
    """Write a log entry at *level*, recording the calling thread, file,
    function and line number, and mirror it into the bounded in-memory log.

    DEBUG entries are only kept in memory when LOGLEVEL >= 2.
    """
    logger = logging.getLogger('lazylibrarian')
    threadname = threading.currentThread().getName()

    # Get the frame data of the method that made the original logger call
    stack = inspect.stack()
    if len(stack) > 2:
        caller = inspect.getframeinfo(stack[2][0])
        program = os.path.basename(caller.filename)
        method = caller.function
        lineno = caller.lineno
    else:
        program = method = lineno = ""

    if 'windows' in platform.system().lower():
        # windows cp1252 can't handle some accents
        message = formatter.unaccented(message)
    elif PY2:
        message = formatter.safe_unicode(message)
        message = message.encode(lazylibrarian.SYS_ENCODING)

    if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
        # Limit the size of the "in-memory" log, as gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program,
                                         method, lineno, message))
        if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
            del lazylibrarian.LOGLIST[-1]

    message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

    # dispatch-table form of the original if/elif chain; unknown levels -> error
    emit = {'DEBUG': logger.debug, 'INFO': logger.info, 'WARNING': logger.warning}
    emit.get(level, logger.error)(message)
def getWorkSeries(bookID=None):
    """Extract series name and position-in-series from the librarything
    work page for *bookID*; (None, None) when unavailable.
    """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None
    page = getBookWork(bookID)
    if page:
        try:
            name = page.split('<a href="/series/')[1].split('">')[1].split('</a>')[0]
        except IndexError:
            return None, None
        name = formatter.safe_unicode(name).encode('utf-8')
        if name and '(' in name:
            # trailing "(n)" carries the number in series
            position = name.split('(')[1].split(')')[0]
            return name.split(' (')[0], position
        return name, None
    return None, None
def getWorkSeries(bookID=None):
    """ Return the series name and number in series for the given bookid
        Returns None if no series or series number """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None
    work = getBookWork(bookID, "Series")
    if not work:
        return None, None
    try:
        series = work.split('<a href="/series/')[1].split('">')[1].split('</a>')[0]
    except IndexError:
        return None, None
    series = safe_unicode(series).encode(lazylibrarian.SYS_ENCODING)
    seriesnum = None
    if series and '(' in series:
        # e.g. "Discworld (1)" -> name "Discworld", number "1"
        seriesnum = series.split('(')[1].split(')')[0]
        series = series.split(' (')[0]
    return series, seriesnum
def getWorkSeries(bookID=None):
    """ Return the series name and number in series for the given bookid
        Returns None if no series or series number """
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return None, None
    page = getBookWork(bookID, "Series")
    if page:
        try:
            name = page.split('<a href="/series/')[1].split('">')[1].split('</a>')[0]
        except IndexError:
            return None, None
        name = safe_unicode(name).encode(lazylibrarian.SYS_ENCODING)
        if name and '(' in name:
            # trailing "(n)" carries the number in series
            position = name.split('(')[1].split(')')[0]
            return name.split(' (')[0], position
        return name, None
    return None, None
def log(message, level):
    """Write a log entry at *level*, recording the calling thread, file,
    function and line number, and mirror it into the bounded in-memory log.

    DEBUG entries are only kept in memory when LOGLEVEL >= 2.
    """
    logger = logging.getLogger('lazylibrarian')
    threadname = threading.currentThread().getName()

    # Get the frame data of the method that made the original logger call
    if len(inspect.stack()) > 2:
        frame = inspect.getframeinfo(inspect.stack()[2][0])
        program = os.path.basename(frame.filename)
        method = frame.function
        lineno = frame.lineno
    else:
        program = ""
        method = ""
        lineno = ""

    # Ensure messages are correctly encoded as some author names contain accents and the web page doesnt like them
    message = formatter.safe_unicode(message)
    message = message.encode(lazylibrarian.SYS_ENCODING)

    if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
        # Limit the size of the "in-memory" log, as gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program,
                                         method, lineno, message))
        # check_int guards against a non-numeric LOGLIMIT config value,
        # consistent with the other log() implementations in this file
        if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
            del lazylibrarian.LOGLIST[-1]

    message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)

    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        logger.warning(message)
    else:
        logger.error(message)
def log(message, level):
    """Emit *message* at *level* with caller context (file:function:line)
    and keep a copy in the size-bounded in-memory log list.
    """
    lazylog = logging.getLogger('lazylibrarian')
    thread_name = threading.currentThread().getName()

    # Walk two frames up the stack to find who actually called the logger
    call_stack = inspect.stack()
    if len(call_stack) > 2:
        info = inspect.getframeinfo(call_stack[2][0])
        src_file = os.path.basename(info.filename)
        src_func = info.function
        src_line = info.lineno
    else:
        src_file, src_func, src_line = "", "", ""

    if 'windows' in platform.system().lower():
        # windows cp1252 can't handle some accents
        message = formatter.unaccented(message)
    elif PY2:
        message = formatter.safe_unicode(message)
        message = message.encode(lazylibrarian.SYS_ENCODING)

    if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
        # Keep the "in-memory" log bounded, as it gets slow if too long
        lazylibrarian.LOGLIST.insert(
            0, (formatter.now(), level, thread_name, src_file, src_func, src_line, message))
        limit = formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500)
        if len(lazylibrarian.LOGLIST) > limit:
            del lazylibrarian.LOGLIST[-1]

    message = "%s : %s:%s:%s : %s" % (thread_name, src_file, src_func, src_line, message)

    if level == 'DEBUG':
        lazylog.debug(message)
    elif level == 'INFO':
        lazylog.info(message)
    elif level == 'WARNING':
        lazylog.warning(message)
    else:
        lazylog.error(message)
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid,
        and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. OpenLibrary image
            7. Google isbn search (if google has a link to book for sale)
            8. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn,
              openlibrary, googleimage
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src
    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)
                    try:
                        img = work.split('og:image')[1].split('="')[1].split('"')[0]
                        if img and img.startswith('http'):
                            if src:
                                coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                            else:
                                coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                            # if librarything has no image they return a 1x1 gif
                            data = ''
                            coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                            if os.path.isfile(coverfile):
                                with open(coverfile, 'rb') as f:
                                    data = f.read()
                            if len(data) < 50:
                                logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                            # elif (was a plain "if") so an empty 1x1 gif is not
                            # returned as a cover, consistent with the branch above
                            elif success:
                                logger.debug("Caching whatwork cover for %s" % bookID)
                                return coverlink, 'whatwork'
                            else:
                                logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                        else:
                            logger.debug("No image found in work page for %s" % bookID)
                    except IndexError:
                        logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        # try to get a cover from openlibrary
        if not src or src == 'openlibrary':
            # guard item: the database match above may have returned nothing
            if item and item['BookISBN']:
                baseurl = 'https://openlibrary.org/api/books?format=json&jscmd=data&bibkeys=ISBN:'
                result, success = fetchURL(baseurl + item['BookISBN'])
                if success:
                    try:
                        source = json.loads(result)  # type: dict
                    except Exception as e:
                        logger.debug("OpenLibrary json error: %s" % e)
                        source = []
                    img = ''
                    if source:
                        # list() needed: dict views are not indexable on Python 3
                        k = list(source.keys())[0]
                        try:
                            img = source[k]['cover']['medium']
                        except KeyError:
                            try:
                                img = source[k]['cover']['large']
                            except KeyError:
                                logger.debug("No openlibrary image for %s" % item['BookISBN'])
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'openlibrary':
                            coverlink, success, _ = cache_img("book", bookID + '_ol', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty openlibrary image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching openlibrary cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'openlibrary'
                else:
                    logger.debug("OpenLibrary error: %s" % result)
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch search images
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
        return None, src
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""
    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))

    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time,
        # more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    # logger.warn is a deprecated alias (removed in Python 3.13)
                    logger.warning("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link,
                                # so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link,
                        # so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True

            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                if bookID:
                    logger.debug("getBookWork: Caching workpage for %s" % workfile)
                else:
                    logger.debug("getBookWork: Caching series page for %s" % workfile)
                # return None if we got an error page back
                if '</request><error>' in result:
                    return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def getBookCover(bookID=None):
    """Return a link to a cached cover image for *bookID*, scraping goodreads
    (when the book link is a goodreads one) or a google image search if the
    cover is not already cached.  Returns None if no cover can be found.
    """
    if not bookID:
        logger.error("getBookCover - No bookID")
        return None
    myDB = database.DBConnection()
    logger.debug("getBookCover: Fetching book cover for %s" % bookID)
    # Parameterised query: interpolating bookID into the sql string with %
    # was vulnerable to sql injection
    item = myDB.action('select BookName,AuthorName,BookLink from books where bookID=?',
                       (bookID,)).fetchone()
    if item:
        title = formatter.safe_unicode(item['BookName']).encode('utf-8')
        author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))

        cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)
        coverfile = os.path.join(cachedir, bookID + '.jpg')
        coverlink = os.path.join('images' + os.sep + 'cache', bookID + '.jpg')
        covertype = ""
        if os.path.isfile(coverfile):
            # use cached image if possible to speed up refreshactiveauthors and librarysync re-runs
            covertype = "cached"

        if not covertype and 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('content="')[1].split('"/>')[0]
                except IndexError:
                    img = None
                # 'x not in y' is the idiomatic form of 'not x in y'
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    result, success = fetchURL(img)
                    if success:
                        with open(coverfile, 'wb') as imgfile:
                            imgfile.write(result)
                        covertype = "goodreads"
                    else:
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

        # if this failed, try a google image search...
        if not covertype:
            # tbm=isch search books
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
            result, success = fetchURL(URL)
            if success:
                try:
                    img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                except IndexError:
                    img = None
                if img and img.startswith('http'):
                    result, success = fetchURL(img)
                    if success:
                        with open(coverfile, 'wb') as imgfile:
                            imgfile.write(result)
                        covertype = "google"
                    else:
                        logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in google page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))

        if covertype:
            # image downloaded, or was already there, now return link to file in cache
            logger.debug("getBookCover: Found %s cover for %s %s" % (covertype, author, title))
            return coverlink
    return None
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid,
        and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)
        src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src
    if not src:
        # empty src means "try every source in order"; a named src tries only that one
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):
                # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src
        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:
                    # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            # keep a per-source copy so an explicit request doesn't clobber the main cache entry
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src
        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        # under 50 bytes is treated as the placeholder gif
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src
        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)
                # fall back to the og:image meta tag on the work page
                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        if success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src
        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))
        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src
        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src
        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch search images
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" % (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src
        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
        return None, src
def getBookWork(bookID=None):
    """
    Return the contents of the LibraryThing workpage for the given bookid,
    preferably from the local WorkCache. If not already cached, fetch the
    page via the whatwork api (by isbn when available, else author/title)
    and cache the result. Return None if no workpage is available.
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None
    myDB = database.DBConnection()
    item = myDB.action('select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID).fetchone()
    if item:
        cacheLocation = "WorkCache"
        # does the workpage need to expire?
        # expireafter = lazylibrarian.CACHE_AGE
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if not os.path.exists(cacheLocation):
            os.mkdir(cacheLocation)
        workfile = os.path.join(cacheLocation, bookID + '.html')
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            logger.debug(u"getBookWork: Returning Cached response for %s" % workfile)
            with open(workfile, "r") as cachefile:
                source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            bookisbn = item['BookISBN']
            if bookisbn:
                URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
            else:
                title = formatter.safe_unicode(item['BookName']).encode('utf-8')
                author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
                safeparams = urllib.quote_plus("%s %s" % (author, title))
                URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
                time.sleep(1)  # sleep 1 second to respect librarything api terms
            lazylibrarian.LAST_LIBRARYTHING = time_now
            result, success = fetchURL(URL)
            if success:
                # narrow exception handling: bare "except:" also swallowed
                # KeyboardInterrupt/SystemExit; the parse failure we expect
                # here is an IndexError from the split chains
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
                        time.sleep(1)  # sleep 1 second to respect librarything api terms
                    lazylibrarian.LAST_LIBRARYTHING = time_now
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        logger.debug(u"getBookWork: Got librarything error page: [%s] %s" %
                                     (errmsg, URL.split('?')[1]))
                    except Exception:
                        logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
                    return None
                if success:
                    logger.debug(u"getBookWork: Caching response for %s" % workfile)
                    with open(workfile, "w") as cachefile:
                        cachefile.write(result)
                    return result
                else:
                    logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (workpage, result))
                    return None
            else:
                logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
                return None
    else:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None
def getBookCover(bookID=None):
    """
    Return a relative link to a cached cover image for bookID.

    Uses a previously cached image if present, otherwise scrapes the
    goodreads book page (when the book link is a goodreads one), then
    falls back to a google image search. Returns None on failure.
    """
    if not bookID:
        logger.error("getBookCover - No bookID")
        return None
    myDB = database.DBConnection()
    logger.debug("getBookCover: Fetching book cover for %s" % bookID)
    # NOTE(review): bookID is interpolated straight into the SQL string -
    # injection risk if bookID can come from an untrusted caller; consider a
    # parameterised query
    item = myDB.action(
        'select BookName,AuthorName,BookLink from books where bookID="%s"' %
        bookID).fetchone()
    if item:
        title = formatter.safe_unicode(item['BookName']).encode('utf-8')
        author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                                'data' + os.sep + 'images' + os.sep + 'cache')
        if not os.path.isdir(cachedir):
            os.makedirs(cachedir)
        coverfile = os.path.join(cachedir, bookID + '.jpg')
        coverlink = os.path.join('images' + os.sep + 'cache', bookID + '.jpg')
        covertype = ""
        if os.path.isfile(coverfile):
            # use cached image if possible to speed up refreshactiveauthors and librarysync re-runs
            covertype = "cached"
        if not covertype and 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                # throttle: no more than one goodreads request per second
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split(
                        'content="')[1].split('"/>')[0]
                except IndexError:
                    img = None
                # skip goodreads 'nocover'/'nophoto' placeholder images
                if img and img.startswith(
                        'http'
                ) and not 'nocover' in img and not 'nophoto' in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    result, success = fetchURL(img)
                    if success:
                        with open(coverfile, 'wb') as imgfile:
                            imgfile.write(result)
                        covertype = "goodreads"
                    else:
                        logger.debug(
                            "getBookCover: Error getting goodreads image for %s, [%s]" %
                            (img, result))
                else:
                    logger.debug(
                        "getBookCover: No image found in goodreads page for %s"
                        % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" %
                             (booklink, result))
        # if this failed, try a google image search...
        if not covertype:
            # tbm=isch search books
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
            result, success = fetchURL(URL)
            if success:
                try:
                    img = result.split('url?q=')[1].split('">')[1].split(
                        'src="')[1].split('"')[0]
                except IndexError:
                    img = None
                if img and img.startswith('http'):
                    result, success = fetchURL(img)
                    if success:
                        with open(coverfile, 'wb') as imgfile:
                            imgfile.write(result)
                        covertype = "google"
                    else:
                        logger.debug(
                            "getBookCover: Error getting google image %s, [%s]"
                            % (img, result))
                else:
                    logger.debug(
                        "getBookCover: No image found in google page for %s" %
                        bookID)
            else:
                logger.debug(
                    "getBookCover: Error getting google page for %s, [%s]" %
                    (safeparams, result))
        if covertype:
            # image downloaded, or was already there, now return link to file in cache
            logger.debug("getBookCover: Found %s cover for %s %s" %
                         (covertype, author, title))
            return coverlink
    return None
def getBookWork(bookID=None, reason=None):
    """ return the contents of the LibraryThing workpage for the given bookid
        preferably from the cache. If not already cached cache the results
        Return None if no workpage available
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None
    if not reason:
        # reason is only used to annotate the cache-hit debug message
        reason = ""
    myDB = database.DBConnection()
    item = myDB.match('select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID)
    if item:
        cacheLocation = "WorkCache"
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if not os.path.exists(cacheLocation):
            os.mkdir(cacheLocation)
        workfile = os.path.join(cacheLocation, bookID + '.html')
        # does the workpage need to expire?
        #if os.path.isfile(workfile):
        #    cache_modified_time = os.stat(workfile).st_mtime
        #    time_now = time.time()
        #    expiry = lazylibrarian.CACHE_AGE * 24 * 60 * 60  # expire cache after this many seconds
        #    if cache_modified_time < time_now - expiry:
        #        # Cache entry is too old, delete it
        #        os.remove(workfile)
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" % (bookID, reason))
            with open(workfile, "r") as cachefile:
                source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            bookisbn = item['BookISBN']
            if bookisbn:
                # isbn lookup is the most precise whatwork query
                URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
            else:
                title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
                author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
                safeparams = urllib.quote_plus("%s %s" % (author, title))
                URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams
            # rate-limit to respect librarything api terms
            librarything_wait()
            result, success = fetchURL(URL)
            if success:
                try:
                    # the api response wraps the workpage url in <link>...</link>
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        logger.debug(u"getBookWork: Got librarything error page: [%s] %s" %
                                     (errmsg, URL.split('?')[1]))
                    except Exception:
                        logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
                    return None
                if success:
                    logger.debug(u"getBookWork: Caching workpage for %s" % workfile)
                    with open(workfile, "w") as cachefile:
                        cachefile.write(result)
                    return result
                else:
                    logger.debug(u"getBookWork: Unable to cache workpage for %s, got %s" % (workpage, result))
                    return None
            else:
                logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
                return None
    else:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing whatwork
            4. Goodreads search if book was imported from goodreads
            5. Google images search
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None
    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    if os.path.isfile(coverfile):
        # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/' + bookID + '.jpg'
        return coverlink
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    item = myDB.match('select BookFile from books where bookID="%s"' % bookID)
    if item:
        bookfile = item['BookFile']
        if bookfile:
            # we may have a cover.jpg in the same folder
            bookdir = os.path.dirname(bookfile)
            coverimg = os.path.join(bookdir, "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" % coverfile)
                shutil.copyfile(coverimg, coverfile)
                coverlink = 'cache/' + bookID + '.jpg'
                return coverlink
    # if no cover.jpg, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)
    # not found in librarything work page, try to get a cover from goodreads or google instead
    item = myDB.match('select BookName,AuthorName,BookLink from books where bookID="%s"' % bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                # throttle: at most one goodreads request per second
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split('"')[0]
                except IndexError:
                    img = None
                # skip goodreads 'nocover'/'nophoto' placeholder images
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" % (author, title))
                        return coverlink
                    else:
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))
        # if this failed, try a google image search...
        # tbm=isch search images
        # tbs=isz:l large images
        # ift:jpg jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug("getBookCover: Caching google cover for %s %s" % (author, title))
                    return coverlink
                else:
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. LibraryThing whatwork
            3. Goodreads search if book was imported from goodreads
            4. Google images search
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None
    cachedir = os.path.join(str(lazylibrarian.PROG_DIR),
                            'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    if os.path.isfile(coverfile):
        # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'images/cache/' + bookID + '.jpg'
        return coverlink
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    # see if the librarything work page carries a cover image
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug(
                        u"getBookCover: Caching librarything cover for %s" %
                        bookID)
                    return coverlink
            else:
                logger.debug(
                    "getBookCover: No image found in work page for %s" %
                    bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)
    # not found in librarything work page, try to get a cover from goodreads or google instead
    myDB = database.DBConnection()
    item = myDB.match(
        'select BookName,AuthorName,BookLink from books where bookID="%s"' %
        bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(
            lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(
            lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                # throttle: at most one goodreads request per second
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split(
                        '"')[0]
                except IndexError:
                    img = None
                # skip goodreads 'nocover'/'nophoto' placeholder images
                if img and img.startswith(
                        'http'
                ) and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink is not None:
                        logger.debug(
                            "getBookCover: Caching goodreads cover for %s %s" %
                            (author, title))
                        return coverlink
                    else:
                        logger.debug(
                            "getBookCover: Error getting goodreads image for %s, [%s]" %
                            (img, result))
                else:
                    logger.debug(
                        "getBookCover: No image found in goodreads page for %s"
                        % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" %
                             (booklink, result))
        # if this failed, try a google image search...
        # tbm=isch search images
        # tbs=isz:l large images
        # ift:jpg jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug(
                        "getBookCover: Caching google cover for %s %s" %
                        (author, title))
                    return coverlink
                else:
                    logger.debug(
                        "getBookCover: Error getting google image %s, [%s]" %
                        (img, result))
            else:
                logger.debug(
                    "getBookCover: No image found in google page for %s" %
                    bookID)
        else:
            logger.debug(
                "getBookCover: Error getting google page for %s, [%s]" %
                (safeparams, result))
    return None
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available
    """
    # ALLOW_NEW gates whether fresh whatwork lookups are permitted;
    # LAST_NEW timestamps the most recent "disabled" warning
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None
    if not reason:
        reason = ""
    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')
        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])
            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)
            # rate-limit to respect librarything api terms
            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True
            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                if bookID:
                    logger.debug("getBookWork: Caching workpage for %s" % workfile)
                else:
                    logger.debug("getBookWork: Caching series page for %s" % workfile)
                # return None if we got an error page back
                if '</request><error>' in result:
                    return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def getBookCover(bookID=None):
    """
    Return a link to a local file containing a book cover image for a bookid.
    Sources are tried in order:
      1. Local file cached from goodreads/googlebooks when book was imported
      2. cover.jpg if we have the book
      3. LibraryThing whatwork
      4. Goodreads search if book was imported from goodreads
      5. Google images search
    Return None if no cover available.
    """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    coverfile = os.path.join(lazylibrarian.CACHEDIR, "book", bookID + '.jpg')
    if os.path.isfile(coverfile):
        # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        return 'cache/book/' + bookID + '.jpg'

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    res = myDB.match('select BookFile from books where bookID=?', (bookID, ))
    if res:
        bookfile = res['BookFile']
        if bookfile:
            # we may have a cover.jpg in the same folder as the book file
            coverimg = os.path.join(os.path.dirname(bookfile), "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" % coverfile)
                shutil.copyfile(coverimg, coverfile)
                return 'cache/book/' + bookID + '.jpg'

    # no cover.jpg, try to get a cover from goodreads
    cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
    cmd += ' and books.AuthorID = authors.AuthorID'
    item = myDB.match(cmd, (bookID, ))
    safeparams = ''
    if item:
        title = safe_unicode(item['BookName'])
        title = title.encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName'])
        author = author.encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
            # 1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                # throttle: no more than one goodreads hit per second
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                except IndexError:
                    try:
                        img = result.split('og:image')[1].split('="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                    lazylibrarian.LAST_GOODREADS = time_now
                    coverlink, success = cache_img("book", bookID, img)
                    if success:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink
                    logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" %
                                 (img, coverlink))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

    # nothing from goodreads, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
                logger.debug('getBookCover: Failed to cache image for %s [%s]' % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)
        # the workpage may carry the cover as an og:image meta tag instead
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
                logger.debug('getBookCover: Failed to cache image for %s [%s]' % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

    if safeparams:
        # if all else fails, try a google image search...
        # tbm=isch search images
        # tbs=isz:l large images
        # ift:jpg jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug("getBookCover: Caching google cover for %s %s" %
                                 (item['AuthorName'], item['BookName']))
                    return coverlink
                logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None
def getBookWork(bookID=None):
    """
    Return the contents of the LibraryThing whatwork page for the given bookID,
    preferably from the local WorkCache. If not already cached, fetch the page
    and cache the result. Return None if no workpage is available.
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None
    myDB = database.DBConnection()
    # parameterised query: interpolating bookID into the sql ('bookID="%s"')
    # broke on embedded quotes and allowed sql injection
    item = myDB.action('select BookName,AuthorName,BookISBN from books where bookID=?',
                       (bookID, )).fetchone()
    if not item:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None
    cacheLocation = "WorkCache"
    # does the workpage need to expire?
    # expireafter = lazylibrarian.CACHE_AGE
    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    workfile = os.path.join(cacheLocation, bookID + '.html')
    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached response for %s" % workfile)
        with open(workfile, "r") as cachefile:
            source = cachefile.read()
        return source
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    bookisbn = item['BookISBN']
    if bookisbn:
        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
    else:
        title = formatter.safe_unicode(item['BookName']).encode('utf-8')
        author = formatter.safe_unicode(item['AuthorName']).encode('utf-8')
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams
    time_now = int(time.time())
    if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
        time.sleep(1)  # sleep 1 second to respect librarything api terms
    lazylibrarian.LAST_LIBRARYTHING = time_now
    result, success = fetchURL(URL)
    if not success:
        logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
        return None
    # was a bare "except:" which also swallowed SystemExit/KeyboardInterrupt;
    # narrowed to Exception, matching the style of the newer getBookWork versions
    try:
        workpage = result.split('<link>')[1].split('</link>')[0]
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
            time.sleep(1)  # sleep 1 second to respect librarything api terms
        lazylibrarian.LAST_LIBRARYTHING = time_now
        result, success = fetchURL(workpage)
    except Exception:
        try:
            errmsg = result.split('<error>')[1].split('</error>')[0]
            # still cache if whatwork returned a result without a link, so we don't keep retrying
            logger.debug(u"getBookWork: Got librarything error page: [%s] %s" %
                         (errmsg, URL.split('?')[1]))
        except Exception:
            logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
        return None
    if success:
        logger.debug(u"getBookWork: Caching response for %s" % workfile)
        with open(workfile, "w") as cachefile:
            cachefile.write(result)
        return result
    logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (workpage, result))
    return None
def getBookWork(bookID=None, reason=None):
    """
    return the contents of the LibraryThing workpage for the given bookid
    preferably from the cache. If not already cached cache the results
    Return None if no workpage available

    reason is an optional free-text note used only in the cache-hit log message.
    """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None
    if not reason:
        reason = ""
    myDB = database.DBConnection()
    # parameterised query: interpolating bookID into the sql ('bookID="%s"')
    # broke on embedded quotes and allowed sql injection
    item = myDB.match('select BookName,AuthorName,BookISBN from books where bookID=?',
                      (bookID, ))
    if not item:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None
    cacheLocation = "WorkCache"
    # does the workpage need to expire?
    # expireafter = lazylibrarian.CACHE_AGE
    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    workfile = os.path.join(cacheLocation, bookID + '.html')
    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" % (bookID, reason))
        with open(workfile, "r") as cachefile:
            source = cachefile.read()
        return source
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    bookisbn = item['BookISBN']
    if bookisbn:
        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
    else:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams
    librarything_wait()  # respect librarything api rate limit
    result, success = fetchURL(URL)
    if not success:
        logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
        return None
    try:
        workpage = result.split('<link>')[1].split('</link>')[0]
        librarything_wait()
        result, success = fetchURL(workpage)
    except Exception:
        try:
            errmsg = result.split('<error>')[1].split('</error>')[0]
            # still cache if whatwork returned a result without a link, so we don't keep retrying
            logger.debug(u"getBookWork: Got librarything error page: [%s] %s" %
                         (errmsg, URL.split('?')[1]))
        except Exception:
            logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
        return None
    if success:
        logger.debug(u"getBookWork: Caching response for %s" % workfile)
        with open(workfile, "w") as cachefile:
            cachefile.write(result)
        return result
    logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (workpage, result))
    return None
def getBookWork(bookID=None, reason=None, seriesID=None):
    """
    return the contents of the LibraryThing workpage for the given bookid,
    or seriespage if seriesID given
    preferably from the cache. If not already cached cache the results
    Return None if no workpage/seriespage available

    reason is an optional free-text note used only in the cache-hit log message.
    """
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None
    if not reason:
        reason = ""
    myDB = database.DBConnection()
    if bookID:
        # need to specify authors.AuthorName here as function is called during dbupgrade v15 to v16
        # while books.authorname column is still present
        # parameterised query: interpolating the id into the sql ('bookID="%s"')
        # broke on embedded quotes and allowed sql injection
        cmd = 'select BookName,authors.AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        args = (bookID, )
        cacheLocation = "WorkCache"
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        args = (seriesID, )
        cacheLocation = "SeriesCache"
    item = myDB.match(cmd, args)
    if not item:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
    cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
    if not os.path.exists(cacheLocation):
        os.mkdir(cacheLocation)
    if bookID:
        workfile = os.path.join(cacheLocation, str(bookID) + '.html')
    else:
        workfile = os.path.join(cacheLocation, str(seriesID) + '.html')
    # does the workpage need to expire? For now only expire if it was an error page
    # (small file) or a series page as librarything might get better info over time, more series members etc
    if os.path.isfile(workfile):
        if seriesID or os.path.getsize(workfile) < 500:
            cache_modified_time = os.stat(workfile).st_mtime
            time_now = time.time()
            expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
            if cache_modified_time < time_now - expiry:
                # Cache entry is too old, delete it
                os.remove(workfile)
    if os.path.isfile(workfile):
        # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        if bookID:
            if reason:
                logger.debug(u"getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
            else:
                logger.debug(u"getBookWork: Returning Cached workpage for %s" % bookID)
        else:
            logger.debug(u"getBookWork: Returning Cached seriespage for %s" % item['seriesName'])
        with open(workfile, "r") as cachefile:
            source = cachefile.read()
        return source
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    if bookID:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
            (urllib.quote_plus(author), urllib.quote_plus(title))
    else:
        seriesname = safe_unicode(item['seriesName']).encode(lazylibrarian.SYS_ENCODING)
        URL = 'http://www.librarything.com/series/%s' % urllib.quote_plus(seriesname)
    librarything_wait()  # respect librarything api rate limit
    result, success = fetchURL(URL)
    if bookID and success:
        # the whatwork response should carry a <link> to the real workpage
        try:
            workpage = result.split('<link>')[1].split('</link>')[0]
            librarything_wait()
            result, success = fetchURL(workpage)
        except Exception:
            try:
                errmsg = result.split('<error>')[1].split('</error>')[0]
            except Exception:
                errmsg = "Unknown Error"
            # if no workpage link, try isbn instead
            if item['BookISBN']:
                URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                librarything_wait()
                result, success = fetchURL(URL)
                if success:
                    try:
                        workpage = result.split('<link>')[1].split('</link>')[0]
                        librarything_wait()
                        result, success = fetchURL(workpage)
                    except Exception:
                        # no workpage link found by isbn
                        try:
                            errmsg = result.split('<error>')[1].split('</error>')[0]
                        except Exception:
                            errmsg = "Unknown Error"
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        logger.debug("getBookWork: Librarything: [%s] for ISBN %s" %
                                     (errmsg, item['BookISBN']))
                        success = True
            else:
                # still cache if whatwork returned a result without a link, so we don't keep retrying
                msg = "getBookWork: Librarything: [" + errmsg + "] for "
                logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                success = True
    if success:
        with open(workfile, "w") as cachefile:
            cachefile.write(result)
        if bookID:
            logger.debug(u"getBookWork: Caching workpage for %s" % workfile)
        else:
            logger.debug(u"getBookWork: Caching series page for %s" % workfile)
        # return None if we got an error page back
        if '</request><error>' in result:
            return None
        return result
    if bookID:
        logger.debug(u"getBookWork: Unable to cache workpage, got %s" % result)
    else:
        logger.debug(u"getBookWork: Unable to cache series page, got %s" % result)
    return None