def _Magazine(self, **kwargs):
    """Build an OPDS acquisition feed listing the issues of one magazine.

    Expects kwargs['magid'] (the magazine title); optional kwargs['index']
    for paging.  The finished feed dict is stored in self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    if 'magid' not in kwargs:
        self.data = self._error_with_message('No Magazine Provided')
        return
    links = []
    entries = []
    title = ''
    cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
    # BUGFIX: magid comes straight from the request; it was interpolated into
    # the SQL with % formatting (injection risk, and quotes in a title broke
    # the query).  Use a bound parameter instead, as _Author already does.
    cmd += "WHERE Title=? order by IssueDate DESC"
    results = myDB.select(cmd, (kwargs['magid'],))
    page = results[index:(index + self.PAGE_SIZE)]
    for issue in page:
        title = makeUnicode(issue['Title'])
        entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                 'id': escape('issue:%s' % issue['IssueID']),
                 'updated': opdstime(issue['IssueAcquired']),
                 'content': escape('%s - %s' % (title, issue['IssueDate'])),
                 'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'type': mimeType(issue['IssueFile'])}
        if lazylibrarian.CONFIG['OPDS_METAINFO']:
            # cover image is cached next to the issue file as <name>.jpg
            fname = os.path.splitext(issue['IssueFile'])[0]
            res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
            entry['image'] = self.searchroot + '/' + res[0]
        entries.append(entry)
    feed = {}
    # title still holds the (unicode) magazine title from the last issue seen
    title = '%s (%s)' % (escape(title), len(entries))
    feed['title'] = 'LazyLibrarian OPDS - %s' % title
    feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
    feed['updated'] = now()
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    # paging links: next/previous windows of PAGE_SIZE results
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Magazine&magid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['magid']), index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Magazine&magid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['magid']), index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def url_fix(s, charset='utf-8'):
    """Sanitise a URL so it is safe to hand to HTTP libraries.

    Normalises *s* to the native text type (encode on python 2, decode
    otherwise), then percent-encodes the path and query components.
    """
    if PY2 and isinstance(s, text_type):
        s = s.encode(charset, 'ignore')
    elif not PY2 and not isinstance(s, text_type):
        s = s.decode(charset)
    parts = urlsplit(s)
    safe_path = quote(parts.path, '/%')
    safe_query = quote_plus(parts.query, ':&=')
    return urlunsplit((parts.scheme, parts.netloc, safe_path, safe_query, parts.fragment))
def url_fix(s, charset='utf-8'):
    """Return *s* with its path and query percent-encoded.

    The argument is first converted to the interpreter's native text
    type: encoded on python 2, decoded on python 3.
    """
    if PY2 and isinstance(s, text_type):
        s = s.encode(charset, 'ignore')
    elif PY3 and not isinstance(s, text_type):
        s = s.decode(charset)
    scheme, netloc, path, query, fragment = urlsplit(s)
    return urlunsplit((scheme, netloc,
                       quote(path, '/%'),
                       quote_plus(query, ':&='),
                       fragment))
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''
    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='
        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        if isbn:
            # isbn is the most precise key, so search it first
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))
        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2), which calls len()
            # on a bool and raises TypeError.  The intent is to accept only
            # two-letter country codes.
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                time.sleep(1)  # rate-limit uncached api hits
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy-match so minor title/author variations still hit
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item lacked authors/title/description; skip it
                        pass
    return ''
def getAuthorImage(authorid=None):
    """Return a cache link for an author portrait, or None.

    Serves a previously cached image when present; otherwise scrapes a
    google image search for a jpeg face shot and caches the result.
    """
    # tbm=isch search images
    # tbs=ift:jpg jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None
    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "author", authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/author/' + authorid + '.jpg'
        return coverlink
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?', (authorid, ))
    if author:
        authorname = safe_unicode(author['AuthorName'])
        if PY2:
            # quote_plus needs bytes on python 2
            authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
        safeparams = quote_plus("author %s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
        result, success = fetchURL(URL)
        if success:
            # scrape the first image url out of the results page markup
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success, was_in_cache = cache_img(
                    "author", authorid, img)
                if success:
                    if was_in_cache:
                        logger.debug("Returning cached google image for %s" % authorname)
                    else:
                        logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    # cache_img failed; coverlink holds its error detail here
                    logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
def _RecentAudio(self, **kwargs):
    """Build an OPDS acquisition feed of audiobooks, most recently
    added to the audio library first.

    Optional kwargs: 'index' for paging, 'query' to filter by book name.
    The finished feed dict is stored in self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Books'))
    cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
    params = []
    if 'query' in kwargs:
        # BUGFIX: the search term was string-concatenated into the SQL
        # (injection risk); bind it as a parameter instead
        cmd += "BookName LIKE ? AND "
        params.append('%' + kwargs['query'] + '%')
    cmd += "AudioStatus='Open' order by AudioLibrary DESC"
    if params:
        results = myDB.select(cmd, tuple(params))
    else:
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        title = makeUnicode(book['BookName'])
        entry = {'title': escape(title),
                 'id': escape('audio:%s' % book['BookID']),
                 'updated': opdstime(book['AudioLibrary']),
                 'href': '%s?cmd=Serve&audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'type': mimeType("we_send.zip")}
        if lazylibrarian.CONFIG['OPDS_METAINFO']:
            # BUGFIX: AuthorID was interpolated with %s; use a bound parameter
            author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (book['AuthorID'],))
            author = makeUnicode(author['AuthorName'])
            entry['image'] = self.searchroot + '/' + book['BookImg']
            entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
            entry['author'] = escape('%s' % author)
        else:
            entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
        entries.append(entry)
    # paging links: next/previous windows of PAGE_SIZE results
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''
    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='
        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        if isbn:
            # isbn is the most precise key, so search it first
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))
        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2), which calls len()
            # on a bool and raises TypeError.  The intent is to accept only
            # two-letter country codes.
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                time.sleep(1)  # rate-limit uncached api hits
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy-match so minor title/author variations still hit
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item lacked authors/title/description; skip it
                        pass
    return ''
def _Magazines(self, **kwargs):
    """Build an OPDS navigation feed of magazines that have at least one issue.

    Optional kwargs: 'index' for paging, 'query' to filter by magazine title.
    The finished feed dict is stored in self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Magazines'))
    cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
    cmd += ' as Iss_Cnt from magazines '
    params = []
    if 'query' in kwargs:
        # BUGFIX: the search term was string-concatenated into the SQL
        # (injection risk); bind it as a parameter instead
        cmd += "WHERE magazines.title LIKE ? "
        params.append('%' + kwargs['query'] + '%')
    cmd += 'order by magazines.title'
    if params:
        results = myDB.select(cmd, tuple(params))
    else:
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for mag in page:
        if mag['Iss_Cnt'] > 0:
            title = makeUnicode(mag['Title'])
            entry = {
                'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                'id': escape('magazine:%s' % title),
                'updated': opdstime(mag['LastAcquired']),
                'content': escape('%s' % title),
                'href': '%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(title)),
                'kind': 'navigation',
                'rel': 'subsection',
            }
            # disabled cover image as it stops navigation?
            # if lazylibrarian.CONFIG['OPDS_METAINFO']:
            #     res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
            #     entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)
    # paging links: next/previous windows of PAGE_SIZE results
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def _RecentMags(self, **kwargs):
    """Build an OPDS acquisition feed of magazine issues, most recently
    acquired first.

    Optional kwargs: 'index' for paging, 'query' to filter by magazine title.
    The finished feed dict is stored in self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Magazines'))
    cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
    cmd += "where IssueFile != '' "
    params = []
    if 'query' in kwargs:
        # BUGFIX: the search term was string-concatenated into the SQL
        # (injection risk); bind it as a parameter instead
        cmd += "AND Title LIKE ? "
        params.append('%' + kwargs['query'] + '%')
    cmd += "order by IssueAcquired DESC"
    if params:
        results = myDB.select(cmd, tuple(params))
    else:
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for mag in page:
        title = makeUnicode(mag['Title'])
        entry = {'title': escape('%s' % mag['IssueDate']),
                 'id': escape('issue:%s' % mag['IssueID']),
                 'updated': opdstime(mag['IssueAcquired']),
                 'content': escape('%s - %s' % (title, mag['IssueDate'])),
                 'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'author': escape(title),
                 'type': mimeType(mag['IssueFile'])}
    if lazylibrarian.CONFIG['OPDS_METAINFO']:
        pass
    feed['links'] = links
    return
def _Magazines(self, **kwargs):
    """Build an OPDS navigation feed of magazines that have at least one issue.

    Optional kwargs: 'index' for paging, 'query' to filter by magazine title.
    The finished feed dict is stored in self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Magazines'))
    cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
    cmd += ' as Iss_Cnt from magazines '
    params = []
    if 'query' in kwargs:
        # BUGFIX: the search term was string-concatenated into the SQL
        # (injection risk); bind it as a parameter instead
        cmd += "WHERE magazines.title LIKE ? "
        params.append('%' + kwargs['query'] + '%')
    cmd += 'order by magazines.title'
    if params:
        results = myDB.select(cmd, tuple(params))
    else:
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for mag in page:
        if mag['Iss_Cnt'] > 0:
            title = makeUnicode(mag['Title'])
            entry = {
                'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                'id': escape('magazine:%s' % title),
                'updated': opdstime(mag['LastAcquired']),
                'content': escape('%s' % title),
                'href': '%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(title)),
                'kind': 'navigation',
                'rel': 'subsection',
            }
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # refresh the cached cover thumbnail for the latest issue
                res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)
    # paging links: next/previous windows of PAGE_SIZE results
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def getAuthorImage(authorid=None):
    """Return a cache link for an author portrait, or None.

    A previously cached image is served directly; otherwise a google
    image search (jpeg, face) is scraped and the hit is cached.
    """
    # tbm=isch -> image search; tbs=ift:jpg -> jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None
    cached_file = os.path.join(lazylibrarian.CACHEDIR, "author", authorid + '.jpg')
    if os.path.isfile(cached_file):
        # portrait already cached: count the hit and serve it
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % cached_file)
        return 'cache/author/' + authorid + '.jpg'
    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    db = database.DBConnection()
    row = db.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if not row:
        logger.debug("No author found for %s" % authorid)
        return None
    authorname = safe_unicode(row['AuthorName'])
    if PY2:
        authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
    safeparams = quote_plus("author %s" % authorname)
    URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
    result, success = fetchURL(URL)
    if not success:
        logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
        return None
    # pull the first image url out of the results page markup
    try:
        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
    except IndexError:
        img = None
    if not (img and img.startswith('http')):
        logger.debug("No image found in google page for %s" % authorname)
        return None
    coverlink, success, was_in_cache = cache_img("author", authorid, img)
    if not success:
        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
        return None
    if was_in_cache:
        logger.debug("Returning cached google image for %s" % authorname)
    else:
        logger.debug("Cached google image for %s" % authorname)
    return coverlink
def multiLink(self, bookfile, bookid):
    """Return OPDS <link> markup for each on-disk ebook format of a book.

    Checks every configured EBOOK_TYPE extension next to *bookfile*;
    returns the concatenated links when more than one format exists,
    otherwise an empty string.
    """
    basename, extn = os.path.splitext(bookfile)
    available = [fmt for fmt in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                 if os.path.isfile(basename + '.' + fmt)]
    if len(available) <= 1:
        return ''
    pieces = []
    for fmt in available:
        pieces.append('<link href="')
        pieces.append('%s?cmd=Serve&bookid=%s&fmt=%s' % (self.opdsroot, quote_plus(bookid), fmt))
        pieces.append('" rel="http://opds-spec.org/acquisition" type="' + mimeType('.' + fmt) + '"/>')
    return ''.join(pieces)
def _Author(self, **kwargs):
    """Build an OPDS acquisition feed of one author's owned books/audiobooks.

    Expects kwargs['authorid']; optional kwargs['index'] for paging and
    kwargs['query'] to filter by book name.  The feed dict is stored in
    self.data.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    if 'authorid' not in kwargs:
        self.data = self._error_with_message('No Author Provided')
        return
    links = []
    entries = []
    links.append(
        getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                ftype='application/opensearchdescription+xml',
                rel='search', title='Search Books'))
    author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],))
    author = makeUnicode(author['AuthorName'])
    cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
    params = []
    if 'query' in kwargs:
        # BUGFIX: the search term was string-concatenated into the SQL
        # (injection risk); bind it as a parameter ahead of the authorid
        cmd += "BookName LIKE ? AND "
        params.append('%' + kwargs['query'] + '%')
    cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
    params.append(kwargs['authorid'])
    results = myDB.select(cmd, tuple(params))
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        mime_type = None
        rel = 'file'
        if book['BookFile']:
            # multiple ebook formats get a multi-link entry
            mime_type = self.multiLink(book['BookFile'], book['BookID'])
            if mime_type:
                rel = 'multi'
            else:
                mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
        if mime_type:
            entry = {
                'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                'id': escape('book:%s' % book['BookID']),
                'updated': opdstime(book['BookAdded']),
                'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot, book['BookID']),
                'kind': 'acquisition',
                'rel': rel,
                'type': mime_type
            }
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['thumbnail'] = entry['image']
                entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
            entries.append(entry)
    feed = {}
    authorname = '%s (%s)' % (escape(author), len(entries))
    feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
    feed['id'] = 'author:%s' % escape(kwargs['authorid'])
    feed['updated'] = now()
    links.append(
        getLink(href=self.opdsroot,
                ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                rel='start', title='Home'))
    links.append(
        getLink(href='%s?cmd=Authors' % self.opdsroot,
                ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                rel='self'))
    # paging links: next/previous windows of PAGE_SIZE results
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['authorid']), index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['authorid']), index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    self.data = feed
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    return
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    # author count before import so we can report how many were added
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid, ))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    # strip curly quotes and plain quotes before fuzzy title matching
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}
    if members:
        myDB = database.DBConnection()
        for member in members:
            # member tuple layout: order, bookname, authorname, workid, authorid, pubyear
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        # first pass: search on "bookname authorname"
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:
                        # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))
            if authorid:
                # import the author (and their books); refresh=False as it is a new entry
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)
    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None
    if not reason:
        reason = ""
    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID, ))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID, ))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')
        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])
            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    # quote_plus needs bytes on python 2
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)
            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # whatwork response should contain a <link> to the workpage; fetch it
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True
            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            # no book data for this id, fall back to a plain keyword search
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov, dltypes = IterateOverRSSSites()
        if nprov and dltypes != 'M':
            results += resultlist

    # reprocess to get consistent results: each provider type uses different
    # key names, so normalise them into one {score,title,provider,...} shape
    searchresults = []
    for entry in results:  # renamed from 'item' so the parameter isn't shadowed
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'dispname' in entry:
            provider = entry['dispname']
        elif 'nzbprov' in entry:
            provider = entry['nzbprov']
        elif 'tor_prov' in entry:
            provider = entry['tor_prov']
        elif 'rss_prov' in entry:
            provider = entry['rss_prov']
        if 'nzbtitle' in entry:
            title = entry['nzbtitle']
        if 'nzburl' in entry:
            url = entry['nzburl']
        if 'nzbsize' in entry:
            size = entry['nzbsize']
        if 'nzbdate' in entry:
            date = entry['nzbdate']
        if 'nzbmode' in entry:
            mode = entry['nzbmode']
        if 'tor_title' in entry:
            title = entry['tor_title']
        if 'tor_url' in entry:
            url = entry['tor_url']
        if 'tor_size' in entry:
            size = entry['tor_size']
        if 'tor_date' in entry:
            date = entry['tor_date']
        if 'tor_type' in entry:
            mode = entry['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'
            # BUGFIX: check the magnet prefix BEFORE encoding. The old code
            # encoded url to bytes first, so bytes.startswith('magnet') raised
            # TypeError (str prefix on bytes) on Python 3 for torznab results.
            if mode == 'torznab' and url.startswith('magnet'):
                mode = 'magnet'
            url = url.encode('utf-8')
            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size,
                          'date': date, 'url': quote_plus(url), 'mode': mode}
                searchresults.append(result)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. OpenLibrary image
            7. Google isbn search (if google has a link to book for sale)
            8. Google images search (if lazylibrarian config allows)
        src = cache, cover, goodreads, librarything, whatwork, googleisbn, openlibrary, googleimage
        If src is given, only that one source is tried and a miss returns (None, src);
        with no src each source is tried in turn.
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)
                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        # BUGFIX: was "if success:" which returned the empty 1x1 gif
                        # as a valid cover; now consistent with every other source
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        # try to get a cover from openlibrary
        if not src or src == 'openlibrary':
            # ROBUSTNESS: guard item as well, so a missing book row doesn't abort
            # every remaining source via the broad exception handler below
            if item and item['BookISBN']:
                baseurl = 'https://openlibrary.org/api/books?format=json&jscmd=data&bibkeys=ISBN:'
                result, success = fetchURL(baseurl + item['BookISBN'])
                if success:
                    try:
                        source = json.loads(result)  # type: dict
                    except Exception as e:
                        logger.debug("OpenLibrary json error: %s" % e)
                        source = []
                    img = ''
                    if source:
                        # BUGFIX: dict.keys() is not subscriptable on Python 3,
                        # source.keys()[0] raised TypeError there
                        # noinspection PyUnresolvedReferences
                        k = list(source.keys())[0]
                        try:
                            img = source[k]['cover']['medium']
                        except KeyError:
                            try:
                                img = source[k]['cover']['large']
                            except KeyError:
                                logger.debug("No openlibrary image for %s" % item['BookISBN'])
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'openlibrary':
                            coverlink, success, _ = cache_img("book", bookID + '_ol', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty openlibrary image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching openlibrary cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'openlibrary'
                else:
                    logger.debug("OpenLibrary error: %s" % result)
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch search images
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" % (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
        return None, src
# NOTE(review): this is a second definition of getBookCover in the same module;
# being defined later it shadows the earlier 8-source version (this copy has no
# openlibrary lookup). Confirm which copy is intended and remove the other.
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)
        src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        # source 1: image cached when the book was imported
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                # caller asked for cache only: count the miss and stop
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        # source 2: cover.jpg alongside the book file
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            # keep a separate per-source cache name when explicitly requested
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        # under 50 bytes is treated as the placeholder gif
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                # first try the workCoverImage attribute in the page
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)
                # fall back to the og:image meta tag
                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        # NOTE(review): every sibling branch uses "elif success:" here;
                        # with plain "if" the empty 1x1 gif is still returned as a cover
                        # when success is set - looks unintentional, confirm
                        if success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        # remaining sources need author/title for a search string
        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch search images
            # tbs=isz:l large images
            # ift:jpg jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" % (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
        return None, src
def find_results(self, searchterm=None, queue=None):
    """ GoogleBooks performs much better if we search for author OR title
        not both at once, so if searchterm is not isbn, two searches needed.
        Lazylibrarian searches use <ll> to separate title from author in searchterm
        If this token isn't present, it's an isbn or searchterm as supplied by user

        Results are not returned; a list of result dicts is put on `queue`.
    """
    try:
        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(searchterm):
            api_strings = ['isbn:']

        api_hits = 0
        ignored = 0
        total_count = 0
        no_author_count = 0
        title = ''
        authorname = ''
        if ' <ll> ' in searchterm:  # special token separates title from author
            title, authorname = searchterm.split(' <ll> ')
        fullterm = searchterm.replace(' <ll> ', ' ')
        logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

        for api_value in api_strings:
            set_url = self.url
            if api_value == "isbn:":
                set_url = set_url + quote(api_value + searchterm)
            elif api_value == 'intitle:':
                searchterm = fullterm
                if title:  # just search for title
                    # noinspection PyUnresolvedReferences
                    title = title.split(' (')[0]  # without any series info
                    searchterm = title
                searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote(api_value + '"' + searchterm + '"')
            elif api_value == 'inauthor:':
                searchterm = fullterm
                if authorname:
                    searchterm = authorname  # just search for author
                searchterm = searchterm.strip()
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

            # per-api_value counters; note these deliberately reset the
            # ignored/total/no_author counters declared above, so the summary
            # logging at the end reflects only the LAST api_value searched
            startindex = 0
            resultcount = 0
            ignored = 0
            number_results = 1  # nonzero so the first iteration runs; real total set from response
            total_count = 0
            no_author_count = 0
            try:
                # Google Books pages results 40 at a time
                while startindex < number_results:
                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)
                    try:
                        jsonresults, in_cache = gb_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            # covers both a failed request (None) and a genuine empty result
                            logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                            break
                        else:
                            pass
                    except Exception as err:
                        # err.reason present on urllib errors, fall back to str otherwise
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break

                    startindex += 40

                    for item in jsonresults['items']:
                        total_count += 1
                        book = bookdict(item)
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count += 1
                            continue
                        if not book['name']:
                            logger.debug('Skipped a result without title.')
                            continue

                        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = book['lang']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped %s with language %s' % (book['name'], booklang))
                                    ignored += 1
                                    continue
                            except KeyError:
                                ignored += 1
                                logger.debug('Skipped %s where no language is found' % book['name'])
                                continue

                        # fuzzy-match against whichever pieces of the searchterm we have
                        if authorname:
                            author_fuzz = fuzz.ratio(book['author'], authorname)
                        else:
                            author_fuzz = fuzz.ratio(book['author'], fullterm)
                        if title:
                            book_fuzz = fuzz.token_set_ratio(book['name'], title)
                            # lose a point for each extra word in the fuzzy matches so we get the closest match
                            words = len(getList(book['name']))
                            words -= len(getList(title))
                            book_fuzz -= abs(words)
                        else:
                            book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)
                        isbn_fuzz = 0
                        if is_valid_isbn(fullterm):
                            isbn_fuzz = 100

                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                        # normalise the title: swap colons, drop quotes, strip accents
                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(book['name'], dic)
                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace

                        AuthorID = ''
                        if book['author']:
                            # doubled quotes escape any embedded quote in the author name
                            match = myDB.match(
                                'SELECT AuthorID FROM authors WHERE AuthorName=?',
                                (book['author'].replace('"', '""'),))
                            if match:
                                AuthorID = match['AuthorID']

                        resultlist.append({
                            'authorname': book['author'],
                            'authorid': AuthorID,
                            'bookid': item['id'],
                            'bookname': bookname,
                            'booksub': book['sub'],
                            'bookisbn': book['isbn'],
                            'bookpub': book['pub'],
                            'bookdate': book['date'],
                            'booklang': book['lang'],
                            'booklink': book['link'],
                            'bookrate': float(book['rate']),
                            'bookrate_count': book['rate_count'],
                            'bookimg': book['img'],
                            'bookpages': book['pages'],
                            'bookgenre': book['genre'],
                            'bookdesc': book['desc'],
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': book['ratings']
                        })
                        resultcount += 1
            except KeyError:
                # NOTE(review): a KeyError here (e.g. no 'items' key in the
                # response) abandons ALL remaining api_strings, not just this
                # one - confirm this is intentional
                break

            logger.debug(
                "Returning %s result%s for (%s) with keyword: %s" %
                (resultcount, plural(resultcount), api_value, searchterm))

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug(
            'The Google Books API was hit %s time%s for searchterm: %s' %
            (api_hits, plural(api_hits), fullterm))
        queue.put(resultlist)

    except Exception:
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""
    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    # (only if we are allowed to fetch a replacement)
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # backslashreplace so a cached page with stray bytes can't raise UnicodeDecodeError
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            # rate-limit before every librarything request
            librarything_wait()
            result, success = fetchURL(URL)

            if bookID and success:
                # whatwork returns xml with a <link> to the workpage on success
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True

            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                if bookID:
                    logger.debug("getBookWork: Caching workpage for %s" % workfile)
                else:
                    logger.debug("getBookWork: Caching series page for %s" % workfile)
                # return None if we got an error page back
                if '</request><error>' in result:
                    return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def find_results(self, searchterm=None, queue=None):
    """Search the Google Books API and put a list of candidate matches on *queue*.

    GoogleBooks performs much better if we search for author OR title, not both
    at once, so unless searchterm is an ISBN two searches are needed.
    LazyLibrarian searches use ' <ll> ' to separate title from author in
    searchterm; if this token isn't present, it's an ISBN or a raw searchterm
    as supplied by the user.

    Each result dict carries fuzzy-match scores (author_fuzz/book_fuzz/
    isbn_fuzz/highest_fuzz) so the caller can rank candidates.
    Results are delivered via queue.put(); nothing is returned.
    """
    try:
        myDB = database.DBConnection()
        resultlist = []
        # See if we should check ISBN field, otherwise ignore it
        api_strings = ['inauthor:', 'intitle:']
        if is_valid_isbn(searchterm):
            api_strings = ['isbn:']
        api_hits = 0
        ignored = 0
        total_count = 0
        no_author_count = 0
        title = ''
        authorname = ''
        if ' <ll> ' in searchterm:  # special token separates title from author
            title, authorname = searchterm.split(' <ll> ')
        fullterm = searchterm.replace(' <ll> ', ' ')
        logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)
        for api_value in api_strings:
            set_url = self.url
            if api_value == "isbn:":
                set_url = set_url + quote(api_value + searchterm)
            elif api_value == 'intitle:':
                searchterm = fullterm
                if title:  # just search for title
                    # noinspection PyUnresolvedReferences
                    title = title.split(' (')[0]  # without any series info
                    searchterm = title
                searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote(api_value + '"' + searchterm + '"')
            elif api_value == 'inauthor:':
                searchterm = fullterm
                if authorname:
                    searchterm = authorname  # just search for author
                searchterm = searchterm.strip()
                if PY2:
                    searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')
            # per-api-string counters; after the loop these reflect the last api_value only
            startindex = 0
            resultcount = 0
            ignored = 0
            number_results = 1
            total_count = 0
            no_author_count = 0
            try:
                # page through the API until we've seen totalItems results
                while startindex < number_results:
                    self.params['startIndex'] = startindex
                    URL = set_url + '&' + urlencode(self.params)
                    try:
                        jsonresults, in_cache = gb_json_request(URL)
                        if jsonresults is None:
                            number_results = 0
                        else:
                            if not in_cache:
                                api_hits += 1
                            number_results = jsonresults['totalItems']
                            logger.debug('Searching url: ' + URL)
                        if number_results == 0:
                            logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                            break
                        else:
                            pass
                    except Exception as err:
                        if hasattr(err, 'reason'):
                            errmsg = err.reason
                        else:
                            errmsg = str(err)
                        logger.warn(
                            'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                        break
                    # assumes Google's page size of 40 items — TODO confirm against self.params
                    startindex += 40
                    for item in jsonresults['items']:
                        total_count += 1
                        book = bookdict(item)
                        if not book['author']:
                            logger.debug('Skipped a result without authorfield.')
                            no_author_count += 1
                            continue
                        if not book['name']:
                            logger.debug('Skipped a result without title.')
                            continue
                        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                        if "All" not in valid_langs:  # don't care about languages, accept all
                            try:
                                # skip if language is not in valid list -
                                booklang = book['lang']
                                if booklang not in valid_langs:
                                    logger.debug(
                                        'Skipped %s with language %s' % (book['name'], booklang))
                                    ignored += 1
                                    continue
                            except KeyError:
                                ignored += 1
                                logger.debug('Skipped %s where no language is found' % book['name'])
                                continue
                        # score against the split author/title when we have them,
                        # otherwise against the whole search string
                        if authorname:
                            author_fuzz = fuzz.ratio(book['author'], authorname)
                        else:
                            author_fuzz = fuzz.ratio(book['author'], fullterm)
                        if title:
                            book_fuzz = fuzz.token_set_ratio(book['name'], title)
                            # lose a point for each extra word in the fuzzy matches so we get the closest match
                            words = len(getList(book['name']))
                            words -= len(getList(title))
                            book_fuzz -= abs(words)
                        else:
                            book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)
                        isbn_fuzz = 0
                        if is_valid_isbn(fullterm):
                            isbn_fuzz = 100
                        highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)
                        dic = {':': '.', '"': '', '\'': ''}
                        bookname = replace_all(book['name'], dic)
                        bookname = unaccented(bookname)
                        bookname = bookname.strip()  # strip whitespace
                        # map the author back to an existing DB entry when possible
                        AuthorID = ''
                        if book['author']:
                            match = myDB.match(
                                'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                    book['author'].replace('"', '""'),))
                            if match:
                                AuthorID = match['AuthorID']
                        resultlist.append({
                            'authorname': book['author'],
                            'authorid': AuthorID,
                            'bookid': item['id'],
                            'bookname': bookname,
                            'booksub': book['sub'],
                            'bookisbn': book['isbn'],
                            'bookpub': book['pub'],
                            'bookdate': book['date'],
                            'booklang': book['lang'],
                            'booklink': book['link'],
                            'bookrate': float(book['rate']),
                            'bookrate_count': book['rate_count'],
                            'bookimg': book['img'],
                            'bookpages': book['pages'],
                            'bookgenre': book['genre'],
                            'bookdesc': book['desc'],
                            'author_fuzz': author_fuzz,
                            'book_fuzz': book_fuzz,
                            'isbn_fuzz': isbn_fuzz,
                            'highest_fuzz': highest_fuzz,
                            'num_reviews': book['ratings']
                        })
                        resultcount += 1
            except KeyError:
                # e.g. jsonresults without an 'items' key — give up on this api_value
                break
            logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                         (resultcount, plural(resultcount), api_value, searchterm))
        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
        logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                     (api_hits, plural(api_hits), fullterm))
        queue.put(resultlist)
    except Exception:
        logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
def _RecentBooks(self, **kwargs):
    """Build an OPDS acquisition feed of the most recently added library books.

    Optional kwargs:
        index: integer paging offset into the result set (default 0).
        query: substring filter on BookName, supplied by the OPDS client.

    Stores the feed dict in self.data; returns None.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {
        'title': 'LazyLibrarian OPDS - Recent Books',
        'id': 'Recent Books',
        'updated': now()
    }
    links = []
    entries = []
    links.append(
        getLink(
            href=self.opdsroot,
            ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
            rel='start', title='Home'))
    links.append(
        getLink(
            href='%s?cmd=RecentBooks' % self.opdsroot,
            ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
            rel='self'))
    links.append(
        getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                ftype='application/opensearchdescription+xml',
                rel='search', title='Search Books'))
    # AudioFile must be in the SELECT: the loop below reads book['AudioFile'],
    # and sqlite3.Row raises for columns that were not selected.
    cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AudioFile,AuthorID "
    cmd += "from books where Status='Open' "
    if 'query' in kwargs:
        # parameterised LIKE: 'query' comes from the client, so never build SQL
        # by string concatenation
        cmd += "AND BookName LIKE ? "
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd, ('%' + kwargs['query'] + '%',))
    else:
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        mime_type = None
        rel = 'file'
        if book['BookFile']:
            # a book may exist in several formats; multiLink returns a combined
            # type when it does, otherwise fall back to the single file's type
            mime_type = self.multiLink(book['BookFile'], book['BookID'])
            if mime_type:
                rel = 'multi'
            else:
                mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
    # only books with a downloadable file get an entry
        if mime_type:
            title = makeUnicode(book['BookName'])
            entry = {
                'title': escape(title),
                'id': escape('book:%s' % book['BookID']),
                'updated': opdstime(book['BookLibrary']),
                'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot,
                                                    quote_plus(book['BookID'])),
                'kind': 'acquisition',
                'rel': rel,
                'type': mime_type
            }
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                author = myDB.match(
                    "SELECT AuthorName from authors WHERE AuthorID=?",
                    (book['AuthorID'], ))
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['thumbnail'] = entry['image']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)
    # Example acquisition links for reference:
    # <link type="application/epub+zip" rel="http://opds-spec.org/acquisition"
    #   title="EPUB (no images)" length="18552" href="//www.gutenberg.org/ebooks/57490.epub.noimages"/>
    # <link type="application/x-mobipocket-ebook" rel="http://opds-spec.org/acquisition"
    #   title="Kindle (no images)" length="110360" href="//www.gutenberg.org/ebooks/57490.kindle.noimages"/>
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(
                href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(
                href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []
    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)
    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            # bookid not in the database — fall back to a plain keyword search
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    # each iterator returns (resultlist, number_of_providers_that_responded)
    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    # reprocess to get consistent results
    # NOTE: loop variable renamed from 'item' which shadowed the parameter
    searchresults = []
    for res in results:
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        # nzb-style result fields
        if 'nzbtitle' in res:
            title = res['nzbtitle']
        if 'nzburl' in res:
            url = res['nzburl']
        if 'nzbprov' in res:
            provider = res['nzbprov']
        if 'nzbsize' in res:
            size = res['nzbsize']
        if 'nzbdate' in res:
            date = res['nzbdate']
        if 'nzbmode' in res:
            mode = res['nzbmode']
        # torrent-style result fields
        if 'tor_title' in res:
            title = res['tor_title']
        if 'tor_url' in res:
            url = res['tor_url']
        if 'tor_prov' in res:
            provider = res['tor_prov']
        if 'tor_size' in res:
            size = res['tor_size']
        if 'tor_date' in res:
            date = res['tor_date']
        if 'tor_type' in res:
            mode = res['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'
            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size,
                          'date': date, 'url': quote_plus(url), 'mode': mode}
                searchresults.append(result)

    # sorting (by score, descending) is left to the caller
    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
def _Author(self, **kwargs):
    """Build an OPDS acquisition feed of downloadable books for one author.

    Required kwargs:
        authorid: the author's database id.
    Optional kwargs:
        index: integer paging offset into the result set (default 0).
        query: substring filter on BookName, supplied by the OPDS client.

    Stores the feed dict in self.data; returns None.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    if 'authorid' not in kwargs:
        self.data = self._error_with_message('No Author Provided')
        return
    links = []
    entries = []
    links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Books'))
    author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],))
    if not author:
        # an unknown id would otherwise crash on author['AuthorName']
        self.data = self._error_with_message('No Author Provided')
        return
    author = makeUnicode(author['AuthorName'])
    cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
    if 'query' in kwargs:
        # parameterised LIKE: 'query' comes from the client, so never build SQL
        # by string concatenation
        cmd += "BookName LIKE ? AND "
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        results = myDB.select(cmd, ('%' + kwargs['query'] + '%', kwargs['authorid']))
    else:
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        results = myDB.select(cmd, (kwargs['authorid'],))
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        # only books with a downloadable ebook or audio file get an entry
        mime_type = None
        if book['BookFile']:
            mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
        if mime_type:
            entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                     'id': escape('book:%s' % book['BookID']),
                     'updated': opdstime(book['BookAdded']),
                     # quote_plus for consistency with the other feeds' Serve links
                     'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot,
                                                         quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mime_type}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
            entries.append(entry)
    feed = {}
    authorname = '%s (%s)' % (escape(author), len(entries))
    feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
    feed['id'] = 'author:%s' % escape(kwargs['authorid'])
    feed['updated'] = now()
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    # self must point at THIS author's feed (was cmd=Authors, the author list)
    links.append(getLink(href='%s?cmd=Author&authorid=%s' %
                              (self.opdsroot, quote_plus(kwargs['authorid'])),
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['authorid']), index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                         (self.opdsroot, quote_plus(kwargs['authorid']), index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    self.data = feed
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    return
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    # snapshot the author count so we can report how many we add
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    # strip curly/straight quotes before fuzzy matching
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}
    if members:
        myDB = database.DBConnection()
        for member in members:
            # member tuple: (order, bookname, authorname, workid, authorid)
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                # first pass: search GoodReads by "title author"
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            # 98 = near-exact title match required
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:
                        # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                # import the author (and their books); refresh=False means add-only
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def _RecentBooks(self, **kwargs):
    """Build an OPDS acquisition feed of the most recently added library books.

    Optional kwargs:
        index: integer paging offset into the result set (default 0).
        query: substring filter on BookName, supplied by the OPDS client.

    Stores the feed dict in self.data; returns None.
    """
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent Books', 'id': 'Recent Books', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='self'))
    links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml',
                         rel='search', title='Search Books'))
    # AudioFile must be in the SELECT: the loop below reads book['AudioFile'],
    # and sqlite3.Row raises for columns that were not selected.
    cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AudioFile,AuthorID "
    cmd += "from books where Status='Open' "
    if 'query' in kwargs:
        # parameterised LIKE: 'query' comes from the client, so never build SQL
        # by string concatenation
        cmd += "AND BookName LIKE ? "
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd, ('%' + kwargs['query'] + '%',))
    else:
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        # only books with a downloadable ebook or audio file get an entry
        mime_type = None
        if book['BookFile']:
            mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
        if mime_type:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('book:%s' % book['BookID']),
                     'updated': opdstime(book['BookLibrary']),
                     'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot,
                                                         quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mime_type}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # parameterised lookup (was string-interpolated SQL)
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?",
                                    (book['AuthorID'],))
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)
    # Example acquisition links for reference:
    # <link type="application/epub+zip" rel="http://opds-spec.org/acquisition"
    #   title="EPUB (no images)" length="18552" href="//www.gutenberg.org/ebooks/57490.epub.noimages"/>
    # <link type="application/x-mobipocket-ebook" rel="http://opds-spec.org/acquisition"
    #   title="Kindle (no images)" length="110360" href="//www.gutenberg.org/ebooks/57490.kindle.noimages"/>
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return