def isbn_from_words(words):
    """Use Google to get an ISBN from words from title and author's name."""
    baseurl = "http://www.google.com/search?q=ISBN+"
    if not PY2:
        search_url = baseurl + quote(words.replace(' ', '+'))
    else:
        search_url = baseurl + words.replace(' ', '+')

    headers = {
        'User-Agent': 'w3m/0.5.3',
        'Content-Type': 'text/plain; charset="UTF-8"',
        'Content-Transfer-Encoding': 'Quoted-Printable',
    }
    content, success = fetchURL(search_url, headers=headers)
    # noinspection Annotator
    RE_ISBN13 = re.compile(r'97[89]{1}(?:-?\d){10,16}|97[89]{1}[- 0-9]{10,16}')
    RE_ISBN10 = re.compile(r'ISBN\x20(?=.{13}$)\d{1,5}([- ])\d{1,7}'
                           r'\1\d{1,6}\1(\d|X)$|[- 0-9X]{10,16}')

    # take the first answer that's a plain isbn, no spaces, dashes etc.
    res = RE_ISBN13.findall(content)
    for item in res:
        if len(item) == 13:
            return item
    res = RE_ISBN10.findall(content)
    for item in res:
        if len(item) == 10:
            return item
    logger.debug('No ISBN found for %s' % words)
    return None

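# A minimal, self-contained sketch (not part of LazyLibrarian) showing how the
# RE_ISBN13 pattern above behaves: hyphenated matches come back longer than 13
# characters, which is why this variant of isbn_from_words() only accepts
# len(item) == 13, and why the cached variant further down strips dashes and
# spaces before checking the length.
import re

RE_ISBN13 = re.compile(r'97[89]{1}(?:-?\d){10,16}|97[89]{1}[- 0-9]{10,16}')

if __name__ == '__main__':
    sample = 'ISBN 9780306406157 or ISBN 978-0-306-40615-7'
    for match in RE_ISBN13.findall(sample):
        cleaned = match.replace('-', '').replace(' ', '')
        print(match, '->', cleaned, len(cleaned))  # both clean up to 13 digits
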
def LISTOPIA(host=None, feednr=None, priority=0):
    """
    Goodreads Listopia query function, return all the results in a list
    """
    results = []
    maxpage = priority
    if not str(host)[:4] == "http":
        host = 'http://' + host
    page = 0
    next_page = True

    while next_page:
        URL = host
        if page:
            URL = "%s?page=%i" % (host, page)
        result, success = fetchURL(URL)
        next_page = False
        if not success:
            logger.error('Error fetching data from %s: %s' % (URL, result))
        elif result:
            logger.debug('Parsing results from %s' % URL)
            data = result.split('<td valign="top" class="number">')
            for entry in data[1:]:
                try:
                    # index = entry.split('<')[0]
                    title = entry.split('<a title="')[1].split('"')[0]
                    book_id = entry.split('data-resource-id="')[1].split('"')[0]
                    author_name = entry.split('<a class="authorName"')[1].split('"name">')[1].split('<')[0]
                    results.append({
                        'rss_prov': host.split('/list/show/')[1],
                        'rss_feed': feednr,
                        'rss_title': title,
                        'rss_author': author_name,
                        'rss_bookid': book_id,
                        'rss_isbn': '',
                        'priority': priority
                    })
                    next_page = True
                except IndexError:
                    pass
        else:
            logger.debug('No data returned from %s' % URL)

        page += 1
        if maxpage:
            if page >= maxpage:
                logger.warn('Maximum results page reached, still more results available')
                next_page = False

    logger.debug("Found %i result%s from %s" % (len(results), plural(len(results)), host))
    return results

def build_bookstrap_themes():
    themelist = []
    if not os.path.isdir(os.path.join(PROG_DIR, 'data', 'interfaces', 'bookstrap')):
        return themelist  # return empty if bookstrap interface not installed

    if not internet():
        logger.warn('Build Bookstrap Themes: No internet connection')
        return themelist

    URL = 'http://bootswatch.com/api/3.json'
    result, success = fetchURL(URL, None, False)  # use default headers, no retry
    if not success:
        logger.debug("Error getting bookstrap themes : %s" % result)
        return themelist

    try:
        results = json.loads(result)
        for theme in results['themes']:
            themelist.append(theme['name'].lower())
    except Exception as e:
        # error reading results
        logger.debug('JSON Error reading bookstrap themes, %s' % str(e))

    logger.debug("Bookstrap found %i themes" % len(themelist))
    return themelist

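# Illustrative sketch of the JSON shape build_bookstrap_themes() expects from
# the bootswatch API. The sample payload here is made up; only the
# results['themes'] -> theme['name'] structure is relied on by the code above.
import json

sample = '{"version": "3", "themes": [{"name": "Cerulean"}, {"name": "Cosmo"}]}'
themes = [theme['name'].lower() for theme in json.loads(sample)['themes']]
print(themes)  # ['cerulean', 'cosmo']
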
def GOODREADS(host=None, feednr=None, priority=0, dispname=None, test=False):
    """
    Goodreads RSS query function, return all the results in a list,
    can handle multiple wishlists but expects goodreads format
    (looks for goodreads category names)
    """
    results = []
    basehost = host
    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host
    result, success = fetchURL(URL)
    if test:
        return success

    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(basehost, result)
        return []

    if data:
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        if not dispname:
            dispname = provider
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = ''
            book_id = ''
            author_name = ''
            isbn = ''
            if 'title' in post:
                title = post.title
            if 'book_id' in post:
                book_id = post.book_id
            if 'author_name' in post:
                author_name = post.author_name
            if 'isbn' in post:
                isbn = post.isbn
            if title and author_name:
                results.append({
                    'rss_prov': provider,
                    'rss_feed': feednr,
                    'rss_title': title,
                    'rss_author': author_name,
                    'rss_bookid': book_id,
                    'rss_isbn': isbn,
                    'priority': priority,
                    'dispname': dispname
                })
    else:
        logger.debug('No data returned from %s' % host)

    return results

def getAuthorImage(authorid=None):
    # tbm=isch      search images
    # tbs=ift:jpg   jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "author", authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/author/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if author:
        authorname = safe_unicode(author['AuthorName'])
        if PY2:
            authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
        safeparams = quote_plus("author %s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success, was_in_cache = cache_img("author", authorid, img)
                if success:
                    if was_in_cache:
                        logger.debug("Returning cached google image for %s" % authorname)
                    else:
                        logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None

def getAuthorImage(authorid=None):
    # tbm=isch      search images
    # tbs=ift:jpg   jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'images/cache/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    authors = myDB.select('select AuthorName from authors where AuthorID = "%s"' % authorid)
    if authors:
        authorname = safe_unicode(authors[0][0]).encode(lazylibrarian.SYS_ENCODING)
        safeparams = urllib.quote_plus("%s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg&as_q=" + safeparams
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(authorid, img)
                if coverlink is not None:
                    logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None

def _getJSON(URL, params):
    # Get JSON response from URL
    # Return json,True or error_msg,False
    URL += "/?%s" % urlencode(params)
    result, success = fetchURL(URL, retry=False)
    if success:
        try:
            result_json = json.loads(result)
            return result_json, True
        except (ValueError, AttributeError):
            return "Could not convert response to json", False
    return "getJSON returned %s" % result, False

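# Hypothetical usage sketch for _getJSON() above. The endpoint and params are
# placeholders, and fetchURL() is assumed to return (body, success) as it does
# elsewhere in this module; on failure the first return value is an error string.
result, success = _getJSON('http://example.com/api', {'t': 'search', 'q': 'dickens'})
if success:
    print(result.get('total', 0))
else:
    print('error: %s' % result)
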
def isbn_from_words(words):
    """ Use Google to get an ISBN for a book from words in title and authors name.
        Store the results in the database """
    myDB = database.DBConnection()
    res = myDB.match("SELECT ISBN from isbn WHERE Words=?", (words,))
    if res:
        logger.debug('Found cached ISBN for %s' % words)
        return res['ISBN']

    baseurl = "http://www.google.com/search?q=ISBN+"
    if not PY2:
        search_url = baseurl + quote(words.replace(' ', '+'))
    else:
        search_url = baseurl + words.replace(' ', '+')

    headers = {
        'User-Agent': 'w3m/0.5.3',
        'Content-Type': 'text/plain; charset="UTF-8"',
        'Content-Transfer-Encoding': 'Quoted-Printable',
    }
    content, success = fetchURL(search_url, headers=headers)
    # noinspection Annotator
    RE_ISBN13 = re.compile(r'97[89]{1}(?:-?\d){10,16}|97[89]{1}[- 0-9]{10,16}')
    RE_ISBN10 = re.compile(r'ISBN\x20(?=.{13}$)\d{1,5}([- ])\d{1,7}\1\d{1,6}\1(\d|X)$|[- 0-9X]{10,16}')

    # take the first valid looking answer
    res = RE_ISBN13.findall(content)
    logger.debug('Found %s ISBN13 for %s' % (len(res), words))
    for item in res:
        if len(item) > 13:
            item = item.replace('-', '').replace(' ', '')
        if len(item) == 13:
            myDB.action("INSERT into isbn (Words, ISBN) VALUES (?, ?)", (words, item))
            return item

    res = RE_ISBN10.findall(content)
    logger.debug('Found %s ISBN10 for %s' % (len(res), words))
    for item in res:
        if len(item) > 10:
            item = item.replace('-', '').replace(' ', '')
        if len(item) == 10:
            myDB.action("INSERT into isbn (Words, ISBN) VALUES (?, ?)", (words, item))
            return item

    logger.debug('No valid ISBN found for %s' % words)
    return None

def getAuthorImage(authorid=None):
    # tbm=isch      search images
    # tbs=ift:jpg   jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, authorid + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getAuthorImage: Returning Cached response for %s" % coverfile)
        coverlink = 'images/cache/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    authors = myDB.select('select AuthorName from authors where AuthorID = "%s"' % authorid)
    if authors:
        authorname = authors[0][0]
        safeparams = urllib.quote_plus("%s" % authorname)
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg&as_q=" + safeparams
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(authorid, img)
                if coverlink is not None:
                    logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("No image found in google page for %s" % authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None

def LIME(book=None):
    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])
    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # clear the error text so we don't try to parse it below

    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0
                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']
                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results

def KAT(book=None):
    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])
    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []
    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:  # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []
        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split('.torrent?')[0] + '.torrent'

                try:
                    size = str(col1.text).replace(' ', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0

                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results

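# Standalone sketch of the size parsing used inline by KAT() above and TPB()
# below: providers report human-readable sizes ("1.4 MB"), and the parsers
# reduce them to bytes with a unit multiplier. size_in_bytes() in the GEN()
# variants plays the same role; this helper is illustrative only, not part of
# the module.
def _size_to_bytes(size):
    size = str(size).replace(' ', '').upper()
    mult = 1
    if 'K' in size:
        size = size.split('K')[0]
        mult = 1024
    elif 'M' in size:
        size = size.split('M')[0]
        mult = 1024 * 1024
    elif 'G' in size:
        size = size.split('G')[0]
        mult = 1024 * 1024 * 1024
    try:
        return int(float(size) * mult)
    except ValueError:
        return 0

# _size_to_bytes('1.4 MB') == 1468006, _size_to_bytes('700K') == 716800
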
def ZOO(book=None):
    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])
    params = {"category": "books", "fmt": "rss"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # clear the error text so we don't try to parse it below

    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']
                    url = None
                    if link:
                        url = link
                    if magnet:  # if both, prefer magnet over torrent
                        url = magnet
                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results

def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)
    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])
    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    if link and minimumseeders < int(seeders):
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def RSS(host=None, feednr=None, priority=0, test=False):
    """
    Generic RSS query function, just return all the results from the RSS feed in a list
    """
    results = []

    URL = host
    if not str(URL)[:4] == "http":
        URL = 'http://' + URL

    result, success = fetchURL(URL)
    if test:
        return success

    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(host, result)
        data = None

    if data:
        # to debug because of api
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = None
            magnet = None
            size = None
            torrent = None
            nzb = None
            url = None
            tortype = 'torrent'

            if 'title' in post:
                title = post.title
            if 'links' in post:
                for f in post.links:
                    if 'x-bittorrent' in f['type']:
                        size = f['length']
                        torrent = f['href']
                        break
                    if 'x-nzb' in f['type']:
                        size = f['length']
                        nzb = f['href']
                        break
            if 'torrent_magneturi' in post:
                magnet = post.torrent_magneturi

            if torrent:
                url = torrent
                tortype = 'torrent'
            if magnet:
                if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                    url = magnet
                    tortype = 'magnet'
            if nzb:  # prefer nzb over torrent/magnet
                url = nzb
                tortype = 'nzb'
            if not url:
                if 'link' in post:
                    url = post.link

            tor_date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if 'newznab_attr' in post:
                if post.newznab_attr['name'] == 'usenetdate':
                    tor_date = post.newznab_attr['value']

            if not size:
                size = 1000
            if title and url:
                results.append({
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_date': tor_date,
                    'tor_feed': feednr,
                    'tor_type': tortype,
                    'priority': priority
                })
    else:
        logger.debug('No data returned from %s' % host)

    return results

def NZBDownloadMethod(bookid=None, nzbtitle=None, nzburl=None, library='eBook'):
    myDB = database.DBConnection()
    Source = ''
    downloadID = ''
    if lazylibrarian.CONFIG['NZB_DOWNLOADER_SABNZBD'] and lazylibrarian.CONFIG['SAB_HOST']:
        Source = "SABNZBD"
        downloadID = sabnzbd.SABnzbd(nzbtitle, nzburl, False)  # returns nzb_ids or False

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_NZBGET'] and lazylibrarian.CONFIG['NZBGET_HOST']:
        Source = "NZBGET"
        data, success = fetchURL(nzburl)
        if not success:
            logger.debug('Failed to read nzb data for nzbget: %s' % data)
            downloadID = ''
        else:
            nzb = classes.NZBDataSearchResult()
            nzb.extraInfo.append(data)
            nzb.name = nzbtitle
            nzb.url = nzburl
            downloadID = nzbget.sendNZB(nzb)

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        Source = "SYNOLOGY_NZB"
        downloadID = synology.addTorrent(nzburl)  # returns nzb_ids or False

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        nzbfile, success = fetchURL(nzburl)
        if not success:
            logger.warn('Error fetching nzb from url [%s]: %s' % (nzburl, nzbfile))
            nzbfile = ''
        if nzbfile:
            nzbname = str(nzbtitle) + '.nzb'
            nzbpath = os.path.join(lazylibrarian.CONFIG['NZB_BLACKHOLEDIR'], nzbname)
            try:
                with open(nzbpath, 'wb') as f:
                    if isinstance(nzbfile, unicode):
                        nzbfile = nzbfile.encode('iso-8859-1')
                    f.write(nzbfile)
                logger.debug('NZB file saved to: ' + nzbpath)
                setperm(nzbpath)
                downloadID = nzbname
            except Exception as e:
                logger.error('%s not writable, NZB not saved. %s: %s' % (nzbpath, type(e).__name__, str(e)))
                downloadID = ''

    if not Source:
        logger.warn('No NZB download method is enabled, check config.')
        return False

    if downloadID:
        logger.debug('Nzbfile has been downloaded from ' + str(nzburl))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus = "Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, nzburl))
        return True
    else:
        logger.error('Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, Source))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (nzburl,))
        return False

def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"q": book['searchterm'], "category": cat, "page": page, "orderby": "99"}
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace(' ', '')
                        mult = 1
                        try:
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if minimumseeders < int(seeders):
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' % (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def getBookWork(bookID=None, reason=None):
    """ return the contents of the LibraryThing workpage for the given bookid
        preferably from the cache. If not already cached cache the results
        Return None if no workpage available """
    if not bookID:
        logger.error("getBookWork - No bookID")
        return None

    if not reason:
        reason = ""
    myDB = database.DBConnection()
    item = myDB.match('select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID)
    if item:
        cacheLocation = "WorkCache"
        # does the workpage need to expire?
        # expireafter = lazylibrarian.CACHE_AGE
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if not os.path.exists(cacheLocation):
            os.mkdir(cacheLocation)
        workfile = os.path.join(cacheLocation, bookID + '.html')
        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" % (bookID, reason))
            with open(workfile, "r") as cachefile:
                source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            bookisbn = item['BookISBN']
            if bookisbn:
                URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn
            else:
                title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
                author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
                safeparams = urllib.quote_plus("%s %s" % (author, title))
                URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams
            librarything_wait()
            result, success = fetchURL(URL)
            if success:
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        logger.debug(u"getBookWork: Got librarything error page: [%s] %s" %
                                     (errmsg, URL.split('?')[1]))
                    except Exception:
                        logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1])
                        return None
                if success:
                    logger.debug(u"getBookWork: Caching response for %s" % workfile)
                    with open(workfile, "w") as cachefile:
                        cachefile.write(result)
                    return result
                else:
                    logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (workpage, result))
                    return None
            else:
                logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result))
                return None
    else:
        logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        return None

def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. LibraryThing whatwork
            3. Goodreads search if book was imported from goodreads
            4. Google images search
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = os.path.join(str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache')
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'images/cache/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

    # not found in librarything work page, try to get a cover from goodreads or google instead
    myDB = database.DBConnection()
    item = myDB.match('select BookName,AuthorName,BookLink from books where bookID="%s"' % bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
            # 1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                time.sleep(1)
                lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split('"')[0]
                except IndexError:
                    img = None
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                        lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink is not None:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" % (author, title))
                        return coverlink
                    else:
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

        # if this failed, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink is not None:
                    logger.debug("getBookCover: Caching google cover for %s %s" % (author, title))
                    return coverlink
                else:
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None

def RSS(host=None, feednr=None):
    """
    Generic RSS query function, just return all the results from all the RSS feeds in a list
    """
    results = []

    if not str(host)[:4] == "http":
        host = 'http://' + host

    URL = host
    result, success = fetchURL(URL)
    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        data = None

    if data:
        # to debug because of api
        logger.debug(u'Parsing results from %s' % URL)
        provider = data['feed']['link']
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = None
            magnet = None
            size = None
            torrent = None
            nzb = None
            url = None
            tortype = 'torrent'

            if 'title' in post:
                title = post.title
            if 'links' in post:
                for f in post.links:
                    if 'x-bittorrent' in f['type']:
                        size = f['length']
                        torrent = f['href']
                        break
                    if 'x-nzb' in f['type']:
                        size = f['length']
                        nzb = f['href']
                        break
            if 'torrent_magneturi' in post:
                magnet = post.torrent_magneturi

            if torrent:
                url = torrent
                tortype = 'torrent'
            if magnet:  # prefer magnet over torrent
                url = magnet
                tortype = 'magnet'
            if nzb:  # prefer nzb over torrent/magnet
                url = nzb
                tortype = 'nzb'
            if not url:
                if 'link' in post:
                    url = post.link

            tor_date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if 'newznab_attr' in post:
                if post.newznab_attr['name'] == 'usenetdate':
                    tor_date = post.newznab_attr['value']

            if not size:
                size = 1000
            if title and url:
                results.append({
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_date': tor_date,
                    'tor_feed': feednr,
                    'tor_type': tortype
                })
    else:
        logger.debug('No data returned from %s' % host)

    return results

def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {"s": book['searchterm'], "f_lang": "All", "f_columns": 0, "f_ext": "All"}
        else:
            params = {"view": "simple", "open": 0, "phrase": 0, "column": "def", "res": 100,
                      "req": book['searchterm']}
        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table')[-1]  # un-named table, last one in page
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')
                    if 'index.php' in search and len(td) > 3:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))
                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]
                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                            bookresult, success = fetchURL(url)
                            if not success:
                                logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                                logger.debug(url)
                                url = None
                            else:
                                url = None
                                try:
                                    new_soup = BeautifulSoup(bookresult, 'html5lib')
                                    for link in new_soup.find_all('a'):
                                        output = link.get('href')
                                        if output:
                                            if output.startswith('http') and '/get.php' in output:
                                                url = output
                                                break
                                            elif '/get.php' in output:
                                                url = '/get.php' + output.split('/get.php')[1]
                                                break
                                            elif '/download/book' in output:
                                                url = '/download/book' + output.split('/download/book')[1]
                                                break
                                    if url and not url.startswith('http'):
                                        url = url_fix(host + url)
                                    else:
                                        url = redirect_url(host, url)
                                except Exception as e:
                                    logger.error('%s parsing bookresult for %s: %s' %
                                                 (type(e).__name__, link, str(e)))
                                    url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                            next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {"s": book['searchterm'], "f_lang": "All", "f_columns": 0, "f_ext": "All"}
        else:
            params = {"view": "simple", "open": 0, "phrase": 0, "column": "def", "res": 100,
                      "req": book['searchterm']}
        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table', rules='rows')[-1]  # the last table with rules=rows
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')
                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))
                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]
                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                            bookresult, success = fetchURL(url)
                            if not success:
                                logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                                logger.debug(url)
                                url = None
                            else:
                                url = None
                                try:
                                    new_soup = BeautifulSoup(bookresult, 'html5lib')
                                    for link in new_soup.find_all('a'):
                                        output = link.get('href')
                                        if output:
                                            if output.startswith('http') and '/get.php' in output:
                                                url = output
                                                break
                                            elif '/get.php' in output:
                                                url = '/get.php' + output.split('/get.php')[1]
                                                break
                                            elif '/download/book' in output:
                                                url = '/download/book' + output.split('/download/book')[1]
                                                break
                                    if url and not url.startswith('http'):
                                        url = url_fix(host + url)
                                    else:
                                        url = redirect_url(host, url)
                                except Exception as e:
                                    logger.error('%s parsing bookresult for %s: %s' %
                                                 (type(e).__name__, link, str(e)))
                                    url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                            next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

def NZBDownloadMethod(bookid=None, nzbtitle=None, nzburl=None, library='eBook'):
    myDB = database.DBConnection()
    Source = ''
    downloadID = ''
    # if library in ['eBook', 'AudioBook']:
    #     nzbtitle = '%s LL.(%s)' % (nzbtitle, bookid)
    if lazylibrarian.CONFIG['NZB_DOWNLOADER_SABNZBD'] and lazylibrarian.CONFIG['SAB_HOST']:
        Source = "SABNZBD"
        downloadID = sabnzbd.SABnzbd(nzbtitle, nzburl, False)  # returns nzb_ids or False

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_NZBGET'] and lazylibrarian.CONFIG['NZBGET_HOST']:
        Source = "NZBGET"
        data, success = fetchURL(nzburl)
        if not success:
            logger.debug('Failed to read nzb data for nzbget: %s' % data)
            downloadID = ''
        else:
            nzb = classes.NZBDataSearchResult()
            nzb.extraInfo.append(data)
            nzb.name = nzbtitle
            nzb.url = nzburl
            downloadID = nzbget.sendNZB(nzb)

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG['USE_SYNOLOGY'] and \
            lazylibrarian.CONFIG['SYNOLOGY_HOST']:
        Source = "SYNOLOGY_NZB"
        downloadID = synology.addTorrent(nzburl)  # returns nzb_ids or False

    if lazylibrarian.CONFIG['NZB_DOWNLOADER_BLACKHOLE']:
        Source = "BLACKHOLE"
        nzbfile, success = fetchURL(nzburl)
        if not success:
            logger.warn('Error fetching nzb from url [%s]: %s' % (nzburl, nzbfile))
            nzbfile = ''
        if nzbfile:
            nzbname = str(nzbtitle) + '.nzb'
            nzbpath = os.path.join(lazylibrarian.CONFIG['NZB_BLACKHOLEDIR'], nzbname)
            try:
                with open(nzbpath, 'wb') as f:
                    if isinstance(nzbfile, text_type):
                        nzbfile = nzbfile.encode('iso-8859-1')
                    f.write(nzbfile)
                logger.debug('NZB file saved to: ' + nzbpath)
                setperm(nzbpath)
                downloadID = nzbname
            except Exception as e:
                logger.error('%s not writable, NZB not saved. %s: %s' % (nzbpath, type(e).__name__, str(e)))
                downloadID = ''

    if not Source:
        logger.warn('No NZB download method is enabled, check config.')
        return False

    if downloadID:
        logger.debug('Nzbfile has been downloaded from ' + str(nzburl))
        if library == 'eBook':
            myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
        elif library == 'AudioBook':
            myDB.action('UPDATE books SET audiostatus = "Snatched" WHERE BookID=?', (bookid,))
        myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                    (Source, downloadID, nzburl))
        return True
    else:
        logger.error('Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, Source))
        myDB.action('UPDATE wanted SET status="Failed" WHERE NZBurl=?', (nzburl,))
        return False

def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing whatwork
            4. Goodreads search if book was imported from goodreads
            5. Google images search
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/book/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    item = myDB.match('select BookFile from books where bookID=?', (bookID,))
    if item:
        bookfile = item['BookFile']
        if bookfile:  # we may have a cover.jpg in the same folder
            bookdir = os.path.dirname(bookfile)
            coverimg = os.path.join(bookdir, "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" % coverfile)
                shutil.copyfile(coverimg, coverfile)
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink

    # no cover.jpg, try to get a cover from goodreads
    cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
    cmd += ' and books.AuthorID = authors.AuthorID'
    item = myDB.match(cmd, (bookID,))
    safeparams = ''
    if item:
        title = safe_unicode(item['BookName'])
        title = title.encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName'])
        author = author.encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
            # 1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                time.sleep(1)
                lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                except IndexError:
                    try:
                        img = result.split('og:image')[1].split('="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                        lazylibrarian.LAST_GOODREADS = time_now
                    coverlink, success = cache_img("book", bookID, img)
                    if success:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink
                    else:
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

    # nothing from goodreads, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
                else:
                    logger.debug('getBookCover: Failed to cache image for %s [%s]' % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
                else:
                    logger.debug('getBookCover: Failed to cache image for %s [%s]' % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

    if safeparams:
        # if all else fails, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success = cache_img("book", bookID, img)
                if success:
                    logger.debug("getBookCover: Caching google cover for %s %s" %
                                 (item['AuthorName'], item['BookName']))
                    return coverlink
                else:
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, coverlink))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None

def TDL(book=None):
    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)
    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # clear the error text so we don't try to parse it below

    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result)
                            for link in new_soup.findAll('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                    if minimumseeders < int(seeders):
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results

def getBookCover(bookID=None, src=None): """ Return link to a local file containing a book cover image for a bookid, and which source used. Try 1. Local file cached from goodreads/googlebooks when book was imported 2. cover.jpg if we have the book 3. LibraryThing cover image (if you have a dev key) 4. LibraryThing whatwork (if available) 5. Goodreads search (if book was imported from goodreads) 6. Google isbn search (if google has a link to book for sale) 7. Google images search (if lazylibrarian config allows) src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage Return None if no cover available. """ if not bookID: logger.error("getBookCover- No bookID") return None, src if not src: src = '' logger.debug("Getting %s cover for %s" % (src, bookID)) # noinspection PyBroadException try: cachedir = lazylibrarian.CACHEDIR coverfile = os.path.join(cachedir, "book", bookID + '.jpg') if not src or src == 'cache' or src == 'current': if os.path.isfile(coverfile): # use cached image if there is one lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1 coverlink = 'cache/book/' + bookID + '.jpg' return coverlink, 'cache' elif src: lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1 return None, src myDB = database.DBConnection() if not src or src == 'cover': item = myDB.match('select BookFile from books where bookID=?', (bookID,)) if item: bookfile = item['BookFile'] if bookfile: # we may have a cover.jpg in the same folder bookdir = os.path.dirname(bookfile) coverimg = os.path.join(bookdir, "cover.jpg") if os.path.isfile(coverimg): if src: coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg') coverlink = 'cache/book/' + bookID + '_cover.jpg' logger.debug("Caching cover.jpg for %s" % bookID) else: coverlink = 'cache/book/' + bookID + '.jpg' logger.debug("Caching cover.jpg for %s" % coverfile) _ = safe_copy(coverimg, coverfile) return coverlink, src if src: logger.debug('No cover.jpg found for %s' % bookID) return None, src # see if librarything has a cover if not src or src == 'librarything': if lazylibrarian.CONFIG['LT_DEVKEY']: cmd = 'select BookISBN from books where bookID=?' 
item = myDB.match(cmd, (bookID,)) if item and item['BookISBN']: img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % ( lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN']) if src: coverlink, success, _ = cache_img("book", bookID + '_lt', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) # if librarything has no image they return a 1x1 gif data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink)) elif success: logger.debug("Caching librarything cover for %s" % bookID) return coverlink, 'librarything' else: logger.debug('Failed to cache image for %s [%s]' % (img, coverlink)) else: logger.debug("No isbn for %s" % bookID) if src: return None, src # see if librarything workpage has a cover if not src or src == 'whatwork': work = getBookWork(bookID, "Cover") if work: try: img = work.split('workCoverImage')[1].split('="')[1].split('"')[0] if img and img.startswith('http'): if src: coverlink, success, _ = cache_img("book", bookID + '_ww', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) # if librarything has no image they return a 1x1 gif data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink)) elif success: logger.debug("Caching whatwork cover for %s" % bookID) return coverlink, 'whatwork' else: logger.debug('Failed to cache image for %s [%s]' % (img, coverlink)) else: logger.debug("No image found in work page for %s" % bookID) except IndexError: logger.debug('workCoverImage not found in work page for %s' % bookID) try: img = work.split('og:image')[1].split('="')[1].split('"')[0] if img and img.startswith('http'): if src: coverlink, success, _ = cache_img("book", bookID + '_ww', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) # if librarything has no image they return a 1x1 gif data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink)) if success: logger.debug("Caching whatwork cover for %s" % bookID) return coverlink, 'whatwork' else: logger.debug('Failed to cache image for %s [%s]' % (img, coverlink)) else: logger.debug("No image found in work page for %s" % bookID) except IndexError: logger.debug('og:image not found in work page for %s' % bookID) else: logger.debug('No work page for %s' % bookID) if src: return None, src cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?' 
cmd += ' and books.AuthorID = authors.AuthorID' item = myDB.match(cmd, (bookID,)) safeparams = '' booklink = '' if item: title = safe_unicode(item['BookName']) author = safe_unicode(item['AuthorName']) if PY2: title = title.encode(lazylibrarian.SYS_ENCODING) author = author.encode(lazylibrarian.SYS_ENCODING) booklink = item['BookLink'] safeparams = quote_plus("%s %s" % (author, title)) # try to get a cover from goodreads if not src or src == 'goodreads': if booklink and 'goodreads' in booklink: # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID} # and scrape the page for og:image # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/ # 1388267702i/16304._UY475_SS475_.jpg"/> # to get the cover result, success = fetchURL(booklink) if success: try: img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0] except IndexError: try: img = result.split('og:image')[1].split('="')[1].split('"')[0] except IndexError: img = None if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img: if src == 'goodreads': coverlink, success, _ = cache_img("book", bookID + '_gr', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink)) elif success: logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName'])) return coverlink, 'goodreads' else: logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink)) else: logger.debug("No image found in goodreads page for %s" % bookID) else: logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result)) if src: return None, src if not src or src == 'googleisbn': # try a google isbn page search... # there is no image returned if google doesn't have a link for buying the book if safeparams: URL = "http://www.google.com/search?q=ISBN+" + safeparams result, success = fetchURL(URL) if success: try: img = result.split('imgurl=')[1].split('&imgrefurl')[0] except IndexError: try: img = result.split('img src="')[1].split('"')[0] except IndexError: img = None if img and img.startswith('http'): if src: coverlink, success, _ = cache_img("book", bookID + '_gi', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink)) elif success: logger.debug("Caching google isbn cover for %s %s" % (item['AuthorName'], item['BookName'])) return coverlink, 'google isbn' else: logger.debug("Error caching google image %s, [%s]" % (img, coverlink)) else: logger.debug("No image found in google isbn page for %s" % bookID) else: logger.debug("Failed to fetch url from google") else: logger.debug("No parameters for google isbn search for %s" % bookID) if src: return None, src if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']: # try a google image search... 
# tbm=isch search images # tbs=isz:l large images # ift:jpg jpeg file type if safeparams: URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook" img = None result, success = fetchURL(URL) if success: try: img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0] except IndexError: img = None if img and img.startswith('http'): if src: coverlink, success, _ = cache_img("book", bookID + '_gb', img) else: coverlink, success, _ = cache_img("book", bookID, img, refresh=True) data = '' coverfile = os.path.join(lazylibrarian.DATADIR, coverlink) if os.path.isfile(coverfile): with open(coverfile, 'rb') as f: data = f.read() if len(data) < 50: logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink)) elif success: logger.debug("Caching google search cover for %s %s" % (item['AuthorName'], item['BookName'])) return coverlink, 'google image' else: logger.debug("Error getting google image %s, [%s]" % (img, coverlink)) else: logger.debug("No image found in google page for %s" % bookID) else: logger.debug("No parameters for google image search for %s" % bookID) if src: return None, src logger.debug("No image found from any configured source") return None, src except Exception: logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc()) return None, src
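# A minimal usage sketch for getBookCover's fallback chain, assuming an initialized
# lazylibrarian runtime and a valid bookID; the helper name is illustrative, and the
# source names follow the docstring above:
def _find_any_cover(bookID):
    """Try each cover source in turn, returning the first (link, source) hit."""
    for source in ['cache', 'cover', 'librarything', 'whatwork',
                   'goodreads', 'googleisbn', 'googleimage']:
        coverlink, src = getBookCover(bookID, src=source)
        if coverlink:
            return coverlink, src
    return None, None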
def getBookWork(bookID=None, reason=None): """ return the contents of the LibraryThing workpage for the given bookid preferably from the cache. If not already cached cache the results Return None if no workpage available """ if not bookID: logger.error("getBookWork - No bookID") return None if not reason: reason = "" myDB = database.DBConnection() item = myDB.match('select BookName,AuthorName,BookISBN from books where bookID="%s"' % bookID) if item: cacheLocation = "WorkCache" cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation) if not os.path.exists(cacheLocation): os.mkdir(cacheLocation) workfile = os.path.join(cacheLocation, bookID + '.html') # does the workpage need to expire? #if os.path.isfile(workfile): # cache_modified_time = os.stat(workfile).st_mtime # time_now = time.time() # expiry = lazylibrarian.CACHE_AGE * 24 * 60 * 60 # expire cache after this many seconds # if cache_modified_time < time_now - expiry: # # Cache entry is too old, delete it # os.remove(workfile) if os.path.isfile(workfile): # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1 logger.debug(u"getBookWork: Returning Cached WorkPage for %s %s" % (bookID, reason)) with open(workfile, "r") as cachefile: source = cachefile.read() return source else: lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1 bookisbn = item['BookISBN'] if bookisbn: URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + bookisbn else: title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING) author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING) safeparams = urllib.quote_plus("%s %s" % (author, title)) URL = 'http://www.librarything.com/api/whatwork.php?title=' + safeparams librarything_wait() result, success = fetchURL(URL) if success: try: workpage = result.split('<link>')[1].split('</link>')[0] librarything_wait() result, success = fetchURL(workpage) except Exception: try: errmsg = result.split('<error>')[1].split('</error>')[0] # still cache if whatwork returned a result without a link, so we don't keep retrying logger.debug(u"getBookWork: Got librarything error page: [%s] %s" % (errmsg, URL.split('?')[1])) except Exception: logger.debug(u"getBookWork: Unable to find workpage link for %s" % URL.split('?')[1]) return None if success: logger.debug(u"getBookWork: Caching workpage for %s" % workfile) with open(workfile, "w") as cachefile: cachefile.write(result) return result else: logger.debug(u"getBookWork: Unable to cache workpage for %s, got %s" % (workpage, result)) return None else: logger.debug(u"getBookWork: Unable to cache response for %s, got %s" % (URL, result)) return None else: logger.debug('Get Book Work - Invalid bookID [%s]' % bookID) return None
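# getBookWork extracts either a <link> or an <error> element from the whatwork API
# response using plain string splits. The same parse in isolation, as a sketch that
# mirrors the splits above (the helper name is illustrative):
def _parse_whatwork(result):
    """Return ('link', url) or ('error', message) from a whatwork response."""
    try:
        return 'link', result.split('<link>')[1].split('</link>')[0]
    except IndexError:
        try:
            return 'error', result.split('<error>')[1].split('</error>')[0]
        except IndexError:
            return 'error', 'Unknown Error'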
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid,
        or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time,
        # more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None

            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            librarything_wait()
            result, success = fetchURL(URL)

            if bookID and success:
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True

            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
def KAT(book=None, test=False): errmsg = '' provider = "KAT" host = lazylibrarian.CONFIG['KAT_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/usearch/" + quote(book['searchterm'])) params = {"category": "books", "field": "seeders", "sorder": "desc"} searchURL = providerurl + "/?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) result, success = fetchURL(searchURL) if not success: # seems KAT returns 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success results = [] if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 soup = BeautifulSoup(result, 'html5lib') rows = [] try: table = soup.find_all('table')[1] # un-named table if table: rows = table.find_all('tr') except IndexError: # no results table in result page rows = [] if len(rows) > 1: rows = rows[1:] # first row is headers for row in rows: td = row.find_all('td') if len(td) > 3: try: title = unaccented(td[0].text) # kat can return magnet or torrent or both. magnet = '' url = '' mode = 'torrent' try: magnet = 'magnet' + str( td[0]).split('href="magnet')[1].split('"')[0] mode = 'magnet' except IndexError: pass try: url = 'http' + str(td[0]).split('href="http')[1].split( '.torrent?')[0] + '.torrent' mode = 'torrent' except IndexError: pass if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' try: size = str(td[1].text).replace(' ', '').upper() mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 try: seeders = int(td[3].text) except ValueError: seeders = 0 if not url or not title: logger.debug('Missing url or title') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
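# KAT above (and TPB/GEN below) repeat the same K/M/G suffix arithmetic to turn a
# display size into a byte count; newer code in this file calls a size_in_bytes()
# helper for this. A sketch of such a helper, consistent with the inline logic but
# not copied from the library (the name is illustrative):
def _size_in_bytes(size_str):
    """Convert a size string such as '1.4 MB' to an integer byte count."""
    size_str = size_str.replace(' ', '').upper()
    mult = 1
    for suffix, factor in [('K', 1024), ('M', 1024 ** 2), ('G', 1024 ** 3)]:
        if suffix in size_str:
            size_str = size_str.split(suffix)[0]
            mult = factor
            break
    try:
        return int(float(size_str) * mult)
    except ValueError:
        return 0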
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug( '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host if host[-1:] == '/': host = host[:-1] URL = host + '/api?' + urllib.urlencode(params) sterm = book['searchterm'] if isinstance(sterm, str) and hasattr(sterm, "decode"): sterm = sterm.decode('utf-8') rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL) if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e))) rootxml = None else: if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) BlockProvider(host, result) if rootxml is not None: # to debug because of api logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error("%s - %s" % (host, errormsg)) # maybe the host doesn't support the search type match = False if (provider['BOOKSEARCH'] and searchType in ["book", "shortbook"]) or \ (provider['AUDIOSEARCH'] and searchType in ["audio", "shortaudio"]): errorlist = [ 'no such function', 'unknown parameter', 'unknown function', 'bad request', 'incorrect parameter', 'does not support' ] for item in errorlist: if item in errormsg.lower(): match = True if match: count = 0 if searchType in ["book", "shortbook"]: msg = 'BOOKSEARCH' elif searchType in ["audio", "shortaudio"]: msg = 'AUDIOSEARCH' else: msg = '' if not msg: logger.error( 'Error trying to disable searchtype [%s] for %s' % (searchType, host)) else: while count < len(lazylibrarian.NEWZNAB_PROV): if lazylibrarian.NEWZNAB_PROV[count][ 'HOST'] == provider['HOST']: if str(provider['MANUAL']) == 'False': logger.error("Disabled %s=%s for %s" % (msg, provider[msg], provider['HOST'])) lazylibrarian.NEWZNAB_PROV[count][ msg] = "" threadname = threading.currentThread( ).name lazylibrarian.config_write() threading.currentThread( ).name = threadname else: logger.error( "Unable to disable %s for %s [MANUAL=%s]" % (msg, provider['HOST'], provider['MANUAL'])) count += 1 if not match: BlockProvider(provider['HOST'], errormsg) else: resultxml = rootxml.getiterator('item') nzbcount = 0 maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0) for nzb in resultxml: try: thisnzb = ReturnResultsFieldsBySearchType( book, nzb, host, searchMode, provider['DLPRIORITY']) if not maxage: nzbcount += 1 results.append(thisnzb) else: # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200 nzbdate = thisnzb['nzbdate'] try: parts = nzbdate.split(' ') nzbdate = ' '.join( parts[:5]) # strip the +0200 dt = datetime.datetime.strptime( nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple() nzbage = age( '%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday)) except Exception as e: logger.debug( 'Unable to get age from [%s] %s %s' % (thisnzb['nzbdate'], type(e).__name__, str(e))) nzbage = 0 if nzbage <= maxage: nzbcount += 1 
results.append(thisnzb) else: logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage))) except IndexError: logger.debug('No results from %s for %s' % (host, sterm)) logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm)) else: logger.debug('No data returned from %s for %s' % (host, sterm)) return results
def ZOO(book=None, test=False): errmsg = '' provider = "zooqle" host = lazylibrarian.CONFIG['ZOO_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/search") params = {"q": book['searchterm'], "category": "books", "fmt": "rss"} searchURL = providerurl + "?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) seeders = int(item['torrent_seeds']) link = item['links'][1]['href'] size = int(item['links'][1]['length']) magnet = item['torrent_magneturi'] url = None mode = 'torrent' if link: url = link mode = 'torrent' if magnet: if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' if not url or not title: logger.debug('No url or title found') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # looks like zooqle has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % ( searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host if host[-1:] == '/': host = host[:-1] URL = host + '/api?' + urlencode(params) sterm = makeUnicode(book['searchterm']) rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL, raw=True) if test: try: result = result.decode('utf-8') except UnicodeDecodeError: result = result.decode('latin-1') except AttributeError: pass if result.startswith('<') and result.endswith('/>') and "error code" in result: result = result[1:-2] success = False if not success: logger.debug(result) return success, result if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e))) rootxml = None else: try: result = result.decode('utf-8') except UnicodeDecodeError: result = result.decode('latin-1') except AttributeError: pass if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, result, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], result) if rootxml is not None: # to debug because of api logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error("%s - %s" % (host, errormsg)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, errormsg, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], errormsg) else: resultxml = rootxml.getiterator('item') nzbcount = 0 maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0) for nzb in resultxml: try: thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY']) thisnzb['dispname'] = provider['DISPNAME'] if not maxage: nzbcount += 1 results.append(thisnzb) else: # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200 nzbdate = thisnzb['nzbdate'] try: parts = nzbdate.split(' ') nzbdate = ' '.join(parts[:5]) # strip the +0200 dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple() nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday)) except Exception as e: logger.warn('Unable to get age from [%s] %s %s' % (thisnzb['nzbdate'], type(e).__name__, str(e))) nzbage = 0 if nzbage <= maxage: nzbcount += 1 results.append(thisnzb) else: logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage))) except IndexError: logger.debug('No results from %s for %s' % (host, sterm)) logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm)) else: logger.debug('No data returned from %s for %s' % (host, sterm)) return results
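# The USENET_RETENTION filter above parses pubDates like 'Mon, 27 May 2013 02:12:09 +0200'
# by dropping the zone offset before strptime. The same step in isolation, as a
# self-contained sketch; the age-in-days calculation is simplified to plain datetime
# arithmetic rather than the age() helper used above:
import datetime

def _nzb_age_days(nzbdate):
    """Best-effort age in days of an nzb pubDate string; 0 if unparseable."""
    try:
        trimmed = ' '.join(nzbdate.split(' ')[:5])  # strip the +0200
        dt = datetime.datetime.strptime(trimmed, "%a, %d %b %Y %H:%M:%S")
        return (datetime.datetime.now() - dt).days
    except (ValueError, AttributeError):
        return 0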
def get_capabilities(provider):
    """
    query provider for caps if none loaded yet, or if config entry is too old and not set manually.
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
    else:
        host = provider['HOST']
        if not str(host)[:4] == "http":
            host = 'http://' + host
        URL = host + '/api?t=caps&apikey=' + provider['API']
        logger.debug('Requesting capabilities for %s' % URL)
        source_xml, success = fetchURL(URL)
        if success:
            data = ElementTree.fromstring(source_xml)
        else:
            logger.debug(u"Error getting xml from %s, %s" % (URL, source_xml))
            data = ''
        if len(data):
            logger.debug(u"Parsing xml for capabilities of %s" % URL)
            #
            # book search isn't mentioned in the caps xml returned by
            # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
            # but the newznab+ ones usually support t=book and categories in 7000 range
            # whereas nZEDb ones don't support t=book and use categories in 8000 range
            # also some providers give searchtype but no supportedparams, so we still
            # can't tell what queries will be accepted
            # also category names can be lowercase or Mixed, magazine subcat name isn't
            # consistent, and subcat can be just subcat or category/subcat subcat > lang
            # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
            # Load all languages for now as we don't know which the user might want
            #
            # set some defaults
            #
            provider['GENERALSEARCH'] = 'search'
            provider['EXTENDED'] = '1'
            provider['BOOKCAT'] = ''
            provider['MAGCAT'] = ''
            provider['BOOKSEARCH'] = ''
            provider['MAGSEARCH'] = ''
            #
            search = data.find('searching/search')
            if search is not None:
                if 'available' in search.attrib:
                    if search.attrib['available'] == 'yes':
                        provider['GENERALSEARCH'] = 'search'
            categories = data.getiterator('category')
            for cat in categories:
                if 'name' in cat.attrib:
                    if cat.attrib['name'].lower() == 'books':
                        bookcat = cat.attrib['id']  # keep main bookcat for later
                        provider['BOOKCAT'] = bookcat
                        provider['MAGCAT'] = ''
                        if provider['BOOKCAT'] == '7000':
                            # looks like newznab+, should support book-search
                            provider['BOOKSEARCH'] = 'book'
                            # but check in case
                            search = data.find('searching/book-search')
                            if search is not None:
                                if 'available' in search.attrib:
                                    if search.attrib['available'] == 'yes':
                                        provider['BOOKSEARCH'] = 'book'
                                    else:
                                        provider['BOOKSEARCH'] = ''
                        else:
                            # looks like nZEDb, probably no book-search
                            provider['BOOKSEARCH'] = ''
                            # but check in case
                            search = data.find('searching/book-search')
                            if search is not None:
                                if 'available' in search.attrib:
                                    if search.attrib['available'] == 'yes':
                                        provider['BOOKSEARCH'] = 'book'
                                    else:
                                        provider['BOOKSEARCH'] = ''
                        subcats = cat.getiterator('subcat')
                        for subcat in subcats:
                            if 'ebook' in subcat.attrib['name'].lower():
                                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
                            if 'magazines' in subcat.attrib['name'].lower() or 'mags' in subcat.attrib['name'].lower():
                                if provider['MAGCAT']:
                                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                                else:
                                    provider['MAGCAT'] = subcat.attrib['id']
                        # if no specific magazine subcategory, use books
                        if not provider['MAGCAT']:
                            provider['MAGCAT'] = bookcat
            logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
            provider['UPDATED'] = today()
            lazylibrarian.config_write()
        else:
            logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
    return provider
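# A minimal standalone probe of the same t=caps endpoint that get_capabilities
# queries, assuming a newznab-compatible host; the helper name and the returned
# dict shape are illustrative, not part of the original code:
def _probe_caps(host, apikey):
    """Fetch t=caps and report whether book-search is advertised, plus category ids."""
    url = host.rstrip('/') + '/api?t=caps&apikey=' + apikey
    source_xml, success = fetchURL(url)
    if not success:
        return None
    data = ElementTree.fromstring(source_xml)
    booksearch = data.find('searching/book-search')
    available = booksearch is not None and booksearch.attrib.get('available') == 'yes'
    cats = [c.attrib.get('id') for c in data.getiterator('category')]
    return {'book-search': available, 'categories': cats}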
def NZBDownloadMethod(bookid=None, nzbtitle=None, nzburl=None): myDB = database.DBConnection() Source = '' downloadID = '' if lazylibrarian.CONFIG['NZB_DOWNLOADER_SABNZBD'] and lazylibrarian.CONFIG[ 'SAB_HOST']: Source = "SABNZBD" downloadID = sabnzbd.SABnzbd(nzbtitle, nzburl, False) # returns nzb_ids or False if lazylibrarian.CONFIG['NZB_DOWNLOADER_NZBGET'] and lazylibrarian.CONFIG[ 'NZBGET_HOST']: Source = "NZBGET" # headers = {'User-Agent': USER_AGENT} # data = request.request_content(url=nzburl, headers=headers) data, success = fetchURL(nzburl) if not success: logger.debug('Failed to read nzb data for nzbget: %s' % data) downloadID = '' else: nzb = classes.NZBDataSearchResult() nzb.extraInfo.append(data) nzb.name = nzbtitle nzb.url = nzburl downloadID = nzbget.sendNZB(nzb) if lazylibrarian.CONFIG['NZB_DOWNLOADER_SYNOLOGY'] and lazylibrarian.CONFIG[ 'USE_SYNOLOGY'] and lazylibrarian.CONFIG['SYNOLOGY_HOST']: Source = "SYNOLOGY_NZB" downloadID = synology.addTorrent(nzburl) # returns nzb_ids or False if lazylibrarian.CONFIG['NZB_DOWNLOADER_BLACKHOLE']: Source = "BLACKHOLE" nzbfile, success = fetchURL(nzburl) if not success: logger.warn('Error fetching nzb from url [%s]: %s' % (nzburl, nzbfile)) nzbfile = '' if nzbfile: nzbname = str(nzbtitle) + '.nzb' nzbpath = os.path.join(lazylibrarian.CONFIG['NZB_BLACKHOLEDIR'], nzbname) try: with open(nzbpath, 'w') as f: f.write(nzbfile) logger.debug('NZB file saved to: ' + nzbpath) setperm(nzbpath) downloadID = nzbname except Exception as e: logger.error('%s not writable, NZB not saved. Error: %s' % (nzbpath, str(e))) downloadID = '' if not Source: logger.warn('No NZB download method is enabled, check config.') return False if downloadID: logger.debug('Nzbfile has been downloaded from ' + str(nzburl)) myDB.action('UPDATE books SET status = "Snatched" WHERE BookID="%s"' % bookid) myDB.action( 'UPDATE wanted SET status = "Snatched", Source = "%s", DownloadID = "%s" WHERE NZBurl="%s"' % (Source, downloadID, nzburl)) return True else: logger.error(u'Failed to download nzb @ <a href="%s">%s</a>' % (nzburl, Source)) myDB.action('UPDATE wanted SET status = "Failed" WHERE NZBurl="%s"' % nzburl) return False
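# The UPDATE statements above interpolate bookid/nzburl straight into the SQL string.
# Other functions in this file pass parameter tuples to myDB.match(cmd, (bookID,));
# assuming myDB.action supports the same placeholder style (an assumption, not
# confirmed by this file), a parameterized equivalent would be:
def _mark_snatched(myDB, bookid, nzburl, source, download_id):
    """Parameterized form of the status updates in NZBDownloadMethod."""
    myDB.action('UPDATE books SET status="Snatched" WHERE BookID=?', (bookid,))
    myDB.action('UPDATE wanted SET status="Snatched", Source=?, DownloadID=? WHERE NZBurl=?',
                (source, download_id, nzburl))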
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug( '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host if host[-1:] == '/': host = host[:-1] URL = host + '/api?' + urllib.urlencode(params) sterm = makeUnicode(book['searchterm']) rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL) if test: if result.startswith('<') and result.endswith( '/>') and "error code" in result: result = result[1:-2] success = False if not success: logger.debug(result) return success if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e))) rootxml = None else: if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, result, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], result) if rootxml is not None: # to debug because of api logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error("%s - %s" % (host, errormsg)) # maybe the host doesn't support the search type cancelled = cancelSearchType(searchType, errormsg, provider) if not cancelled: # it was some other problem BlockProvider(provider['HOST'], errormsg) else: resultxml = rootxml.getiterator('item') nzbcount = 0 maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0) for nzb in resultxml: try: thisnzb = ReturnResultsFieldsBySearchType( book, nzb, host, searchMode, provider['DLPRIORITY']) if not maxage: nzbcount += 1 results.append(thisnzb) else: # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200 nzbdate = thisnzb['nzbdate'] try: parts = nzbdate.split(' ') nzbdate = ' '.join( parts[:5]) # strip the +0200 dt = datetime.datetime.strptime( nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple() nzbage = age( '%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday)) except Exception as e: logger.debug( 'Unable to get age from [%s] %s %s' % (thisnzb['nzbdate'], type(e).__name__, str(e))) nzbage = 0 if nzbage <= maxage: nzbcount += 1 results.append(thisnzb) else: logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage))) except IndexError: logger.debug('No results from %s for %s' % (host, sterm)) logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm)) else: logger.debug('No data returned from %s for %s' % (host, sterm)) return results
def EXTRA(book=None): provider = "Extratorrent" host = lazylibrarian.EXTRA_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/rss") params = { "type": "search", "s_cat": "2", "search": book['searchterm'] } searchURL = providerurl + "/?%s" % urllib.urlencode(params) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug('Error fetching data from %s: %s' % (provider, data)) data = False results = [] minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 if data: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = int(item['seeders']) except ValueError: seeders = 0 try: size = int(item['size']) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['href'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug( '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host URL = host + '/api?' + urllib.urlencode(params) rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL) if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s' % (host, str(e))) rootxml = None else: if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) if rootxml is not None: # to debug because of api logger.debug(u'Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error(u"%s - %s" % (host, errormsg)) if provider[ 'BOOKSEARCH'] and searchType == "book": # maybe the host doesn't support it errorlist = [ 'no such function', 'unknown parameter', 'unknown function', 'incorrect parameter' ] match = False for item in errorlist: if item in errormsg.lower(): match = True if match: count = 0 while count < len(lazylibrarian.NEWZNAB_PROV): if lazylibrarian.NEWZNAB_PROV[count][ 'HOST'] == provider['HOST']: if str(provider['MANUAL']) == 'False': logger.error( "Disabled booksearch=%s for %s" % (provider['BOOKSEARCH'], provider['HOST'])) lazylibrarian.NEWZNAB_PROV[count][ 'BOOKSEARCH'] = "" lazylibrarian.config_write() else: logger.error( "Unable to disable booksearch for %s [MANUAL=%s]" % (provider['HOST'], provider['MANUAL'])) count += 1 else: resultxml = rootxml.getiterator('item') nzbcount = 0 for nzb in resultxml: try: nzbcount += 1 results.append( ReturnResultsFieldsBySearchType( book, nzb, host, searchMode)) except IndexError: logger.debug('No results from %s for %s' % (host, book['searchterm'])) logger.debug(u'Found %s nzb at %s for: %s' % (nzbcount, host, book['searchterm'])) else: logger.debug('No data returned from %s for %s' % (host, book['searchterm'])) return results
def TPB(book=None): provider = "TPB" host = lazylibrarian.TPB_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/s/?q=" + book['searchterm']) params = { "category": "601", "page": "0", "orderby": "99" } searchURL = providerurl + "&%s" % urllib.urlencode(params) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) result = False else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 soup = BeautifulSoup(result) try: table = soup.findAll('table')[0] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 2: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) for col1, col2 in zip(c1, c2): try: title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0]) magnet = str(col1).split('href="')[1].split('"')[0] size = unaccented(col1.text.split(', Size ')[1].split('iB')[0]) mult = 1 try: if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 try: seeders = int(col2.text) except ValueError: seeders = 0 if magnet and minimumseeders < seeders: # no point in asking for magnet link if not enough seeders magurl = '%s/%s' % (host, magnet) result, success = fetchURL(magurl) if not success: logger.debug('Error fetching url %s, %s' % (magurl, result)) else: magnet = None new_soup = BeautifulSoup(result) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('magnet'): magnet = output break if not magnet or not title: logger.debug('Missing magnet or title') else: if minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': magnet, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def GEN(book=None): provider = "libgen" host = lazylibrarian.CONFIG['GEN_HOST'] if not str(host)[:4] == "http": host = 'http://' + host searchURL = url_fix( host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" + book['searchterm']) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) elif '111' in result: # looks like libgen has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result) try: table = soup.findAll('table')[2] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] c7 = [] c8 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 8: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) c7.append(row.findAll('td')[7]) c8.append(row.findAll('td')[8]) for col1, col2, col7, col8 in zip(c1, c2, c7, c8): try: author = unaccented(col1.text) title = unaccented( str(col2).split('>')[2].split('<')[0].strip()) link = str(col2).split('href="')[1].split('?')[1].split('"')[0] size = unaccented(col7.text).upper() extn = col8.text try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn bookURL = url_fix(host + "/ads.php?" + link) bookresult, success = fetchURL(bookURL) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(bookURL) logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) bookresult = False if bookresult: url = None new_soup = BeautifulSoup(bookresult) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('/get.php'): url = output break if url: url = url_fix(host + url) results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'direct' }) logger.debug('Found %s, Size %s' % (title, size)) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def GEN(book=None): provider = "libgen" host = lazylibrarian.GEN_HOST if not str(host)[:4] == "http": host = 'http://' + host searchURL = url_fix(host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" + book['searchterm']) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) elif '111' in result: # looks like libgen has ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result) try: table = soup.findAll('table')[2] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] c7 = [] c8 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 8: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) c7.append(row.findAll('td')[7]) c8.append(row.findAll('td')[8]) for col1, col2, col7, col8 in zip(c1, c2, c7, c8): try: author = unaccented(col1.text) title = unaccented(str(col2).split('>')[2].split('<')[0].strip()) link = str(col2).split('href="')[1].split('?')[1].split('"')[0] size = unaccented(col7.text).upper() extn = col8.text try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError) as e: size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn bookURL = url_fix(host + "/ads.php?" + link) bookresult, success = fetchURL(bookURL) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(bookURL) logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) bookresult = False if bookresult: url = None new_soup = BeautifulSoup(bookresult) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('/get.php'): url = output break if url: url = url_fix(host + url) results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s, Size %s' % (title, size)) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def WWT(book=None, test=False): errmsg = '' provider = "WorldWideTorrents" host = lazylibrarian.CONFIG['WWT_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/torrents-search.php") sterm = makeUnicode(book['searchterm']) cat = 0 # 0=all, 36=ebooks, 52=mags, 56=audiobooks if 'library' in book: if book['library'] == 'AudioBook': cat = 56 elif book['library'] == 'eBook': cat = 36 elif book['library'] == 'magazine': cat = 52 page = 0 results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 next_page = True while next_page: params = { "search": book['searchterm'], "page": page, "cat": cat } searchURL = providerurl + "/?%s" % urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # might return 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True elif '503' in result: logger.warn("Cloudflare bot detection? %s: %s" % (provider, result)) logger.warn("Try unblocking %s from a browser" % providerurl) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result, 'html5lib') rows = [] try: tables = soup.find_all('table') # un-named table table = tables[2] if table: rows = table.find_all('tr') except IndexError: # no results table in result page rows = [] if len(rows) > 1: rows = rows[1:] # first row is headers for row in rows: td = row.find_all('td') if len(td) > 3: try: title = unaccented(td[0].text) # can return magnet or torrent or both. magnet = '' url = '' mode = 'torrent' try: magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0] mode = 'magnet' except IndexError: pass try: url = url_fix(host + '/download.php') + \ str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent' mode = 'torrent' except IndexError: pass if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' try: size = str(td[1].text).replace(' ', '').upper() size = size_in_bytes(size) except ValueError: size = 0 try: seeders = int(td[2].text.replace(',', '')) except ValueError: seeders = 0 if not url or not title: logger.debug('Missing url or title') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) next_page = True else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn('Maximum results page search reached, still more results available') next_page = False logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. OpenLibrary image
            7. Google isbn search (if google has a link to book for sale)
            8. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, openlibrary, googleimage
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                        lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)

                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        # try to get a cover from openlibrary
        if not src or src == 'openlibrary':
            if item['BookISBN']:
                baseurl = 'https://openlibrary.org/api/books?format=json&jscmd=data&bibkeys=ISBN:'
                result, success = fetchURL(baseurl + item['BookISBN'])
                if success:
                    try:
                        source = json.loads(result)  # type: dict
                    except Exception as e:
                        logger.debug("OpenLibrary json error: %s" % e)
                        source = []

                    img = ''
                    if source:
                        # dict.keys() is not subscriptable on python3, so wrap in a list
                        k = list(source.keys())[0]
                        try:
                            img = source[k]['cover']['medium']
                        except KeyError:
                            try:
                                img = source[k]['cover']['large']
                            except KeyError:
                                logger.debug("No openlibrary image for %s" % item['BookISBN'])

                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'openlibrary':
                            coverlink, success, _ = cache_img("book", bookID + '_ol', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty openlibrary image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching openlibrary cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'openlibrary'
                else:
                    logger.debug("OpenLibrary error: %s" % result)
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None
                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
    return None, src
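
# Callers can let getBookCover() walk the whole cascade (src=None) or probe a single source.
# A hedged usage sketch, assuming a valid bookID exists in the database;
# _cover_cascade_example is illustrative only and not part of lazylibrarian.
def _cover_cascade_example(bookID):
    link, used = getBookCover(bookID)                   # try every source in order
    if not link:
        link, used = getBookCover(bookID, src='cover')  # only check for cover.jpg beside the book
    if link:
        logger.debug('cover cached at %s (from %s)' % (link, used))
    return link, used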
def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace(' ', '')
                        size = size_in_bytes(size)
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if minimumseeders < seeders:
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' % (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break

                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
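
# TPB search rows only carry a link to a details page; the real magnet URI is scraped from
# that page, as in the loop above. A minimal sketch of just that extraction step, assuming
# html5lib is installed (the parsers above already require it); illustrative only.
def _first_magnet_sketch(html):
    """Return the first magnet href found in an HTML page, or None."""
    soup = BeautifulSoup(html, 'html5lib')
    for a in soup.find_all('a'):
        href = a.get('href')
        if href and href.startswith('magnet'):
            return href
    return None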
def getBookCover(bookID=None):
    """ Return link to a local file containing a book cover image for a bookid.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing whatwork
            4. Goodreads search if book was imported from goodreads
            5. Google images search
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, bookID + '.jpg')
    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug(u"getBookCover: Returning Cached response for %s" % coverfile)
        coverlink = 'cache/' + bookID + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    item = myDB.match('select BookFile from books where bookID="%s"' % bookID)
    if item:
        bookfile = item['BookFile']
        if bookfile:  # we may have a cover.jpg in the same folder
            bookdir = os.path.dirname(bookfile)
            coverimg = os.path.join(bookdir, "cover.jpg")
            if os.path.isfile(coverimg):
                logger.debug(u"getBookCover: Copying book cover to %s" % coverfile)
                shutil.copyfile(coverimg, coverfile)
                coverlink = 'cache/' + bookID + '.jpg'
                return coverlink

    # if no cover.jpg, see if librarything workpage has a cover
    work = getBookWork(bookID, "Cover")
    if work:
        try:
            img = work.split('og:image')[1].split('="')[1].split('"')[0]
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug(u"getBookCover: Caching librarything cover for %s" % bookID)
                    return coverlink
            else:
                logger.debug("getBookCover: No image found in work page for %s" % bookID)
        except IndexError:
            logger.debug('getBookCover: Image not found in work page for %s' % bookID)

    # not found in librarything work page, try to get a cover from goodreads or google instead
    item = myDB.match('select BookName,AuthorName,BookLink from books where bookID="%s"' % bookID)
    if item:
        title = safe_unicode(item['BookName']).encode(lazylibrarian.SYS_ENCODING)
        author = safe_unicode(item['AuthorName']).encode(lazylibrarian.SYS_ENCODING)
        booklink = item['BookLink']
        safeparams = urllib.quote_plus("%s %s" % (author, title))
        if 'goodreads' in booklink:
            # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
            # and scrape the page for og:image
            # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
            # 1388267702i/16304._UY475_SS475_.jpg"/>
            # to get the cover
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                time.sleep(1)
                lazylibrarian.LAST_GOODREADS = time_now
            result, success = fetchURL(booklink)
            if success:
                try:
                    img = result.split('og:image')[1].split('="')[1].split('"')[0]
                except IndexError:
                    img = None
                if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                    time_now = int(time.time())
                    if time_now <= lazylibrarian.LAST_GOODREADS:
                        time.sleep(1)
                        lazylibrarian.LAST_GOODREADS = time_now
                    coverlink = cache_cover(bookID, img)
                    if coverlink:
                        logger.debug("getBookCover: Caching goodreads cover for %s %s" % (author, title))
                        return coverlink
                    else:
                        logger.debug("getBookCover: Error getting goodreads image for %s, [%s]" % (img, result))
                else:
                    logger.debug("getBookCover: No image found in goodreads page for %s" % bookID)
            else:
                logger.debug("getBookCover: Error getting page %s, [%s]" % (booklink, result))

        # if this failed, try a google image search...
        # tbm=isch      search images
        # tbs=isz:l     large images
        # ift:jpg       jpeg file type
        URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
        result, success = fetchURL(URL)
        if success:
            try:
                img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink = cache_cover(bookID, img)
                if coverlink:
                    logger.debug("getBookCover: Caching google cover for %s %s" % (author, title))
                    return coverlink
                else:
                    logger.debug("getBookCover: Error getting google image %s, [%s]" % (img, result))
            else:
                logger.debug("getBookCover: No image found in google page for %s" % bookID)
        else:
            logger.debug("getBookCover: Error getting google page for %s, [%s]" % (safeparams, result))
    return None
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = int(item['seeders'].replace(',', ''))
                    except ValueError:
                        seeders = 0
                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
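
# The RSS-based providers (EXTRA above, ZOO/LIME/TDL below) all locate the torrent enclosure
# by its MIME type in the feedparser entry's links list. A minimal sketch of that lookup;
# illustrative only. Note feedparser maps the 'url' key to 'href', so either spelling works
# on its link dicts.
def _torrent_href_sketch(entry):
    """Return the href of the first application/x-bittorrent link in a feedparser entry."""
    for link in entry.get('links', []):
        if 'x-bittorrent' in link.get('type', ''):
            return link['href']
    return None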
def GEN(book=None, prov=None):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }
        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urllib.urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if result:
            logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result)
                rows = []  # ensure rows is defined even if the table has no tr elements
                try:
                    table = soup.findAll('table')[2]  # un-named table
                    if table:
                        rows = table.findAll('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if 'search.php' in search and len(rows) > 1:
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.findAll('td')
                    if 'index.php' in search and len(td) > 3:
                        try:
                            res = str(BeautifulStoneSoup(td[0].text,
                                                         convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(BeautifulStoneSoup(td[2].text,
                                                           convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
                            temp = str(td[4])
                            temp = temp.split('onmouseout')[1]
                            extn = temp.split('">')[1].split('(')[0]
                            size = temp.split('">')[1].split('(')[1].split(')')[0]
                            size = size.upper()
                            link = temp.split('href=')[1].split('"')[1]
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))
                    elif 'search.php' in search and len(td) > 8:
                        try:
                            res = str(BeautifulStoneSoup(td[1].text,
                                                         convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
                            author = formatAuthorName(res)
                            title = str(td[2]).split('>')[2].split('<')[0].strip()
                            title = str(BeautifulStoneSoup(title,
                                                           convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
                            link = str(td[2]).split('href="')[1].split('?')[1].split('"')[0]
                            size = unaccented(td[7].text).upper()
                            extn = td[8].text
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    if not size:
                        size = 0
                    else:
                        try:
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if not link.startswith('http'):
                            if "/ads.php?" in link:
                                url = url_fix(host + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)
                        else:
                            url = redirect_url(host, link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            # may return 404 if no results, not really an error
                            if '404' in bookresult:
                                logger.debug(u"No results found from %s for %s" %
                                             (provider, book['searchterm']))
                            else:
                                logger.debug(url)
                                logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                                errmsg = bookresult
                            bookresult = False

                        if bookresult:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult)
                                for link in new_soup.findAll('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.debug('Error parsing bookresult for %s: %s' % (link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                            next_page = True
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results, errmsg
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/search")

    params = {
        "q": book['searchterm'],
        "category": "books",
        "fmt": "rss"
    }
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'].replace(',', ''))
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "search": book['searchterm'],
            "page": page,
            "cat": cat
        }
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')

            rows = []
            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                  str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace(' ', '').upper()
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < seeders:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0
                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0
                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0
                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
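
# All the torrent providers above share the same seeder gate: NUMBEROFSEEDERS is the minimum
# acceptable count, stored as minimumseeders = NUMBEROFSEEDERS - 1 so the test can be the
# strict comparison 'minimumseeders < seeders'. An equivalent, arguably clearer form of the
# same check; _enough_seeders is illustrative only and not part of lazylibrarian.
def _enough_seeders(seeders, wanted):
    """True when a result has at least the configured number of seeders."""
    return seeders >= wanted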
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0
                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def get_capabilities(provider, force=False):
    """
    query provider for caps if none loaded yet, or if config entry is too old and not set manually.
    """
    if not force and len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (age(provider['UPDATED']) > lazylibrarian.CONFIG['CACHE_AGE']) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False
    else:
        match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
    else:
        host = provider['HOST']
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?t=caps'

        # most providers will give you caps without an api key
        logger.debug('Requesting capabilities for %s' % URL)
        source_xml, success = fetchURL(URL)
        # If it failed, retry with api key
        if not success:
            if provider['API']:
                URL = URL + '&apikey=' + provider['API']
                logger.debug('Requesting capabilities for %s' % URL)
                source_xml, success = fetchURL(URL)

        if success:
            try:
                data = ElementTree.fromstring(source_xml)
            except ElementTree.ParseError:
                data = ''
                logger.debug("Error parsing xml from %s, %s" % (URL, source_xml))
        else:
            logger.debug("Error getting xml from %s, %s" % (URL, source_xml))
            data = ''

        if len(data):
            logger.debug("Parsing xml for capabilities of %s" % URL)
            #
            # book search isn't mentioned in the caps xml returned by
            # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
            # but the newznab+ ones usually support t=book and categories in 7000 range
            # whereas nZEDb ones don't support t=book and use categories in 8000 range
            # also some providers give searchtype but no supportedparams, so we still
            # can't tell what queries will be accepted
            # also category names can be lowercase or Mixed, magazine subcat name isn't
            # consistent, and subcat can be just subcat or category/subcat subcat > lang
            # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
            # Load all languages for now as we don't know which the user might want
            #
            # set some defaults
            #
            provider['GENERALSEARCH'] = 'search'
            provider['EXTENDED'] = '1'
            provider['BOOKCAT'] = ''
            provider['MAGCAT'] = ''
            provider['AUDIOCAT'] = ''
            provider['BOOKSEARCH'] = ''
            provider['MAGSEARCH'] = ''
            provider['AUDIOSEARCH'] = ''

            search = data.find('searching/search')
            if search is not None:
                # noinspection PyUnresolvedReferences
                if 'available' in search.attrib:
                    # noinspection PyUnresolvedReferences
                    if search.attrib['available'] == 'yes':
                        provider['GENERALSEARCH'] = 'search'

            categories = data.getiterator('category')
            for cat in categories:
                if 'name' in cat.attrib:
                    if cat.attrib['name'].lower() == 'audio':
                        provider['AUDIOCAT'] = cat.attrib['id']
                        subcats = cat.getiterator('subcat')
                        for subcat in subcats:
                            if 'audiobook' in subcat.attrib['name'].lower():
                                provider['AUDIOCAT'] = "%s,%s" % (provider['AUDIOCAT'], subcat.attrib['id'])

                    elif cat.attrib['name'].lower() == 'books':
                        bookcat = cat.attrib['id']  # keep main bookcat for starting magazines later
                        provider['BOOKCAT'] = bookcat
                        provider['MAGCAT'] = ''
                        # set default booksearch
                        if provider['BOOKCAT'] == '7000':
                            # looks like newznab+, should support book-search
                            provider['BOOKSEARCH'] = 'book'
                        else:
                            # looks like nZEDb, probably no book-search
                            provider['BOOKSEARCH'] = ''
                        # but check in case we got some settings back
                        # (compare against None; an Element with no children is falsy even when found)
                        search = data.find('searching/book-search')
                        if search is not None:
                            # noinspection PyUnresolvedReferences
                            if 'available' in search.attrib:
                                # noinspection PyUnresolvedReferences
                                if search.attrib['available'] == 'yes':
                                    provider['BOOKSEARCH'] = 'book'
                                else:
                                    provider['BOOKSEARCH'] = ''
                        subcats = cat.getiterator('subcat')
                        for subcat in subcats:
                            if 'ebook' in subcat.attrib['name'].lower():
                                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
                            if 'magazines' in subcat.attrib['name'].lower() or 'mags' in subcat.attrib['name'].lower():
                                if provider['MAGCAT']:
                                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                                else:
                                    provider['MAGCAT'] = subcat.attrib['id']
                        # if no specific magazine subcategory, use books
                        if not provider['MAGCAT']:
                            provider['MAGCAT'] = bookcat

            logger.debug("Categories: Books %s : Mags %s : Audio %s" %
                         (provider['BOOKCAT'], provider['MAGCAT'], provider['AUDIOCAT']))
            provider['UPDATED'] = today()
            threadname = threading.currentThread().name
            lazylibrarian.config_write()
            threading.currentThread().name = threadname
        else:
            logger.warn("Unable to get capabilities for %s: No data returned" % URL)

    return provider
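
# Hedged demo of the mapping performed by get_capabilities(), using a trimmed, hypothetical
# newznab+ caps response; illustrative only. With this input, get_capabilities() would set
# BOOKSEARCH='book', BOOKCAT='7000,7020' and MAGCAT='7010'.
def _caps_demo():
    xml = ('<caps><searching><search available="yes"/><book-search available="yes"/></searching>'
           '<categories><category id="7000" name="Books">'
           '<subcat id="7020" name="EBook"/><subcat id="7010" name="Mags"/>'
           '</category></categories></caps>')
    data = ElementTree.fromstring(xml)
    cat = data.find('categories/category')
    return cat.attrib['id'], [s.attrib['id'] for s in cat.findall('subcat')]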
def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc"
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')

        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace(' ', '').upper()
                        size = size_in_bytes(size)
                    except ValueError:
                        size = 0

                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """
    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' %
                 (searchType, host, searchMode, api_key, str(book)))

    results = []

    data = None
    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)
    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        URL = host + '/api?' + urllib.urlencode(params)

        rootxml = None
        result, success = fetchURL(URL)
        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s' % (host, str(e)))
                rootxml = None
        else:
            logger.error('Error reading data from %s: %s' % (host, result))

        if rootxml is not None:
            # to debug because of api
            logger.debug(u'Parsing results from <a href="%s">%s</a>' % (URL, host))
            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error(u"%s - %s" % (host, errormsg))
                if provider['BOOKSEARCH']:  # maybe the host doesn't support it
                    errorlist = ['no such function', 'unknown parameter', 'unknown function', 'incorrect parameter']
                    match = False
                    for item in errorlist:
                        if item in errormsg.lower() and provider['BOOKSEARCH'].lower() in errormsg.lower():
                            match = True
                    if match:
                        count = 0
                        while count < len(lazylibrarian.NEWZNAB_PROV):
                            if lazylibrarian.NEWZNAB_PROV[count]['HOST'] == provider['HOST']:
                                if str(provider['MANUAL']) == 'False':
                                    logger.error("Disabled booksearch=%s for %s" %
                                                 (provider['BOOKSEARCH'], provider['HOST']))
                                    lazylibrarian.NEWZNAB_PROV[count]['BOOKSEARCH'] = ""
                                    lazylibrarian.config_write()
                                else:
                                    logger.error("Unable to disable booksearch for %s [MANUAL=%s]" %
                                                 (provider['HOST'], provider['MANUAL']))
                            count += 1
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                for nzb in resultxml:
                    try:
                        nzbcount = nzbcount + 1
                        results.append(ReturnResultsFieldsBySearchType(book, nzb, searchType, host, searchMode))
                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, book['searchterm']))
                logger.debug(u'Found %s nzb at %s for: %s' % (nzbcount, host, book['searchterm']))
        else:
            logger.debug('No data returned from %s for %s' % (host, book['searchterm']))
    return results
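
# A typical query built by NewzNabPlus() for a book search; ReturnSearchTypeStructure supplies
# the params dict from the provider capabilities above. The values here are hypothetical,
# shown only to illustrate the shape of the resulting api call:
#
#   params = {'t': 'book', 'apikey': api_key, 'author': 'Pratchett', 'title': 'Mort',
#             'cat': '7000,7020', 'extended': '1'}
#   URL = host + '/api?' + urllib.urlencode(params)
#   # -> http://indexer.example.com/api?t=book&apikey=...&author=Pratchett&title=Mort&cat=7000%2C7020&extended=1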