def setAllBookSeries():
    """ Try to set series details for all books """
    myDB = database.DBConnection()
    books = myDB.select('select BookID,WorkID,BookName from books where Manual is not "1"')
    counter = 0
    if books:
        logger.info('Checking series for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
                workid = book['WorkID']
                if not workid:
                    logger.debug("No workid for book %s: %s" % (book['BookID'], book['BookName']))
            else:
                workid = book['BookID']
                if not workid:
                    logger.debug("No bookid for book: %s" % book['BookName'])
            if workid:
                serieslist = getWorkSeries(workid)
                if serieslist:
                    counter += 1
                    setSeries(serieslist, book['BookID'])
        deleteEmptySeries()
    msg = 'Updated %s book%s' % (counter, plural(counter))
    logger.info('Series check complete: ' + msg)
    return msg
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """
    myDB = database.DBConnection()
    cmd = 'select BookID,AuthorName,BookName from books,authors where length(WorkPage) < 4'
    cmd += ' and books.AuthorID = authors.AuthorID'
    books = myDB.select(cmd)
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
        msg = 'Updated %s page%s' % (counter, plural(counter))
        logger.debug("setWorkPages complete: " + msg)
    else:
        msg = 'No missing WorkPages'
        logger.debug(msg)
    return msg
def showJobs():
    result = []
    result.append("Cache %i hit%s, %i miss" % (
        int(lazylibrarian.CACHE_HIT), plural(int(lazylibrarian.CACHE_HIT)), int(lazylibrarian.CACHE_MISS)))
    myDB = database.DBConnection()
    snatched = myDB.match("SELECT count('Status') as counter from wanted WHERE Status = 'Snatched'")
    wanted = myDB.match("SELECT count('Status') as counter FROM books WHERE Status = 'Wanted'")
    result.append("%i item%s marked as Snatched" % (snatched['counter'], plural(snatched['counter'])))
    result.append("%i item%s marked as Wanted" % (wanted['counter'], plural(wanted['counter'])))
    for job in lazylibrarian.SCHED.get_jobs():
        job = str(job)
        if "search_magazines" in job:
            jobname = "Magazine search"
        elif "checkForUpdates" in job:
            jobname = "Check LazyLibrarian version"
        elif "search_tor_book" in job:
            jobname = "TOR book search"
        elif "search_nzb_book" in job:
            jobname = "NZB book search"
        elif "search_rss_book" in job:
            jobname = "RSS book search"
        elif "processDir" in job:
            jobname = "Process downloads"
        else:
            jobname = job.split(' ')[0].split('.')[2]
        jobinterval = job.split('[')[1].split(']')[0]
        jobtime = job.split('at: ')[1].split('.')[0]
        jobtime = next_run(jobtime)
        jobinfo = "%s: Next run in %s" % (jobname, jobtime)
        result.append(jobinfo)
    return result
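# Illustrative sketch only: the string parsing showJobs() relies on, run against a
# hypothetical APScheduler job repr. The exact repr format depends on the APScheduler
# version in use; this sample string is an assumption, not captured output.
sample_job = "search_magazines (trigger: interval[0:20:00], next run at: 2018-01-01 12:00:00.123456)"
sample_interval = sample_job.split('[')[1].split(']')[0]   # -> '0:20:00'
sample_time = sample_job.split('at: ')[1].split('.')[0]    # -> '2018-01-01 12:00:00'
print(sample_interval, sample_time)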
def getAuthorImages():
    """ Try to get an author image for all authors without one """
    myDB = database.DBConnection()
    cmd = 'select AuthorID, AuthorName from authors where (AuthorImg like "%nophoto%" or AuthorImg is null)'
    cmd += ' and Manual is not "1"'
    authors = myDB.select(cmd)
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            newValueDict = {}
            if not imagelink:
                logger.debug('No image found for %s' % author['AuthorName'])
                newValueDict = {"AuthorImg": 'images/nophoto.png'}
            elif 'nophoto' not in imagelink:
                logger.debug('Updating %s image to %s' % (author['AuthorName'], imagelink))
                newValueDict = {"AuthorImg": imagelink}
            if newValueDict:
                counter += 1
                controlValueDict = {"AuthorID": authorid}
                myDB.upsert("authors", newValueDict, controlValueDict)
        msg = 'Updated %s image%s' % (counter, plural(counter))
        logger.info('Author Image check complete: ' + msg)
    else:
        msg = 'No missing author images'
        logger.debug(msg)
    return msg
def export_CSV(search_dir=None, status="Wanted"):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    if not search_dir or not os.path.isdir(search_dir):
        logger.warn("Please check Alternate Directory setting")
        return False
    csvFile = os.path.join(search_dir, "%s - %s.csv" % (status, now().replace(':', '-')))
    myDB = database.DBConnection()
    find_status = myDB.select('SELECT * FROM books WHERE Status = "%s"' % status)
    if not find_status:
        logger.warn(u"No books marked as %s" % status)
    else:
        count = 0
        with open(csvFile, 'wb') as csvfile:
            csvwrite = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            # write headers, change AuthorName BookName BookIsbn to match import csv names (Author, Title, ISBN10)
            csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])
            for resulted in find_status:
                logger.debug(u"Exported CSV for book %s" % resulted['BookName'])
                row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                        resulted['BookIsbn'], resulted['AuthorID']])
                csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                count += 1
        logger.info(u"CSV exported %s book%s to %s" % (count, plural(count), csvFile))
def export_CSV(search_dir=None, status="Wanted", library='eBook'):
    """ Write a csv file to the search_dir containing all books marked as "Wanted" """
    # noinspection PyBroadException
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg
        elif not os.access(search_dir, os.W_OK | os.X_OK):
            msg = "Alternate Directory [%s] not writable" % search_dir
            logger.warn(msg)
            return msg

        csvFile = os.path.join(search_dir, "%s %s - %s.csv" % (status, library, now().replace(':', '-')))
        myDB = database.DBConnection()
        cmd = 'SELECT BookID,AuthorName,BookName,BookIsbn,books.AuthorID FROM books,authors '
        if library == 'eBook':
            cmd += 'WHERE books.Status=? and books.AuthorID = authors.AuthorID'
        else:
            cmd += 'WHERE AudioStatus=? and books.AuthorID = authors.AuthorID'
        find_status = myDB.select(cmd, (status,))

        if not find_status:
            msg = "No %s marked as %s" % (library, status)
            logger.warn(msg)
        else:
            count = 0
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL)
                # write headers, change AuthorName BookName BookIsbn to match import csv names
                csvwrite.writerow(['BookID', 'Author', 'Title', 'ISBN', 'AuthorID'])
                for resulted in find_status:
                    logger.debug("Exported CSV for %s %s" % (library, resulted['BookName']))
                    row = ([resulted['BookID'], resulted['AuthorName'], resulted['BookName'],
                            resulted['BookIsbn'], resulted['AuthorID']])
                    if PY2:
                        csvwrite.writerow([("%s" % s).encode(lazylibrarian.SYS_ENCODING) for s in row])
                    else:
                        csvwrite.writerow([("%s" % s) for s in row])
                    count += 1
            msg = "CSV exported %s %s%s to %s" % (count, library, plural(count), csvFile)
            logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in exportCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
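# Standalone sketch of the PY2/PY3 csv handling that export_CSV() wraps: Python 2's
# csv module wants a binary-mode file and encoded byte strings, Python 3 wants text
# mode and unicode. The filename and encoding here are illustrative assumptions.
import csv
import sys

PY2_DEMO = sys.version_info[0] == 2
with open('demo.csv', 'wb' if PY2_DEMO else 'w') as demofile:
    demowriter = csv.writer(demofile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    demorow = ['BookID', 'Author', 'Title', 'ISBN', 'AuthorID']
    if PY2_DEMO:
        demowriter.writerow([("%s" % s).encode('utf-8') for s in demorow])
    else:
        demowriter.writerow(demorow)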
def getAuthorImages():
    """ Try to get an author image for all authors without one """
    myDB = database.DBConnection()
    authors = myDB.select('select AuthorID from authors where AuthorImg like "%nophoto%"')
    if authors:
        logger.info('Checking images for %s author%s' % (len(authors), plural(len(authors))))
        counter = 0
        for author in authors:
            authorid = author['AuthorID']
            imagelink = getAuthorImage(authorid)
            if imagelink and "nophoto" not in imagelink:
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {"AuthorImg": imagelink}
                myDB.upsert("authors", newValueDict, controlValueDict)
                counter += 1
        logger.info('Author Image check completed, updated %s image%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing images')
def GOODREADS(host=None, feednr=None, priority=0, dispname=None, test=False):
    """
    Goodreads RSS query function, return all the results in a list.
    Can handle multiple wishlists, but expects goodreads format
    (looks for goodreads category names)
    """
    results = []
    basehost = host
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host
    result, success = fetchURL(URL)
    if test:
        return success
    if success:
        data = feedparser.parse(result)
    else:
        logger.error('Error fetching data from %s: %s' % (host, result))
        BlockProvider(basehost, result)
        return []
    if data:
        logger.debug('Parsing results from %s' % URL)
        provider = data['feed']['link']
        if not dispname:
            dispname = provider
        logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries))))
        for post in data.entries:
            title = ''
            book_id = ''
            author_name = ''
            isbn = ''
            if 'title' in post:
                title = post.title
            if 'book_id' in post:
                book_id = post.book_id
            if 'author_name' in post:
                author_name = post.author_name
            if 'isbn' in post:
                isbn = post.isbn
            if title and author_name:
                results.append({
                    'rss_prov': provider,
                    'rss_feed': feednr,
                    'rss_title': title,
                    'rss_author': author_name,
                    'rss_bookid': book_id,
                    'rss_isbn': isbn,
                    'priority': priority,
                    'dispname': dispname
                })
    else:
        logger.debug('No data returned from %s' % host)
    return results
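# Minimal sketch of how GOODREADS() reads Goodreads-specific fields from a feedparser
# entry. The XML below is a hand-written stand-in for a wishlist feed (assumed shape,
# not captured output); feedparser exposes simple unknown elements as entry keys.
import feedparser

demo_xml = '''<rss version="2.0"><channel><link>http://example.com/list</link>
<item><title>Sample Book</title><author_name>A. Writer</author_name>
<book_id>12345</book_id><isbn>0000000000</isbn></item></channel></rss>'''
demo_post = feedparser.parse(demo_xml).entries[0]
for field in ['title', 'book_id', 'author_name', 'isbn']:
    print(field, demo_post[field] if field in demo_post else '')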
def _Magazine(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    if 'magid' not in kwargs:
        self.data = self._error_with_message('No Magazine Provided')
        return
    links = []
    entries = []
    title = ''
    cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
    cmd += "WHERE Title='%s' order by IssueDate DESC"
    results = myDB.select(cmd % kwargs['magid'])
    page = results[index:(index + self.PAGE_SIZE)]
    for issue in page:
        title = makeUnicode(issue['Title'])
        entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                 'id': escape('issue:%s' % issue['IssueID']),
                 'updated': opdstime(issue['IssueAcquired']),
                 'content': escape('%s - %s' % (title, issue['IssueDate'])),
                 'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'type': mimeType(issue['IssueFile'])}
        if lazylibrarian.CONFIG['OPDS_METAINFO']:
            fname = os.path.splitext(issue['IssueFile'])[0]
            res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
            entry['image'] = self.searchroot + '/' + res[0]
        entries.append(entry)

    feed = {}
    title = '%s (%s)' % (escape(title), len(entries))
    feed['title'] = 'LazyLibrarian OPDS - %s' % title
    feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
    feed['updated'] = now()
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
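# Standalone sketch of the index/PAGE_SIZE paging shared by these OPDS feed methods:
# slice the full result set, then emit next/previous links only when more pages exist.
# PAGE_SIZE and the result list are illustrative stand-ins.
PAGE_SIZE = 30
demo_results = list(range(75))   # stand-in for database rows
demo_index = 30                  # from the ?index= query parameter
demo_page = demo_results[demo_index:(demo_index + PAGE_SIZE)]
has_next = len(demo_results) > (demo_index + PAGE_SIZE)
has_prev = demo_index >= PAGE_SIZE
print(len(demo_page), has_next, has_prev)  # 30 True True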
def getBookCovers():
    """ Try to get a cover image for all books """
    myDB = database.DBConnection()
    books = myDB.select('select BookID,BookImg from books where BookImg like "%nocover%"')
    if books:
        logger.info('Checking covers for %s book%s' % (len(books), plural(len(books))))
        counter = 0
        for book in books:
            bookid = book['BookID']
            coverlink = getBookCover(bookid)
            if coverlink and "nocover" not in coverlink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": coverlink}
                myDB.upsert("books", newValueDict, controlValueDict)
                counter += 1
        logger.info('Cover check complete, updated %s cover%s' % (counter, plural(counter)))
    else:
        logger.debug('No missing book covers')
def _RecentAudio(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))

    cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
    if 'query' in kwargs:
        cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
    cmd += "AudioStatus='Open' order by AudioLibrary DESC"
    results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        title = makeUnicode(book['BookName'])
        entry = {'title': escape(title),
                 'id': escape('audio:%s' % book['BookID']),
                 'updated': opdstime(book['AudioLibrary']),
                 'href': '%s?cmd=Serve&audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'type': mimeType("we_send.zip")}
        if lazylibrarian.CONFIG['OPDS_METAINFO']:
            author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
            author = makeUnicode(author['AuthorName'])
            entry['image'] = self.searchroot + '/' + book['BookImg']
            entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
            entry['author'] = escape('%s' % author)
        else:
            entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
        entries.append(entry)

    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def _Authors(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Authors', 'id': 'Authors', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    links.append(getLink(href='%s/opensearchauthors.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml', rel='search', title='Search Authors'))

    cmd = "SELECT AuthorName,AuthorID,HaveBooks,TotalBooks,DateAdded from Authors WHERE "
    if 'query' in kwargs:
        cmd += "AuthorName LIKE '%" + kwargs['query'] + "%' AND "
    cmd += "CAST(HaveBooks AS INTEGER) > 0 order by AuthorName"
    results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for author in page:
        totalbooks = check_int(author['TotalBooks'], 0)
        havebooks = check_int(author['HaveBooks'], 0)
        lastupdated = author['DateAdded']
        name = makeUnicode(author['AuthorName'])
        entry = {
            'title': escape('%s (%s/%s)' % (name, havebooks, totalbooks)),
            'id': escape('author:%s' % author['AuthorID']),
            'updated': opdstime(lastupdated),
            'content': escape('%s (%s)' % (name, havebooks)),
            'href': '%s?cmd=Author&authorid=%s' % (self.opdsroot, author['AuthorID']),
            'author': escape('%s' % name),
            'kind': 'navigation',
            'rel': 'subsection',
        }
        # removed authorimg as it stops navigation ??
        # if lazylibrarian.CONFIG['OPDS_METAINFO']:
        #     entry['image'] = self.searchroot + '/' + author['AuthorImg']
        entries.append(entry)

    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Authors&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Authors&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s author%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def _Magazines(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))

    cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
    cmd += ' as Iss_Cnt from magazines '
    if 'query' in kwargs:
        cmd += "WHERE magazines.title LIKE '%" + kwargs['query'] + "%' "
    cmd += 'order by magazines.title'
    results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for mag in page:
        if mag['Iss_Cnt'] > 0:
            title = makeUnicode(mag['Title'])
            entry = {
                'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                'id': escape('magazine:%s' % title),
                'updated': opdstime(mag['LastAcquired']),
                'content': escape('%s' % title),
                'href': '%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(title)),
                'kind': 'navigation',
                'rel': 'subsection',
            }
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
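# Standalone sketch of the correlated subquery _Magazines() uses for per-title issue
# counts, run against an in-memory sqlite3 database (schema trimmed to the columns
# the query touches; the data is made up).
import sqlite3

demo_conn = sqlite3.connect(':memory:')
demo_conn.execute('CREATE TABLE magazines (Title TEXT)')
demo_conn.execute('CREATE TABLE issues (Title TEXT)')
demo_conn.execute("INSERT INTO magazines VALUES ('Linux Format')")
demo_conn.executemany('INSERT INTO issues VALUES (?)', [('Linux Format',), ('Linux Format',)])
demo_cmd = ('select magazines.*,(select count(*) as counter from issues '
            'where magazines.title = issues.title) as Iss_Cnt from magazines '
            'order by magazines.title')
print(demo_conn.execute(demo_cmd).fetchall())  # [('Linux Format', 2)]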
def _RecentMags(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))

    cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
    cmd += "where IssueFile != '' "
    if 'query' in kwargs:
        cmd += "AND Title LIKE '%" + kwargs['query'] + "%' "
    cmd += "order by IssueAcquired DESC"
    results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for mag in page:
        title = makeUnicode(mag['Title'])
        entry = {'title': escape('%s' % mag['IssueDate']),
                 'id': escape('issue:%s' % mag['IssueID']),
                 'updated': opdstime(mag['IssueAcquired']),
                 'content': escape('%s - %s' % (title, mag['IssueDate'])),
                 'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                 'kind': 'acquisition',
                 'rel': 'file',
                 'author': escape(title),
                 'type': mimeType(mag['IssueFile'])}
        if lazylibrarian.CONFIG['OPDS_METAINFO']:
            fname = os.path.splitext(mag['IssueFile'])[0]
            res = cache_img('magazine', mag['IssueID'], fname + '.jpg')
            entry['image'] = self.searchroot + '/' + res[0]
        entries.append(entry)

    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def createMagCovers(refresh=False):
    if not lazylibrarian.CONFIG['IMP_MAGCOVER']:
        logger.info('Cover creation is disabled in config')
        return
    myDB = database.DBConnection()
    # <> '' ignores empty string or NULL
    issues = myDB.select("SELECT IssueFile from issues WHERE IssueFile <> ''")
    if refresh:
        logger.info("Creating covers for %s issue%s" % (len(issues), plural(len(issues))))
    else:
        logger.info("Checking covers for %s issue%s" % (len(issues), plural(len(issues))))
    cnt = 0
    for item in issues:
        try:
            createMagCover(item['IssueFile'], refresh=refresh)
            cnt += 1
        except Exception as why:
            logger.warn('Unable to create cover for %s, %s %s' % (item['IssueFile'], type(why).__name__, str(why)))
    logger.info("Cover creation completed")
    if refresh:
        return "Created covers for %s issue%s" % (cnt, plural(cnt))
    return "Checked covers for %s issue%s" % (cnt, plural(cnt))
def dump_table(table, savedir=None, status=None):
    myDB = database.DBConnection()
    # noinspection PyBroadException
    try:
        columns = myDB.select('PRAGMA table_info(%s)' % table)
        if not columns:  # no such table
            logger.warn("No such table [%s]" % table)
            return 0
        if not savedir or not os.path.isdir(savedir):
            savedir = lazylibrarian.DATADIR
        headers = ''
        for item in columns:
            if headers:
                headers += ','
            headers += item[1]
        if status:
            cmd = 'SELECT %s from %s WHERE status="%s"' % (headers, table, status)
        else:
            cmd = 'SELECT %s from %s' % (headers, table)
        data = myDB.select(cmd)
        count = 0
        if data is not None:
            label = table
            if status:
                label += '_%s' % status
            csvFile = os.path.join(savedir, "%s.csv" % label)
            if PY2:
                fmode = 'wb'
            else:
                fmode = 'w'
            with open(csvFile, fmode) as csvfile:
                csvwrite = writer(csvfile, delimiter=',', quotechar='"', quoting=QUOTE_MINIMAL)
                headers = headers.split(',')
                csvwrite.writerow(headers)
                for item in data:
                    if PY2:
                        csvwrite.writerow([makeBytestr(s) if s else '' for s in item])
                    else:
                        csvwrite.writerow([str(s) if s else '' for s in item])
                    count += 1
            msg = "Exported %s item%s to %s" % (count, plural(count), csvFile)
            logger.info(msg)
        return count
    except Exception:
        msg = 'Unhandled exception in dump_table: %s' % traceback.format_exc()
        logger.error(msg)
        return 0
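# Standalone sketch of the header-building step in dump_table(), using sqlite3's
# PRAGMA table_info directly (in-memory database, made-up table).
import sqlite3

demo_conn2 = sqlite3.connect(':memory:')
demo_conn2.execute('CREATE TABLE books (BookID TEXT, BookName TEXT, Status TEXT)')
columns = demo_conn2.execute('PRAGMA table_info(books)').fetchall()
# each row is (cid, name, type, notnull, dflt_value, pk); dump_table keeps item[1]
print(','.join(item[1] for item in columns))  # BookID,BookName,Status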
def sync_to_gr():
    msg = ''
    try:
        threading.currentThread().name = 'GRSync'
        if lazylibrarian.CONFIG['GR_WANTED']:
            to_read_shelf, ll_wanted = grsync('Wanted', lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to %s shelf\n" % (to_read_shelf, plural(to_read_shelf),
                                                  lazylibrarian.CONFIG['GR_WANTED'])
            msg += "%s change%s to Wanted from GoodReads\n" % (ll_wanted, plural(ll_wanted))
        else:
            msg += "Sync Wanted books is disabled\n"
        if lazylibrarian.CONFIG['GR_OWNED']:
            to_owned_shelf, ll_have = grsync('Open', lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to %s shelf\n" % (to_owned_shelf, plural(to_owned_shelf),
                                                  lazylibrarian.CONFIG['GR_OWNED'])
            msg += "%s change%s to Owned from GoodReads\n" % (ll_have, plural(ll_have))
        else:
            msg += "Sync Owned books is disabled\n"
        logger.info(msg.strip('\n').replace('\n', ', '))
    except Exception as e:
        logger.error("Exception in sync_to_gr: %s %s" % (type(e).__name__, str(e)))
    finally:
        threading.currentThread().name = 'WEBSERVER'
    return msg
def dbUpdate(refresh=False):
    try:
        myDB = database.DBConnection()
        activeauthors = myDB.select('SELECT AuthorName from authors WHERE Status="Active" '
                                    'or Status="Loading" order by DateAdded ASC')
        logger.info('Starting update for %i active author%s' % (len(activeauthors), plural(len(activeauthors))))
        for author in activeauthors:
            authorname = author[0]
            importer.addAuthorToDB(authorname, refresh=refresh)
        logger.info('Active author update complete')
    except Exception:
        logger.error('Unhandled exception in dbUpdate: %s' % traceback.format_exc())
def setWorkPages():
    """ Set the workpage link for any books that don't already have one """
    myDB = database.DBConnection()
    books = myDB.select('select BookID,AuthorName,BookName from books where length(WorkPage) < 4')
    if books:
        logger.debug('Setting WorkPage for %s book%s' % (len(books), plural(len(books))))
        for book in books:
            bookid = book['BookID']
            worklink = getWorkPage(bookid)
            if worklink:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"WorkPage": worklink}
                myDB.upsert("books", newValueDict, controlValueDict)
            else:
                logger.debug('No WorkPage found for %s: %s' % (book['AuthorName'], book['BookName']))
    logger.debug('setWorkPages completed')
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False
    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0
                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']
                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None, test=False):
    """
    Generic NewzNabplus query function
    takes in host+key+type and returns the result set regardless of who
    based on site running NewzNab+
    ref http://usenetreviewz.com/nzb-sites/
    """
    host = provider['HOST']
    api_key = provider['API']
    logger.debug('[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (
        searchType, host, searchMode, api_key, str(book)))

    results = []
    params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode)
    if params:
        if not str(host)[:4] == "http":
            host = 'http://' + host
        if host[-1:] == '/':
            host = host[:-1]
        URL = host + '/api?' + urllib.urlencode(params)
        sterm = makeUnicode(book['searchterm'])
        rootxml = None
        logger.debug("[NewzNabPlus] URL = %s" % URL)
        result, success = fetchURL(URL)

        if test:
            if result.startswith('<') and result.endswith('/>') and "error code" in result:
                result = result[1:-2]
                success = False
            if not success:
                logger.debug(result)
            return success

        if success:
            try:
                rootxml = ElementTree.fromstring(result)
            except Exception as e:
                logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e)))
                rootxml = None
        else:
            if not result or result == "''":
                result = "Got an empty response"
            logger.error('Error reading data from %s: %s' % (host, result))
            # maybe the host doesn't support the search type
            cancelled = cancelSearchType(searchType, result, provider)
            if not cancelled:  # it was some other problem
                BlockProvider(provider['HOST'], result)

        if rootxml is not None:
            # to debug because of api
            logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host))
            if rootxml.tag == 'error':
                errormsg = rootxml.get('description', default='unknown error')
                logger.error("%s - %s" % (host, errormsg))
                # maybe the host doesn't support the search type
                cancelled = cancelSearchType(searchType, errormsg, provider)
                if not cancelled:  # it was some other problem
                    BlockProvider(provider['HOST'], errormsg)
            else:
                resultxml = rootxml.getiterator('item')
                nzbcount = 0
                maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0)
                for nzb in resultxml:
                    try:
                        thisnzb = ReturnResultsFieldsBySearchType(book, nzb, host, searchMode, provider['DLPRIORITY'])
                        if not maxage:
                            nzbcount += 1
                            results.append(thisnzb)
                        else:
                            # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200
                            nzbdate = thisnzb['nzbdate']
                            try:
                                parts = nzbdate.split(' ')
                                nzbdate = ' '.join(parts[:5])  # strip the +0200
                                dt = datetime.datetime.strptime(nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple()
                                nzbage = age('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))
                            except Exception as e:
                                logger.debug('Unable to get age from [%s] %s %s' %
                                             (thisnzb['nzbdate'], type(e).__name__, str(e)))
                                nzbage = 0
                            if nzbage <= maxage:
                                nzbcount += 1
                                results.append(thisnzb)
                            else:
                                logger.debug('%s is too old (%s day%s)' % (thisnzb['nzbtitle'], nzbage, plural(nzbage)))
                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
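# Standalone sketch of the retention check in NewzNabPlus(): usenet pubdates arrive
# like 'Mon, 27 May 2013 02:12:09 +0200', and strptime on older Pythons can't parse
# the numeric timezone, so the code drops it before parsing.
import datetime

demo_nzbdate = 'Mon, 27 May 2013 02:12:09 +0200'
demo_nzbdate = ' '.join(demo_nzbdate.split(' ')[:5])  # strip the +0200
dt = datetime.datetime.strptime(demo_nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple()
print('%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday))  # 2013-05-27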
def _Members(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    if 'seriesid' not in kwargs:
        self.data = self._error_with_message('No Series Provided')
        return
    links = []
    entries = []
    series = myDB.match("SELECT SeriesName from Series WHERE SeriesID=?", (kwargs['seriesid'],))
    cmd = "SELECT BookName,BookDate,BookAdded,BookDesc,BookImg,BookFile,AudioFile,books.BookID,SeriesNum "
    cmd += "from books,member where (Status='Open' or AudioStatus='Open') and SeriesID=? "
    cmd += "and books.bookid = member.bookid order by CAST(SeriesNum AS INTEGER)"
    results = myDB.select(cmd, (kwargs['seriesid'],))
    cmd = 'SELECT AuthorName from authors,books WHERE authors.authorid = books.authorid AND '
    cmd += 'books.bookid=?'
    res = myDB.match(cmd, (results[0]['BookID'],))
    author = res['AuthorName']
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        mime_type = None
        if book['BookFile']:
            mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
        if mime_type:
            if book['SeriesNum']:
                snum = ' (%s)' % book['SeriesNum']
            else:
                snum = ''
            entry = {'title': escape('%s%s' % (book['BookName'], snum)),
                     'id': escape('book:%s' % book['BookID']),
                     'updated': opdstime(book['BookAdded']),
                     'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot, book['BookID']),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'author': escape("%s" % author),
                     'type': mime_type}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                             book['SeriesNum'], book['BookDesc']))
            else:
                entry['content'] = escape('%s (%s %s) %s' % (book['BookName'], series['SeriesName'],
                                                             book['SeriesNum'], book['BookAdded']))
            entries.append(entry)

    feed = {}
    seriesname = '%s (%s) %s' % (escape(series['SeriesName']), len(entries), author)
    feed['title'] = 'LazyLibrarian OPDS - %s' % seriesname
    feed['id'] = 'series:%s' % escape(kwargs['seriesid'])
    feed['updated'] = now()
    links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=Series' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=Members&seriesid=%s&index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                  index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=Members&seriesid=%s&index=%s' % (self.opdsroot, kwargs['seriesid'],
                                                                  index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
def processDir(reset=False):
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if len(snatched) == 0:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # not a directory, handle single file downloads here. Book/mag file in download root.
                        # move the file into its own subdirectory so we don't move/delete things that aren't ours
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            fname = os.path.splitext(fname)[0]
                            dirname = os.path.join(processpath, fname)
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror))
                            if os.path.exists(dirname):
                                # move the book and any related files too
                                # ie other book formats, or opf, jpg with same title
                                # can't move metadata.opf or cover.jpg or similar
                                # as can't be sure they are ours
                                # not sure if we need a new listdir here, or whether we can use the old one
                                list_dir = os.listdir(processpath)
                                for ourfile in list_dir:
                                    if ourfile.startswith(fname):
                                        if is_valid_booktype(ourfile, booktype="book") \
                                                or is_valid_booktype(ourfile, booktype="mag") \
                                                or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                            try:
                                                shutil.move(os.path.join(processpath, ourfile),
                                                            os.path.join(dirname, ourfile))
                                            except Exception as why:
                                                logger.debug("Failed to move file %s to %s, %s" %
                                                             (ourfile, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.debug(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))

            data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                logger.debug(u'Processing book %s' % book['BookID'])
                authorname = data['AuthorName']
                bookname = data['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')
                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                       ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                         dest_path).encode(lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    logger.debug(u'Processing magazine %s' % book['BookID'])
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                    # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        if dest_path[0] not in '._':
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR,
                                                 dest_path).encode(lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['NZBmode'])

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))}
                myDB.upsert("issues", newValueDict, controlValueDict)
                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount += 1
            notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])
            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as e:
                logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)
            if os.path.isfile(pp_path):
                pp_path = os.path.join(processpath)
            if os.path.isdir(pp_path):
                logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount += 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
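# Standalone sketch of the fuzzy name match processDir() uses to pair a snatched
# title with a download name. Assumes the fuzzywuzzy package is importable
# (LazyLibrarian bundles its own copy); the ' LL.(' bookid suffix format comes from
# the code above, while the sample titles themselves are made up.
from fuzzywuzzy import fuzz

demo_title = 'Andy Weir The Martian'
demo_download = 'The Martian - Andy Weir [epub] LL.(12345)'.split(' LL.(')[0]
print(fuzz.token_set_ratio(demo_title, demo_download))  # high score despite reordering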
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss try: threadname = threading.currentThread().name if "Thread-" in threadname: if mags is None: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select('SELECT Title, Regex, LastAcquired, \ IssueDate from magazines WHERE Status="Active"' ) else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select( 'SELECT Title, Regex, LastAcquired, IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'], )) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored logger.debug("Removing old magazine search results") myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] if not searchterm: dic = { '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '' } # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless it's not a latin-1 encodable name searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({"bookid": bookid, "searchterm": searchterm}) if not searchlist: logger.warn( 'There is nothing to search for. Mark some magazines as active.' ) for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: logger.warn( 'No nzb providers are available. Check config and blocklist' ) if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites( book, 'mag') if not nproviders: logger.warn( 'No direct providers are available. Check config and blocklist' ) if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites( book, 'mag') if not nproviders: logger.warn( 'No torrent providers are available. Check config and blocklist' ) if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites() if not nproviders: logger.warn( 'No rss providers are available. 
Check config and blocklist' ) if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item[ 'tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace( "'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int( nzbsize_temp, 1000 ) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] results = myDB.match( 'SELECT * from magazines WHERE Title=?', (bookid, )) if not results: logger.debug( 'Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int( lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int( lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: dic = { '.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '' } nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) - token_set_ratio takes care of this # remove extra spaces if they're in a row if nzbtitle_formatted and nzbtitle_formatted[ 0] == '[' and nzbtitle_formatted[-1] == ']': nzbtitle_formatted = nzbtitle_formatted[1:-1] nzbtitle_exploded_temp = " ".join( nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_exploded_temp.split( ' ') if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check (nearly) all the words in the mag title are in the nzbtitle - allow some fuzz mag_title_match = fuzz.token_set_ratio( unaccented(bookid), unaccented(nzbtitle_formatted)) if mag_title_match < check_int( lazylibrarian.CONFIG['MATCH_RATIO'], 90): logger.debug( u"Magazine token set Match failed: " + str(mag_title_match) + "% for " + nzbtitle_formatted) rejected = True else: logger.debug(u"Magazine matched: " + str(mag_title_match) + "% " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected: blocked = myDB.match( 'SELECT * from wanted WHERE NZBurl=? 
and Status="Failed"', (nzburl, )) if blocked: logger.debug( "Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList( str(results['Reject']).lower()) reject_list += getList( lazylibrarian.CONFIG['REJECT_MAGS']) lower_title = unaccented( nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL > 2: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break regex_pass = 0 if not rejected: # Magazine names have many different styles of date # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY # MonthName DD YYYY or MonthName DD, YYYY # YYYY MM or YYYY MM DD # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn # nn YYYY issue number without "Nr" before it # issue and year as a single 6 digit string eg 222015 newdatish = "none" # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos: month = month2num(nzbtitle_exploded[pos - 1]) if month: if pos - 1: day = check_int( nzbtitle_exploded[pos - 2], 1) if day > 31: # probably issue number nn day = 1 else: day = 1 newdatish = "%04d-%02d-%02d" % ( year, month, day) try: _ = datetime.date(year, month, day) regex_pass = 1 break except ValueError: regex_pass = 0 pos += 1 # MonthName DD YYYY or MonthName DD, YYYY if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and (pos - 1): month = month2num( nzbtitle_exploded[pos - 2]) if month: day = check_int( nzbtitle_exploded[ pos - 1].rstrip(','), 1) try: _ = datetime.date( year, month, day) newdatish = "%04d-%02d-%02d" % ( year, month, day) regex_pass = 2 break except ValueError: regex_pass = 0 pos += 1 # YYYY MM or YYYY MM DD if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year and pos + 1 < len( nzbtitle_exploded): month = check_int( nzbtitle_exploded[pos + 1], 0) if month: if pos + 2 < len( nzbtitle_exploded): day = check_int( nzbtitle_exploded[pos + 2], 1) else: day = 1 try: _ = datetime.date( year, month, day) newdatish = "%04d-%02d-%02d" % ( year, month, day) regex_pass = 3 break except ValueError: regex_pass = 0 pos += 1 # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): if nzbtitle_exploded[pos].lower() in [ "issue", "no", "nr", "vol" ]: if pos + 1 < len(nzbtitle_exploded): issue = check_int( nzbtitle_exploded[pos + 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 if pos + 2 < len( nzbtitle_exploded): year = check_year( nzbtitle_exploded[pos + 2]) if year and year < int( datetime.date. 
today().year): newdatish = '0' # it's old regex_pass = 4 # Issue/No/Nr/Vol nn, YYYY else: regex_pass = 5 # Issue/No/Nr/Vol nn break pos += 1 # nn YYYY issue number without "Nr" before it if not regex_pass: pos = 1 while pos < len(nzbtitle_exploded): year = check_year(nzbtitle_exploded[pos]) if year: issue = check_int( nzbtitle_exploded[pos - 1], 0) if issue: newdatish = str( issue) # 4 == 04 == 004 regex_pass = 6 if year < int(datetime.date.today( ).year): newdatish = '0' # it's old break pos += 1 # issue and year as a single 6 digit string eg 222015 if not regex_pass: pos = 0 while pos < len(nzbtitle_exploded): issue = nzbtitle_exploded[pos] if issue.isdigit() and len(issue) == 6: year = int(issue[2:]) issue = int(issue[:2]) newdatish = str( issue) # 4 == 04 == 004 regex_pass = 7 if year < int( datetime.date.today().year): newdatish = '0' # it's old break pos += 1 if not regex_pass: logger.debug( 'Magazine %s not in a recognised date format.' % nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers newdatish = "1970-01-01" # provide a fake date for bad-date issues regex_pass = 99 if rejected: rejects += 1 else: if lazylibrarian.LOGLEVEL > 2: logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish)) # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" insert_table = "pastissues" insert_status = "Skipped" control_date = results['IssueDate'] if control_date is None: # we haven't got any copies of this magazine yet # get a rough time just over a month ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # or for magazines with only an issue number, use zero if str(newdatish).isdigit(): logger.debug( 'Magazine comparing issue numbers (%s)' % newdatish) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(newdatish)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE'] ) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime( "%Y-%m-%d", time.localtime(start_time)) logger.debug( 'Magazine date comparing to %s' % control_date) else: logger.debug( 'Magazine unable to find comparison type [%s]' % newdatish) control_date = 0 if str(control_date).isdigit() and str( newdatish).isdigit(): # for issue numbers, check if later than last one we have comp_date = int(newdatish) - int(control_date) newdatish = "%s" % newdatish newdatish = newdatish.zfill( 4) # pad so we sort correctly elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(newdatish)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare( newdatish, control_date) else: # invalid comparison of date and issue number if re.match('\d+-\d\d-\d\d', str(control_date)): logger.debug( 'Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug( 'Magazine %s failed: Expecting issue number' % nzbtitle_formatted) bad_date += 1 newdatish = "1970-01-01" # this is our fake date for ones we can't decipher comp_date = 0 if comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 
'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL > 2: logger.debug(str(issues)) insert_table = "wanted" insert_status = "Wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match( 'SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL > 2: logger.debug( '%s is already in %s marked %s' % (nzbtitle, insert_table, insert_status)) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL > 2: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % ( total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % ( old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') if snatch: logger.info( 'Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) custom_notify_snatch(magazine['bookid']) scheduleJob(action='Start', target='processDir') if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
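# --- Illustrative sketch (editor's addition, not part of the original module). ---
# The issue-number branch above pads values with zfill(4) before storing them,
# so that plain string comparison sorts issues numerically. A minimal,
# self-contained demonstration of why the padding matters:
def _issue_padding_demo():
    issues = ["4", "12", "104"]
    unpadded = sorted(issues)                    # ['104', '12', '4'] - lexical, wrong
    padded = sorted(i.zfill(4) for i in issues)  # ['0004', '0012', '0104'] - numeric
    return unpadded, padded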
def LIME(book=None, test=False): errmsg = '' provider = "Limetorrent" host = lazylibrarian.CONFIG['LIME_HOST'] if not host.startswith('http'): host = 'http://' + host params = { "q": book['searchterm'] } providerurl = url_fix(host + "/searchrss/other") searchURL = providerurl + "?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = item['description'] seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip()) except (IndexError, ValueError): seeders = 0 size = item['size'] try: size = int(size) except ValueError: size = 0 try: pubdate = item['published'] except KeyError: pubdate = None url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['url'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: res = { 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'torrent', 'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY'] } if pubdate: res['tor_date'] = pubdate results.append(res) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # may have ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
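# --- Illustrative sketch (editor's addition, not part of the original module). ---
# LIME() digs the seeder count out of the RSS description text. The same
# parsing in isolation, assuming a description shaped like the provider's
# "Seeds: N , Leechers: M" format:
def _parse_seeders_demo():
    description = "Seeds: 1,234 , Leechers: 56"
    try:
        seeders = int(description.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
    except (IndexError, ValueError):
        seeders = 0
    return seeders  # 1234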
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", audiostatus="Skipped", entrystatus='Active', refresh=False): # noinspection PyBroadException try: logger.debug('[%s] Now processing books with Google Books API' % authorname) # google doesnt like accents in author names set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname)) api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 startindex = 0 removedResults = 0 duplicates = 0 ignored = 0 added_count = 0 updated_count = 0 locked_count = 0 book_ignore_count = 0 total_count = 0 number_results = 1 valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG']) # Artist is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) try: while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urlencode(self.params) try: jsonresults, in_cache = gb_json_request(URL, useCache=not refresh) if not jsonresults: number_results = 0 else: if not in_cache: api_hits += 1 number_results = jsonresults['totalItems'] except Exception as err: if hasattr(err, 'reason'): errmsg = err.reason else: errmsg = str(err) logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg) break if number_results == 0: logger.warn('Found no results for %s' % authorname) break else: logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname)) startindex += 40 for item in jsonresults['items']: total_count += 1 book = bookdict(item) # skip if no author, no author is no book. if not book['author']: logger.debug('Skipped a result without authorfield.') continue isbnhead = "" if len(book['isbn']) == 10: isbnhead = book['isbn'][0:3] elif len(book['isbn']) == 13: isbnhead = book['isbn'][3:6] booklang = book['lang'] # do we care about language? 
if "All" not in valid_langs: if book['isbn']: # seems google lies to us, sometimes tells us books are in english when they are not if booklang == "Unknown" or booklang == "en": googlelang = booklang match = False lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,)) if lang: booklang = lang['lang'] cache_hits += 1 logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead)) match = True if not match: # no match in cache, try lookup dict if isbnhead: if len(book['isbn']) == 13 and book['isbn'].startswith('979'): for lang in lazylibrarian.isbn_979_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_979_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break elif (len(book['isbn']) == 10) or \ (len(book['isbn']) == 13 and book['isbn'].startswith('978')): for lang in lazylibrarian.isbn_978_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_978_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break if match: controlValueDict = {"isbn": isbnhead} newValueDict = {"lang": booklang} myDB.upsert("languages", newValueDict, controlValueDict) if not match: booklang = thingLang(book['isbn']) lt_lang_hits += 1 if booklang: match = True myDB.action('insert into languages values (?, ?)', (isbnhead, booklang)) if match: # We found a better language match if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]: # these are all english, may need to expand this list logger.debug("%s Google thinks [%s], we think [%s]" % (book['name'], googlelang, booklang)) gb_lang_change += 1 else: # No match anywhere, accept google language booklang = googlelang # skip if language is in ignore list if booklang not in valid_langs: logger.debug('Skipped [%s] with language %s' % (book['name'], booklang)) ignored += 1 continue ignorable = ['future', 'date', 'isbn'] if lazylibrarian.CONFIG['NO_LANG']: ignorable.append('lang') rejected = None check_status = False existing_book = None bookname = book['name'] bookid = item['id'] if not bookname: logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname)) rejected = 'name', 'No bookname' else: bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip() if re.match('[^\w-]', bookname): # remove books with bad characters in title logger.debug("[%s] removed book for bad characters" % bookname) rejected = 'chars', 'Bad characters in bookname' if not rejected and lazylibrarian.CONFIG['NO_FUTURE']: # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd if book['date'] > today()[:len(book['date'])]: logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date'])) rejected = 'future', 'Future publication date [%s]' % book['date'] if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']: if not book['date']: logger.debug('Rejecting %s, no publication date' % bookname) rejected = 'date', 'No publication date' if not rejected and lazylibrarian.CONFIG['NO_ISBN']: if not isbnhead: logger.debug('Rejecting %s, no isbn' % bookname) rejected = 'isbn', 'No ISBN' if not rejected: cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID' cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? 
COLLATE NOCASE' match = myDB.match(cmd, (bookname, authorname)) if match: if match['BookID'] != bookid: # we have a different book with this author/title already logger.debug('Rejecting bookid %s for [%s][%s] already got %s' % (match['BookID'], authorname, bookname, bookid)) rejected = 'bookid', 'Got under different bookid %s' % bookid duplicates += 1 cmd = 'SELECT AuthorName,BookName,AudioStatus,books.Status FROM books,authors' cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?' match = myDB.match(cmd, (bookid,)) if match: # we have a book with this bookid already if bookname != match['BookName'] or authorname != match['AuthorName']: logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' % (bookid, authorname, bookname, match['AuthorName'], match['BookName'])) else: logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' % (bookid, authorname, bookname)) check_status = True duplicates += 1 rejected = 'got', 'Already got this book in database' # Make sure we don't reject books we have got if match['Status'] in ['Open', 'Have'] or match['AudioStatus'] in ['Open', 'Have']: rejected = None if rejected and rejected[0] not in ignorable: removedResults += 1 if check_status or rejected is None or ( lazylibrarian.CONFIG['IMP_IGNORE'] and rejected[0] in ignorable): # dates, isbn cmd = 'SELECT Status,AudioStatus,BookFile,AudioFile,Manual,BookAdded,BookName ' cmd += 'FROM books WHERE BookID=?' existing = myDB.match(cmd, (bookid,)) if existing: book_status = existing['Status'] audio_status = existing['AudioStatus'] if lazylibrarian.CONFIG['FOUND_STATUS'] == 'Open': if book_status == 'Have' and existing['BookFile']: book_status = 'Open' if audio_status == 'Have' and existing['AudioFile']: audio_status = 'Open' locked = existing['Manual'] added = existing['BookAdded'] if locked is None: locked = False elif locked.isdigit(): locked = bool(int(locked)) else: book_status = bookstatus # new_book status, or new_author status audio_status = audiostatus added = today() locked = False if rejected: reason = rejected[1] if rejected[0] in ignorable: book_status = 'Ignored' audio_status = 'Ignored' book_ignore_count += 1 else: reason = '' if locked: locked_count += 1 else: controlValueDict = {"BookID": bookid} newValueDict = { "AuthorID": authorid, "BookName": bookname, "BookSub": book['sub'], "BookDesc": book['desc'], "BookIsbn": book['isbn'], "BookPub": book['pub'], "BookGenre": book['genre'], "BookImg": book['img'], "BookLink": book['link'], "BookRate": float(book['rate']), "BookPages": book['pages'], "BookDate": book['date'], "BookLang": booklang, "Status": book_status, "AudioStatus": audio_status, "BookAdded": added, "WorkID": '', "ScanResult": reason } myDB.upsert("books", newValueDict, controlValueDict) logger.debug("Book found: " + bookname + " " + book['date']) if 'nocover' in book['img'] or 'nophoto' in book['img']: # try to get a cover from another source workcover, source = getBookCover(bookid) if workcover: logger.debug('Updated cover for %s using %s' % (bookname, source)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) elif book['img'] and book['img'].startswith('http'): link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh) if success: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) else: logger.debug('Failed to cache image for %s' % book['img']) serieslist = [] if 
book['series']: serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))] if lazylibrarian.CONFIG['ADD_SERIES']: newserieslist = getWorkSeries(bookid) if newserieslist: serieslist = newserieslist logger.debug('Updated series: %s [%s]' % (bookid, serieslist)) setSeries(serieslist, bookid) new_status = setStatus(bookid, serieslist, bookstatus) if not new_status == book_status: book_status = new_status worklink = getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not existing_book: logger.debug("[%s] Added book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) added_count += 1 else: logger.debug("[%s] Updated book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) updated_count += 1 except KeyError: pass deleteEmptySeries() logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' % (authorname, api_hits, plural(api_hits))) cmd = 'SELECT BookName, BookLink, BookDate, BookImg, BookID from books WHERE AuthorID=?' cmd += ' AND Status != "Ignored" order by BookDate DESC' lastbook = myDB.match(cmd, (authorid,)) if lastbook: # maybe there are no books [remaining] for this author lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] lastbookid = lastbook['BookID'] lastbookimg = lastbook['BookImg'] else: lastbookname = "" lastbooklink = "" lastbookdate = "" lastbookid = "" lastbookimg = "" controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": entrystatus, "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate, "LastBookID": lastbookid, "LastBookImg": lastbookimg } myDB.upsert("authors", newValueDict, controlValueDict) resultcount = added_count + updated_count logger.debug("Found %s total book%s for author" % (total_count, plural(total_count))) logger.debug("Found %s locked book%s" % (locked_count, plural(locked_count))) logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored))) logger.debug("Removed %s incorrect/incomplete result%s" % (removedResults, plural(removedResults))) logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates))) logger.debug("Ignored %s book%s" % (book_ignore_count, plural(book_ignore_count))) logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount))) myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates)) if refresh: logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" % (authorname, added_count, plural(added_count), updated_count, plural(updated_count))) else: logger.info("[%s] Book processing complete: Added %s book%s to the database" % (authorname, added_count, plural(added_count))) except Exception: logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
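# --- Illustrative sketch (editor's addition, not part of the original module). ---
# get_author_books() maps an ISBN prefix to a language through lookup tables.
# A standalone version of the same idea; the dict here is a tiny sample, not
# the real lazylibrarian.isbn_978_dict:
def _isbn_lang_demo(isbn):
    isbn_978_dict = {"0": "eng", "1": "eng", "2": "fre", "3": "ger"}  # sample only
    if len(isbn) == 10:
        isbnhead = isbn[0:3]
    elif len(isbn) == 13:
        isbnhead = isbn[3:6]
    else:
        return "Unknown"
    for prefix in isbn_978_dict:
        if isbnhead.startswith(prefix):
            return isbn_978_dict[prefix]
    return "Unknown"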
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", entrystatus='Active', refresh=False): # noinspection PyBroadException try: logger.debug('[%s] Now processing books with Google Books API' % authorname) # google doesnt like accents in author names set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname)) api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 startindex = 0 resultcount = 0 removedResults = 0 duplicates = 0 ignored = 0 added_count = 0 updated_count = 0 book_ignore_count = 0 total_count = 0 number_results = 1 valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG']) # Artist is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) try: while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urlencode(self.params) try: jsonresults, in_cache = get_json_request(URL, useCache=not refresh) if jsonresults is None: number_results = 0 else: if not in_cache: api_hits += 1 number_results = jsonresults['totalItems'] except Exception as err: if hasattr(err, 'reason'): errmsg = err.reason else: errmsg = str(err) logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg) break if number_results == 0: logger.warn('Found no results for %s' % authorname) break else: logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname)) startindex += 40 for item in jsonresults['items']: total_count += 1 # skip if no author, no author is no book. try: _ = item['volumeInfo']['authors'][0] except KeyError: logger.debug('Skipped a result without authorfield.') continue try: if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10': bookisbn = item['volumeInfo'][ 'industryIdentifiers'][0]['identifier'] else: bookisbn = "" except KeyError: bookisbn = "" isbnhead = "" if len(bookisbn) == 10: isbnhead = bookisbn[0:3] elif len(bookisbn) == 13: isbnhead = bookisbn[3:6] try: booklang = item['volumeInfo']['language'] except KeyError: booklang = "Unknown" # do we care about language? 
if "All" not in valid_langs: if bookisbn != "": # seems google lies to us, sometimes tells us books are in english when they are not if booklang == "Unknown" or booklang == "en": googlelang = booklang match = False lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,)) if lang: booklang = lang['lang'] cache_hits += 1 logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead)) match = True if not match: # no match in cache, try lookup dict if isbnhead: if len(bookisbn) == 13 and bookisbn.startswith('979'): for lang in lazylibrarian.isbn_979_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_979_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break elif (len(bookisbn) == 10) or \ (len(bookisbn) == 13 and bookisbn.startswith('978')): for lang in lazylibrarian.isbn_978_dict: if isbnhead.startswith(lang): booklang = lazylibrarian.isbn_978_dict[lang] logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead)) match = True break if match: myDB.action('insert into languages values (?, ?)', (isbnhead, booklang)) logger.debug("GB language: " + booklang) if not match: # try searching librarything for a language code using the isbn # if no language found, librarything return value is "invalid" or "unknown" # librarything returns plain text, not xml BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn proxies = proxyList() try: librarything_wait() timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30) r = requests.get(BOOK_URL, timeout=timeout, proxies=proxies) resp = r.text lt_lang_hits += 1 logger.debug( "LibraryThing reports language [%s] for %s" % (resp, isbnhead)) if resp != 'invalid' and resp != 'unknown': booklang = resp # found a language code match = True myDB.action('insert into languages values (?, ?)', (isbnhead, booklang)) logger.debug("LT language: " + booklang) except Exception as e: booklang = "" logger.error("%s finding language: %s" % (type(e).__name__, str(e))) if match: # We found a better language match if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]: # these are all english, may need to expand this list booknamealt = item['volumeInfo']['title'] logger.debug("%s Google thinks [%s], we think [%s]" % (booknamealt, googlelang, booklang)) gb_lang_change += 1 else: # No match anywhere, accept google language booklang = googlelang # skip if language is in ignore list if booklang not in valid_langs: booknamealt = item['volumeInfo']['title'] logger.debug( 'Skipped [%s] with language %s' % (booknamealt, booklang)) ignored += 1 continue try: bookpub = item['volumeInfo']['publisher'] except KeyError: bookpub = "" try: booksub = item['volumeInfo']['subtitle'] except KeyError: booksub = "" if not booksub: series = "" seriesNum = "" else: try: series = booksub.split('(')[1].split(' Series ')[0] except IndexError: series = "" if series.endswith(')'): series = series[:-1] try: seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0] if seriesNum[0] == '#': seriesNum = seriesNum[1:] except IndexError: seriesNum = "" if not seriesNum and '#' in series: words = series.rsplit('#', 1) series = words[0].strip() seriesNum = words[1].strip() if not seriesNum and ' ' in series: words = series.rsplit(' ', 1) # has to be unicode for isnumeric() if (u"%s" % words[1]).isnumeric(): series = words[0] seriesNum = words[1] try: bookdate = item['volumeInfo']['publishedDate'] except KeyError: bookdate = '0000-00-00' try: bookimg = 
item['volumeInfo']['imageLinks']['thumbnail'] except KeyError: bookimg = 'images/nocover.png' try: bookrate = item['volumeInfo']['averageRating'] except KeyError: bookrate = 0 try: bookpages = item['volumeInfo']['pageCount'] except KeyError: bookpages = 0 try: bookgenre = item['volumeInfo']['categories'][0] except KeyError: bookgenre = "" try: bookdesc = item['volumeInfo']['description'] except KeyError: bookdesc = "" rejected = False check_status = False bookname = item['volumeInfo']['title'] if not bookname: logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname)) removedResults += 1 rejected = True else: bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip() booklink = item['volumeInfo']['canonicalVolumeLink'] bookrate = float(bookrate) bookid = item['id'] # GoodReads sometimes has multiple bookids for the same book (same author/title, different # editions) and sometimes uses the same bookid if the book is the same but the title is # slightly different. Not sure if googlebooks does too, but we only want one... existing_book = myDB.match('SELECT Status,Manual,BookAdded FROM books WHERE BookID=?', (bookid,)) if existing_book: book_status = existing_book['Status'] locked = existing_book['Manual'] added = existing_book['BookAdded'] if locked is None: locked = False elif locked.isdigit(): locked = bool(int(locked)) else: book_status = bookstatus # new_book status, or new_author status added = today() locked = False if not rejected and re.match('[^\w-]', bookname): # remove books with bad characters in title logger.debug("[%s] removed book for bad characters" % bookname) removedResults += 1 rejected = True if not rejected and lazylibrarian.CONFIG['NO_FUTURE']: # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd if bookdate > today()[:len(bookdate)]: logger.debug('Rejecting %s, future publication date %s' % (bookname, bookdate)) removedResults += 1 rejected = True if not rejected: cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID' cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE' match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""'))) if match: if match['BookID'] != bookid: # we have a different book with this author/title already logger.debug('Rejecting bookid %s for [%s][%s] already got %s' % (match['BookID'], authorname, bookname, bookid)) rejected = True duplicates += 1 if not rejected: cmd = 'SELECT AuthorName,BookName FROM books,authors' cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?' 
match = myDB.match(cmd, (bookid,)) if match: # we have a book with this bookid already if bookname != match['BookName'] or authorname != match['AuthorName']: logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' % (bookid, authorname, bookname, match['AuthorName'], match['BookName'])) else: logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' % (bookid, authorname, bookname)) check_status = True duplicates += 1 rejected = True if check_status or not rejected: if book_status != "Ignored" and not locked: controlValueDict = {"BookID": bookid} newValueDict = { "AuthorID": authorid, "BookName": bookname, "BookSub": booksub, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": bookgenre, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": bookdate, "BookLang": booklang, "Status": book_status, "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'], "BookAdded": added } resultcount += 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug("Book found: " + bookname + " " + bookdate) updated = False if 'nocover' in bookimg or 'nophoto' in bookimg: # try to get a cover from librarything workcover = getBookCover(bookid) if workcover: logger.debug('Updated cover for %s to %s' % (bookname, workcover)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) updated = True elif bookimg and bookimg.startswith('http'): link, success = cache_img("book", bookid, bookimg, refresh=refresh) if success: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) updated = True else: logger.debug('Failed to cache image for %s' % bookimg) seriesdict = {} if lazylibrarian.CONFIG['ADD_SERIES']: # prefer series info from librarything seriesdict = getWorkSeries(bookid) if seriesdict: logger.debug('Updated series: %s [%s]' % (bookid, seriesdict)) updated = True # librarything doesn't have series info. Any in the title? elif series: seriesdict = {cleanName(unaccented(series)): seriesNum} setSeries(seriesdict, bookid) new_status = setStatus(bookid, seriesdict, bookstatus) if not new_status == book_status: book_status = new_status updated = True worklink = getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not existing_book: logger.debug("[%s] Added book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) added_count += 1 elif updated: logger.debug("[%s] Updated book: %s [%s] status %s" % (authorname, bookname, booklang, book_status)) updated_count += 1 else: book_ignore_count += 1 except KeyError: pass deleteEmptySeries() logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' % (authorname, api_hits, plural(api_hits))) cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?' 
cmd += ' AND Status != "Ignored" order by BookDate DESC' lastbook = myDB.match(cmd, (authorid,)) if lastbook: # maybe there are no books [remaining] for this author lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] lastbookimg = lastbook['BookImg'] else: lastbookname = "" lastbooklink = "" lastbookdate = "" lastbookimg = "" controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": entrystatus, "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate, "LastBookImg": lastbookimg } myDB.upsert("authors", newValueDict, controlValueDict) logger.debug("Found %s total book%s for author" % (total_count, plural(total_count))) logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored))) logger.debug("Removed %s bad character or no-name result%s for author" % (removedResults, plural(removedResults))) logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates))) logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count))) logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount))) myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates)) if refresh: logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" % (authorname, added_count, plural(added_count), updated_count, plural(updated_count))) else: logger.info("[%s] Book processing complete: Added %s book%s to the database" % (authorname, added_count, plural(added_count))) except Exception: logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def TDL(book=None):
    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host)
    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)

    try:
        request = urllib2.Request(searchURL)
        if lazylibrarian.PROXY_HOST:
            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
        request.add_header('User-Agent', USER_AGENT)
        data = urllib2.urlopen(request, timeout=90)
    except socket.timeout:
        logger.debug('Timeout fetching data from %s' % provider)
        data = False
    except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e:
        # may return 404 if no results, not really an error
        if hasattr(e, 'code') and e.code == 404:
            logger.debug(searchURL)
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            if hasattr(e, 'reason'):
                errmsg = e.reason
            else:
                errmsg = str(e)
            logger.debug('Error fetching data from %s: %s' % (provider, errmsg))
        data = False

    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        request = urllib2.Request(link)
                        if lazylibrarian.PROXY_HOST:
                            request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
                        request.add_header('User-Agent', USER_AGENT)
                        conn = urllib2.urlopen(request, timeout=90)
                        result = conn.read()
                        new_soup = BeautifulSoup(result)
                        # 'anchor' rather than 'link' so the rss link field isn't clobbered
                        for anchor in new_soup.findAll('a'):
                            output = anchor.get('href')
                            if output and output.startswith('magnet'):
                                url = output
                                break
                    if minimumseeders < seeders:
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
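# --- Illustrative sketch (editor's addition, not part of the original module). ---
# TDL() assembles its RSS search URL from a params dict with urlencode. The
# same step in isolation; the host below is a placeholder, not the configured
# lazylibrarian.TDL_HOST:
try:
    from urllib import urlencode        # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

def _tdl_url_demo(searchterm="example book"):
    params = {"type": "search", "cid": "2", "search": searchterm}
    return "http://example.com/rss.xml?%s" % urlencode(params)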
def GEN(book=None): provider = "libgen" host = lazylibrarian.GEN_HOST if not str(host)[:4] == "http": host = 'http://' + host searchURL = url_fix( host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" + book['searchterm']) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) elif '111' in result: # looks like libgen has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result) try: table = soup.findAll('table')[2] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] c7 = [] c8 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 8: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) c7.append(row.findAll('td')[7]) c8.append(row.findAll('td')[8]) for col1, col2, col7, col8 in zip(c1, c2, c7, c8): try: author = unaccented(col1.text) title = unaccented( str(col2).split('>')[2].split('<')[0].strip()) link = str(col2).split('href="')[1].split('?')[1].split('"')[0] size = unaccented(col7.text).upper() extn = col8.text try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError) as e: size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn bookURL = url_fix(host + "/ads.php?" + link) bookresult, success = fetchURL(bookURL) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) bookresult = False else: logger.debug(bookURL) logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) bookresult = False if bookresult: url = None new_soup = BeautifulSoup(bookresult) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('/get.php'): url = output break if url: url = url_fix(host + url) results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s, Size %s' % (title, size)) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
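# --- Illustrative sketch (editor's addition, not part of the original module). ---
# GEN() converts the human-readable sizes in the results table ("700K", "4M")
# to bytes before storing them. The same conversion in isolation, assuming the
# uppercased size strings the parser produces:
def _size_to_bytes_demo(size):
    try:
        mult = 1
        if 'K' in size:
            size = size.split('K')[0]
            mult = 1024
        elif 'M' in size:
            size = size.split('M')[0]
            mult = 1024 * 1024
        return int(float(size) * mult)  # "4M" -> 4194304
    except (ValueError, IndexError):
        return 0  # unparseable size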
def LIME(book=None): provider = "Limetorrent" host = lazylibrarian.LIME_HOST if not str(host)[:4] == "http": host = 'http://' + host searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm']) try: request = urllib2.Request(searchURL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) data = urllib2.urlopen(request, timeout=90) except (socket.timeout) as e: logger.debug('Timeout fetching data from %s' % provider) data = False except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e: # may return 404 if no results, not really an error if hasattr(e, 'code') and e.code == 404: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(searchURL) if hasattr(e, 'reason'): errmsg = e.reason else: errmsg = str(e) logger.debug('Error fetching data from %s: %s' % (provider, errmsg)) data = False results = [] minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 if data: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = item['description'] seeders = int( seeders.split('Seeds:')[1].split(',')[0].strip()) except (IndexError, ValueError) as e: seeders = 0 size = item['size'] try: size = int(size) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['url'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # may have ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error( u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def ZOO(book=None): provider = "zooqle" host = lazylibrarian.ZOO_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/search?q=" + book['searchterm']) params = {"category": "books", "fmt": "rss"} searchURL = providerurl + "&%s" % urllib.urlencode(params) try: request = urllib2.Request(searchURL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) data = urllib2.urlopen(request, timeout=90) except (socket.timeout) as e: logger.debug('Timeout fetching data from %s' % provider) data = False except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e: # may return 404 if no results, not really an error if hasattr(e, 'code') and e.code == 404: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(searchURL) if hasattr(e, 'reason'): errmsg = e.reason else: errmsg = str(e) logger.debug('Error fetching data from %s: %s' % (provider, errmsg)) data = False results = [] minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 if data: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) seeders = int(item['torrent_seeds']) link = item['links'][1]['href'] size = int(item['links'][1]['length']) magnet = item['torrent_magneturi'] url = None if link: url = link if magnet: # if both, prefer magnet over torrent url = magnet if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # looks like zooqle has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error( u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
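# --- Illustrative sketch (editor's addition, not part of the original module). ---
# ZOO() reads the torrent url and size from the second entry in feedparser's
# links list (page link first, enclosure second). A self-contained example of
# that access pattern on a minimal hand-written feed:
import feedparser

def _rss_enclosure_demo():
    rss = ('<?xml version="1.0"?><rss version="2.0"><channel><title>t</title>'
           '<item><title>Example Book</title>'
           '<link>http://example.com/page</link>'
           '<enclosure url="http://example.com/file.torrent" length="123456"'
           ' type="application/x-bittorrent"/>'
           '</item></channel></rss>')
    item = feedparser.parse(rss).entries[0]
    return item['links'][1]['href'], int(item['links'][1]['length'])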
def oldKAT(book=None): provider = "KAT" host = lazylibrarian.KAT_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/usearch/" + book['searchterm']) minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 params = { "category": "books", "field": "seeders", "sorder": "desc", "rss": "1" } searchURL = providerurl + "/?%s" % urllib.urlencode(params) try: request = urllib2.Request(searchURL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) data = urllib2.urlopen(request, timeout=90) except (socket.timeout) as e: logger.debug('Timeout fetching data from %s' % provider) data = False except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e: # seems KAT returns 404 if no results, not really an error if hasattr(e, 'code') and e.code == 404: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(searchURL) if hasattr(e, 'reason'): errmsg = e.reason else: errmsg = str(e) logger.debug('Error fetching data from %s: %s' % (provider, errmsg)) data = False results = [] if data: logger.debug(u'Parsing results from <a href="%s">KAT</a>' % searchURL) d = feedparser.parse(data) if len(d.entries): logger.debug( u"Found %i result%s from %s for %s, checking seeders" % (len(d.entries), plural(len( d.entries)), provider, book['searchterm'])) for item in d.entries: try: title = item['title'] seeders = item['torrent_seeds'] url = item['links'][1]['href'] size = int(item['links'][1]['length']) if minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': "KAT", 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug( 'Found %s but %s seeder%s' % (title, int(seeders), plural(int(seeders)))) except Exception as e: logger.error( u"An unknown error occurred in the KAT parser: %s" % str(e)) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
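# --- Illustrative sketch (editor's addition, not part of the original module). ---
# All the torrent providers share the same threshold idiom: NUMBEROFSEEDERS - 1
# turns "at least N seeders" into a simple < test.
def _seeder_threshold_demo(seeders, wanted=2):  # wanted stands in for NUMBEROFSEEDERS
    minimumseeders = int(wanted) - 1
    return minimumseeders < seeders  # True when seeders >= wanted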
def KAT(book=None):
    provider = "KAT"
    host = lazylibrarian.KAT_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + book['searchterm'])
    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urllib.urlencode(params)

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False

    results = []
    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[1]
            rows = table.findAll('tr')
        except Exception:
            # no results = no table in result page
            rows = []

        c0 = []
        c1 = []
        c3 = []
        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 3:
                    c0.append(row.findAll('td')[0])
                    c1.append(row.findAll('td')[1])
                    c3.append(row.findAll('td')[3])

        for col0, col1, col3 in zip(c0, c1, c3):
            try:
                title = unaccented(str(col0).split('cellMainLink">')[1].split('<')[0])
                # kat can return magnet or torrent or both. If both, prefer magnet...
                try:
                    url = 'magnet' + str(col0).split('href="magnet')[1].split('"')[0]
                except IndexError:
                    url = 'http' + str(col0).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                try:
                    size = str(col1.text).replace(' ', '').upper()
                    mult = 1
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col3.text)
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
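# --- Illustrative sketch (editor's addition, not part of the original module). ---
# KAT() prefers a magnet link when a result cell carries both magnet and
# .torrent hrefs. The same preference in isolation, assuming a cell string
# shaped like the scraped markup:
def _prefer_magnet_demo(cell):
    try:
        return 'magnet' + cell.split('href="magnet')[1].split('"')[0]
    except IndexError:
        return 'http' + cell.split('href="http')[1].split('.torrent?')[0] + '.torrent'

# _prefer_magnet_demo('<a href="magnet:?xt=urn:btih:abc">x</a>')
#   -> 'magnet:?xt=urn:btih:abc'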
def get_shelf_list(self):
    global consumer, client, token, user_id
    if not lazylibrarian.CONFIG['GR_API'] or not lazylibrarian.CONFIG['GR_SECRET'] or not \
            lazylibrarian.CONFIG['GR_OAUTH_TOKEN'] or not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
        logger.warn("Goodreads get shelf error: Please authorise first")
        return []
    else:
        #
        # loop over each page of shelves
        #   loop over each shelf
        #     add shelf to list
        #
        consumer = oauth.Consumer(key=str(lazylibrarian.CONFIG['GR_API']),
                                  secret=str(lazylibrarian.CONFIG['GR_SECRET']))
        token = oauth.Token(lazylibrarian.CONFIG['GR_OAUTH_TOKEN'], lazylibrarian.CONFIG['GR_OAUTH_SECRET'])
        client = oauth.Client(consumer, token)
        user_id = self.getUserId()

        current_page = 0
        shelves = []
        page_shelves = 1
        while page_shelves:
            current_page = current_page + 1
            page_shelves = 0
            shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
            body = urllib.urlencode({})
            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
            request_url = shelf_template.substitute(base='https://www.goodreads.com', user_id=user_id,
                                                    page=current_page, key=lazylibrarian.CONFIG['GR_API'])
            time_now = int(time.time())
            if time_now <= lazylibrarian.LAST_GOODREADS:
                time.sleep(1)
            lazylibrarian.LAST_GOODREADS = time_now
            try:
                response, content = client.request(request_url, 'GET', body, headers)
            except Exception as e:
                logger.error("Exception in client.request: %s %s" % (type(e).__name__, str(e)))
                return shelves

            if response['status'] != '200':
                raise Exception('Failure status: %s for page %s' % (response['status'], current_page))
            xmldoc = xml.dom.minidom.parseString(content)

            shelf_list = xmldoc.getElementsByTagName('shelves')[0]
            for item in shelf_list.getElementsByTagName('user_shelf'):
                shelf_name = item.getElementsByTagName('name')[0].firstChild.nodeValue
                shelf_count = item.getElementsByTagName('book_count')[0].firstChild.nodeValue
                shelf_exclusive = item.getElementsByTagName('exclusive_flag')[0].firstChild.nodeValue
                shelves.append({'name': shelf_name, 'books': shelf_count, 'exclusive': shelf_exclusive})
                page_shelves += 1

                if lazylibrarian.LOGLEVEL > 2:
                    logger.debug('Shelf %s : %s: Exclusive %s' % (shelf_name, shelf_count, shelf_exclusive))

            if lazylibrarian.LOGLEVEL > 2:
                logger.debug('Found %s shelves on page %s' % (page_shelves, current_page))

        logger.debug('Found %s shelves on %s page%s' % (len(shelves), current_page - 1, plural(current_page - 1)))
        return shelves
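# --- Illustrative sketch (editor's addition, not part of the original module). ---
# get_shelf_list() builds the paged shelf URL with string.Template. The same
# substitution on its own, using placeholder credentials:
from string import Template

def _shelf_url_demo(user_id="12345", key="APIKEY", page=1):  # placeholder values
    shelf_template = Template('${base}/shelf/list.xml?user_id=${user_id}&key=${key}&page=${page}')
    return shelf_template.substitute(base='https://www.goodreads.com',
                                     user_id=user_id, key=key, page=page)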
def ZOO(book=None):
    provider = "zooqle"
    host = lazylibrarian.ZOO_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host

    providerurl = url_fix(host + "/search?q=" + book['searchterm'])
    params = {"category": "books", "fmt": "rss"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False  # reset so we don't try to parse the error text below

    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']
                    url = None
                    if link:
                        url = link
                    if magnet:
                        # if both, prefer magnet over torrent
                        url = magnet
                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))

    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
def search_magazines(mags=None, reset=False): # produce a list of magazines to search for, tor, nzb, torznab, rss # noinspection PyBroadException try: threadname = threading.currentThread().name if "Thread-" in threadname: if mags is None: threading.currentThread().name = "SEARCHALLMAG" else: threading.currentThread().name = "SEARCHMAG" myDB = database.DBConnection() searchlist = [] if mags is None: # backlog search searchmags = myDB.select( 'SELECT Title, Regex, DateType, LastAcquired, \ IssueDate from magazines WHERE Status="Active"' ) else: searchmags = [] for magazine in mags: searchmags_temp = myDB.select( 'SELECT Title,Regex,DateType,LastAcquired,IssueDate from magazines \ WHERE Title=? AND Status="Active"', (magazine['bookid'], )) for terms in searchmags_temp: searchmags.append(terms) if len(searchmags) == 0: threading.currentThread().name = "WEBSERVER" return # should clear old search results as might not be available any more # ie torrent not available, changed providers, out of news server retention etc. # Only delete the "skipped" ones, not wanted/snatched/processed/ignored # logger.debug("Removing old magazine search results") # myDB.action('DELETE from pastissues WHERE Status="Skipped"') logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags)))) for searchmag in searchmags: bookid = searchmag['Title'] searchterm = searchmag['Regex'] datetype = searchmag['DateType'] if not datetype: datetype = '' if not searchterm: dic = { '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '' } # strip accents from the magazine title for easier name-matching searchterm = unaccented_str(searchmag['Title']) if not searchterm: # unless there are no ascii characters left searchterm = searchmag['Title'] searchterm = replace_all(searchterm, dic) searchterm = re.sub('[.\-/]', ' ', searchterm) if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) searchlist.append({ "bookid": bookid, "searchterm": searchterm, "datetype": datetype }) if not searchlist: logger.warn( 'There is nothing to search for. Mark some magazines as active.' ) for book in searchlist: resultlist = [] if lazylibrarian.USE_NZB(): resultlist, nproviders = IterateOverNewzNabSites(book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow: logger.warn( 'No nzb providers are available. Check config and blocklist' ) lazylibrarian.NO_NZB_MSG = timenow if lazylibrarian.USE_DIRECT(): dir_resultlist, nproviders = IterateOverDirectSites( book, 'mag') if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow: logger.warn( 'No direct providers are available. Check config and blocklist' ) lazylibrarian.NO_DIRECT_MSG = timenow if dir_resultlist: for item in dir_resultlist: # reformat the results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_TOR(): tor_resultlist, nproviders = IterateOverTorrentSites( book, 'mag') if not nproviders: # don't nag. 
Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow: logger.warn( 'No tor providers are available. Check config and blocklist' ) lazylibrarian.NO_TOR_MSG = timenow if tor_resultlist: for item in tor_resultlist: # reformat the torrent results so they look like nzbs resultlist.append({ 'bookid': item['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100', # fake date as none returned from torrents 'nzbsize': item['tor_size'], 'nzbmode': 'torrent' }) if lazylibrarian.USE_RSS(): rss_resultlist, nproviders = IterateOverRSSSites() if not nproviders: # don't nag. Show warning message no more than every 20 mins timenow = int(time.time()) if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow: logger.warn( 'No rss providers are available. Check config and blocklist' ) lazylibrarian.NO_RSS_MSG = timenow if rss_resultlist: for item in rss_resultlist: # reformat the rss results so they look like nzbs resultlist.append({ 'bookid': book['bookid'], 'nzbprov': item['tor_prov'], 'nzbtitle': item['tor_title'], 'nzburl': item['tor_url'], 'nzbdate': item[ 'tor_date'], # may be fake date as none returned from rss torrents, only rss nzb 'nzbsize': item['tor_size'], 'nzbmode': item['tor_type'] }) if not resultlist: logger.debug("No results for magazine %s" % book['searchterm']) else: bad_name = 0 bad_date = 0 old_date = 0 rejects = 0 total_nzbs = 0 new_date = 0 maglist = [] issues = [] bookid = '' for nzb in resultlist: total_nzbs += 1 bookid = nzb['bookid'] # strip accents from the magazine title for easier name-matching nzbtitle = unaccented_str(nzb['nzbtitle']) if not nzbtitle: # unless it's not a latin-1 encodable name nzbtitle = nzb['nzbtitle'] nzbtitle = nzbtitle.replace('"', '').replace( "'", "") # suppress " in titles nzburl = nzb['nzburl'] nzbprov = nzb['nzbprov'] nzbdate_temp = nzb['nzbdate'] nzbsize_temp = nzb['nzbsize'] nzbsize_temp = check_int( nzbsize_temp, 1000 ) # not all torrents returned by torznab have a size nzbsize = round(float(nzbsize_temp) / 1048576, 2) nzbdate = nzbdate2format(nzbdate_temp) nzbmode = nzb['nzbmode'] # Need to make sure that substrings of magazine titles don't get found # (e.g. Maxim USA will find Maximum PC USA) so split into "words" dic = { '.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': '', '[': ' ', ']': ' ', '#': '# ' } nzbtitle_formatted = replace_all(nzbtitle, dic).strip() # remove extra spaces if they're in a row nzbtitle_formatted = " ".join(nzbtitle_formatted.split()) nzbtitle_exploded = nzbtitle_formatted.split(' ') results = myDB.match( 'SELECT * from magazines WHERE Title=?', (bookid, )) if not results: logger.debug( 'Magazine [%s] does not match search term [%s].' 
% (nzbtitle, bookid)) bad_name += 1 else: rejected = False maxsize = check_int( lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0) if maxsize and nzbsize > maxsize: logger.debug("Rejecting %s, too large" % nzbtitle) rejected = True if not rejected: minsize = check_int( lazylibrarian.CONFIG['REJECT_MAGMIN'], 0) if minsize and nzbsize < minsize: logger.debug("Rejecting %s, too small" % nzbtitle) rejected = True if not rejected: if ' ' in bookid: bookid_exploded = bookid.split(' ') else: bookid_exploded = [bookid] # Check nzb has magazine title and a date/issue nr # eg The MagPI July 2015 if len(nzbtitle_exploded) > len(bookid_exploded): # needs to be longer as it has to include a date # check all the words in the mag title are in the nzbtitle rejected = False wlist = [] for word in nzbtitle_exploded: word = unaccented(word).lower() if word: wlist.append(word) for word in bookid_exploded: word = unaccented(word).lower() if word and word not in wlist: logger.debug( "Rejecting %s, missing %s" % (nzbtitle, word)) rejected = True break if rejected: logger.debug( "Magazine title match failed " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine title matched " + bookid + " for " + nzbtitle_formatted) else: logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded)) rejected = True if not rejected and lazylibrarian.CONFIG[ 'BLACKLIST_FAILED']: blocked = myDB.match( 'SELECT * from wanted WHERE NZBurl=? and Status="Failed"', (nzburl, )) if blocked: logger.debug( "Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected and lazylibrarian.CONFIG[ 'BLACKLIST_PROCESSED']: blocked = myDB.match( 'SELECT * from wanted WHERE NZBurl=?', (nzburl, )) if blocked: logger.debug( "Rejecting %s, blacklisted at %s" % (nzbtitle_formatted, blocked['NZBprov'])) rejected = True if not rejected: reject_list = getList( str(results['Reject']).lower()) reject_list += getList( lazylibrarian.CONFIG['REJECT_MAGS']) lower_title = unaccented( nzbtitle_formatted).lower() lower_bookid = unaccented(bookid).lower() if reject_list: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Reject: %s' % str(reject_list)) logger.debug('Title: %s' % lower_title) logger.debug('Bookid: %s' % lower_bookid) for word in reject_list: if word in lower_title and word not in lower_bookid: rejected = True logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word)) break if rejected: rejects += 1 else: regex_pass, issuedate, year = get_issue_date( nzbtitle_exploded) if regex_pass: logger.debug('Issue %s (regex %s) for %s ' % (issuedate, regex_pass, nzbtitle_formatted)) datetype_ok = True datetype = book['datetype'] if datetype: # check all wanted parts are in the regex result # Day Month Year Vol Iss (MM needs two months) if 'M' in datetype and regex_pass not in [ 1, 2, 3, 4, 5, 6, 7, 12 ]: datetype_ok = False elif 'D' in datetype and regex_pass not in [ 3, 5, 6 ]: datetype_ok = False elif 'MM' in datetype and regex_pass not in [ 1 ]: # bi monthly datetype_ok = False elif 'V' in datetype and 'I' in datetype and regex_pass not in [ 8, 9, 17, 18 ]: datetype_ok = False elif 'V' in datetype and regex_pass not in [ 2, 10, 11, 12, 13, 14, 17, 18 ]: datetype_ok = False elif 'I' in datetype and regex_pass not in [ 2, 10, 11, 12, 13, 14, 16, 17, 18 ]: datetype_ok = False elif 'Y' in datetype and regex_pass not in [ 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 13, 15, 16, 18 ]: datetype_ok = False else: datetype_ok = False logger.debug( 'Magazine %s not in a recognised date 
format.' % nzbtitle_formatted) bad_date += 1 # allow issues with good name but bad date to be included # so user can manually select them, incl those with issue numbers issuedate = "1970-01-01" # provide a fake date for bad-date issues # wanted issues go into wanted table marked "Wanted" # the rest into pastissues table marked "Skipped" or "Have" insert_table = "pastissues" comp_date = 0 if datetype_ok: control_date = results['IssueDate'] logger.debug("Control date: [%s]" % control_date) if not control_date: # we haven't got any copies of this magazine yet # get a rough time just over MAX_AGE days ago to compare to, in format yyyy-mm-dd # could perhaps calc differently for weekly, biweekly etc # For magazines with only an issue number use zero as we can't tell age if str(issuedate).isdigit(): logger.debug( 'Magazine comparing issue numbers (%s)' % issuedate) control_date = 0 elif re.match('\d+-\d\d-\d\d', str(issuedate)): start_time = time.time() start_time -= int( lazylibrarian.CONFIG['MAG_AGE'] ) * 24 * 60 * 60 # number of seconds in days if start_time < 0: # limit of unixtime (1st Jan 1970) start_time = 0 control_date = time.strftime( "%Y-%m-%d", time.localtime(start_time)) logger.debug( 'Magazine date comparing to %s' % control_date) else: logger.debug( 'Magazine unable to find comparison type [%s]' % issuedate) control_date = 0 if str(control_date).isdigit() and str( issuedate).isdigit(): # for issue numbers, check if later than last one we have if regex_pass in [10, 12, 13] and year: issuedate = "%s%04d" % (year, int(issuedate)) else: issuedate = str(issuedate).zfill(4) if not control_date: comp_date = 1 else: comp_date = int(issuedate) - int( control_date) elif re.match('\d+-\d\d-\d\d', str(control_date)) and \ re.match('\d+-\d\d-\d\d', str(issuedate)): # only grab a copy if it's newer than the most recent we have, # or newer than a month ago if we have none comp_date = datecompare( issuedate, control_date) else: # invalid comparison of date and issue number comp_date = 0 if re.match('\d+-\d\d-\d\d', str(control_date)): if regex_pass > 9 and year: # we assumed it was an issue number, but it could be a date year = check_int(year, 0) if regex_pass in [10, 12, 13]: issuedate = int(issuedate[:4]) issuenum = check_int(issuedate, 0) if year and 1 <= issuenum <= 12: issuedate = "%04d-%02d-01" % ( year, issuenum) comp_date = datecompare( issuedate, control_date) if not comp_date: logger.debug( 'Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug( 'Magazine %s failed: Expecting issue number' % nzbtitle_formatted) if not comp_date: bad_date += 1 issuedate = "1970-01-01" if issuedate == "1970-01-01": logger.debug( 'This issue of %s is unknown age; skipping.' % nzbtitle_formatted) elif not datetype_ok: logger.debug( 'This issue of %s not in a wanted date format.' 
% nzbtitle_formatted) elif comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + issuedate if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug(str(issues)) insert_table = "wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match( 'SELECT Status from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug( '%s is already in %s marked %s' % (nzbtitle, insert_table, mag_entry['Status'])) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } if insert_table == 'pastissues': # try to mark ones we've already got match = myDB.match( "SELECT * from issues WHERE Title=? AND IssueDate=?", (bookid, issuedate)) if match: insert_status = "Have" else: insert_status = "Skipped" else: insert_status = "Wanted" newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": issuedate, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL & lazylibrarian.log_searchmag: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % ( total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % ( old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch, res = TORDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') elif magazine['nzbmode'] == 'direct': snatch, res = DirectDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') elif magazine['nzbmode'] == 'nzb': snatch, res = NZBDownloadMethod( magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'Magazine') else: res = 'Unhandled NZBmode [%s] for %s' % ( magazine['nzbmode'], magazine["nzburl"]) logger.error(res) snatch = 0 if snatch: logger.info( 'Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) custom_notify_snatch( "%s %s" % (magazine['bookid'], magazine['nzburl'])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) scheduleJob(action='Start', target='processDir') else: myDB.action( 'UPDATE wanted SET status="Failed",DLResult=? 
WHERE NZBurl=?', (res, magazine["nzburl"])) if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
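# The provider warnings above all share one rate-limit pattern: record when a
# message was last shown and stay quiet for the next 20 minutes (1200 seconds).
# A minimal standalone sketch of that throttle; the names _last_warned and
# warn_once_per_interval are illustrative, not part of LazyLibrarian:
import time

_last_warned = {}

def warn_once_per_interval(key, message, interval=1200):
    """ Log a warning at most once every `interval` seconds per key """
    timenow = int(time.time())
    if _last_warned.get(key, 0) + interval < timenow:
        print('WARNING: %s' % message)  # stand-in for logger.warn()
        _last_warned[key] = timenow

# warn_once_per_interval('nzb', 'No nzb providers are available') logs once,
# then suppresses repeats until 20 minutes have passed.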
def LIME(book=None):
    provider = "Limetorrent"
    host = lazylibrarian.LIME_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host
    searchURL = url_fix(host + "/searchrss/other/?q=" + book['searchterm'])
    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False
    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0
                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0
                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']
                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))
    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
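# LIME reads the seeder count out of the RSS item description, which looks
# something like "Seeds: 12, Leechers: 3". The parse above, isolated as a
# standalone helper with the same fall-back-to-zero behaviour (parse_seeders
# is illustrative, not a LazyLibrarian function):
def parse_seeders(description):
    """ Extract the number after 'Seeds:' from a feed description string """
    try:
        return int(description.split('Seeds:')[1].split(',')[0].strip())
    except (IndexError, ValueError):
        return 0

# parse_seeders("Seeds: 12, Leechers: 3") -> 12
# parse_seeders("no seeder info") -> 0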
def _RecentBooks(self, **kwargs):
    index = 0
    if 'index' in kwargs:
        index = check_int(kwargs['index'], 0)
    myDB = database.DBConnection()
    feed = {'title': 'LazyLibrarian OPDS - Recent Books', 'id': 'Recent Books', 'updated': now()}
    links = []
    entries = []
    links.append(getLink(href=self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                         rel='start', title='Home'))
    links.append(getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                         ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
    links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                         ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
    # AudioFile must be selected here as it is checked for the mime type below
    cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AudioFile,AuthorID "
    cmd += "from books where Status='Open' "
    if 'query' in kwargs:
        cmd += "AND BookName LIKE '%" + kwargs['query'] + "%' "
    cmd += "order by BookLibrary DESC"
    results = myDB.select(cmd)
    page = results[index:(index + self.PAGE_SIZE)]
    for book in page:
        mime_type = None
        if book['BookFile']:
            mime_type = mimeType(book['BookFile'])
        elif book['AudioFile']:
            mime_type = mimeType(book['AudioFile'])
        if mime_type:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('book:%s' % book['BookID']),
                     'updated': opdstime(book['BookLibrary']),
                     'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mime_type}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)
    """
    <link type="application/epub+zip" rel="http://opds-spec.org/acquisition"
    title="EPUB (no images)" length="18552" href="//www.gutenberg.org/ebooks/57490.epub.noimages"/>
    <link type="application/x-mobipocket-ebook" rel="http://opds-spec.org/acquisition"
    title="Kindle (no images)" length="110360" href="//www.gutenberg.org/ebooks/57490.kindle.noimages"/>
    """
    if len(results) > (index + self.PAGE_SIZE):
        links.append(
            getLink(href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
    if index >= self.PAGE_SIZE:
        links.append(
            getLink(href='%s?cmd=RecentBooks&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
    feed['links'] = links
    feed['entries'] = entries
    logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
    self.data = feed
    return
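# _RecentBooks pages through the result list in PAGE_SIZE slices: serve
# results[index:index + PAGE_SIZE], then add a 'next' link only if rows remain
# beyond the slice and a 'previous' link only if index is at least one page in.
# The windowing on its own (page_rels is illustrative):
def page_rels(total, index, page_size):
    """ Return the (rel, index) navigation links a page at `index` needs """
    rels = []
    if total > index + page_size:
        rels.append(('next', index + page_size))
    if index >= page_size:
        rels.append(('previous', index - page_size))
    return rels

# page_rels(95, 30, 30) -> [('next', 60), ('previous', 0)]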
def TDL(book=None):
    provider = "torrentdownloads"
    host = lazylibrarian.TDL_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host
    providerurl = url_fix(host)
    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urllib.urlencode(params)
    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
        data = False
    results = []
    minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
    if data:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'])
                    link = item['link']
                    size = int(item['size'])
                    url = None
                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result)
                            for anchor in new_soup.findAll('a'):
                                output = anchor.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break
                    if minimumseeders < seeders:
                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))
    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
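# TDL's feed only carries a relative details-page link, so the magnet URI has
# to be scraped from that page. The scrape on its own; a sketch assuming
# BeautifulSoup as used above (first_magnet is illustrative):
from bs4 import BeautifulSoup

def first_magnet(html):
    """ Return the first magnet: href found in an HTML page, or None """
    soup = BeautifulSoup(html, 'html5lib')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href and href.startswith('magnet'):
            return href
    return None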
def search_wishlist(): try: threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "SEARCHWISHLIST" myDB = database.DBConnection() resultlist, wishproviders = IterateOverWishLists() new_books = 0 if not wishproviders: logger.debug('No wishlists are set') scheduleJob(action='Stop', target='search_wishlist') return # No point in continuing # for each item in resultlist, add to database if necessary, and mark as wanted logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist)))) for book in resultlist: # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid) # we can just use bookid if goodreads, or try isbn and name matching on author/title if not # eg NYTimes wishlist if 'E' in book['types']: ebook_status = "Wanted" else: ebook_status = "Skipped" if 'A' in book['types']: audio_status = "Wanted" else: audio_status = "Skipped" if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']: cmd = 'select books.Status as Status,AudioStatus,authors.Status as AuthorStatus,' cmd += 'AuthorName,BookName,Requester,AudioRequester from books,authors ' cmd += 'where books.AuthorID = authors.AuthorID and bookid=?' bookmatch = myDB.match(cmd, (book['rss_bookid'],)) if bookmatch: cmd = 'SELECT SeriesName,Status from series,member ' cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?' series = myDB.select(cmd, (book['rss_bookid'],)) reject_series = None for ser in series: if ser['Status'] in ['Paused', 'Ignored']: reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']} break bookname = bookmatch['BookName'] if bookmatch['Status'] in ['Open', 'Wanted', 'Have']: logger.info('Found book %s, already marked %s' % (bookname, bookmatch['Status'])) if bookmatch["Requester"]: # Already on a wishlist if book["dispname"] not in bookmatch["Requester"]: newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']: logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus'])) elif reject_series: logger.info('Found book %s, but series "%s" is %s' % (bookname, reject_series['Name'], reject_series['Status'])) elif ebook_status == "Wanted": # skipped/ignored logger.info('Found book %s, marking as "Wanted"' % bookname) controlValueDict = {"BookID": book['rss_bookid']} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) new_books += 1 if bookmatch["Requester"]: # Already on a wishlist if book["dispname"] not in bookmatch["Requester"]: newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']: logger.info('Found audiobook %s, already marked %s' % (bookname, bookmatch['AudioStatus'])) if bookmatch["AudioRequester"]: # Already on a wishlist if book["dispname"] not in bookmatch["AudioRequester"]: newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + 
book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"AudioRequester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) elif bookmatch['AuthorStatus'] in ['Paused', 'Ignored']: logger.info('Found book %s, but author is %s' % (bookname, bookmatch['AuthorStatus'])) elif reject_series: logger.info('Found book %s, but series "%s" is %s' % (bookname, reject_series['Name'], reject_series['Status'])) elif audio_status == "Wanted": # skipped/ignored logger.info('Found audiobook %s, marking as "Wanted"' % bookname) controlValueDict = {"BookID": book['rss_bookid']} newValueDict = {"AudioStatus": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) new_books += 1 if bookmatch["AudioRequester"]: # Already on a wishlist if book["dispname"] not in bookmatch["AudioRequester"]: newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"AudioRequester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: import_book(book['rss_bookid'], ebook_status, audio_status) new_books += 1 newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) newValueDict = {"AudioRequester": book["dispname"] + ' '} controlValueDict = {"BookID": book['rss_bookid']} myDB.upsert("books", newValueDict, controlValueDict) else: item = {} results = None item['Title'] = book['rss_title'] if book['rss_bookid']: item['BookID'] = book['rss_bookid'] if book['rss_isbn']: item['ISBN'] = book['rss_isbn'] bookmatch = finditem(item, book['rss_author']) if bookmatch: # it's already in the database authorname = bookmatch['AuthorName'] bookname = bookmatch['BookName'] bookid = bookmatch['BookID'] auth_res = myDB.match('SELECT Status from authors WHERE authorname=?', (authorname,)) if auth_res: auth_status = auth_res['Status'] else: auth_status = 'Unknown' cmd = 'SELECT SeriesName,Status from series,member ' cmd += 'where series.SeriesID=member.SeriesID and member.BookID=?' 
series = myDB.select(cmd, (bookid,)) reject_series = None for ser in series: if ser['Status'] in ['Paused', 'Ignored']: reject_series = {"Name": ser['SeriesName'], "Status": ser['Status']} break if bookmatch['Status'] in ['Open', 'Wanted', 'Have']: logger.info( 'Found book %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['Status'])) if bookmatch["Requester"]: # Already on a wishlist if book["dispname"] not in bookmatch["Requester"]: newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) elif auth_status in ['Paused', 'Ignored']: logger.info('Found book %s, but author is "%s"' % (bookname, auth_status)) elif reject_series: logger.info('Found book %s, but series "%s" is %s' % (bookname, reject_series['Name'], reject_series['Status'])) elif ebook_status == 'Wanted': # skipped/ignored logger.info('Found book %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) new_books += 1 if bookmatch["Requester"]: # Already on a wishlist if book["dispname"] not in bookmatch["Requester"]: newValueDict = {"Requester": bookmatch["Requester"] + book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) if bookmatch['AudioStatus'] in ['Open', 'Wanted', 'Have']: logger.info( 'Found audiobook %s by %s, already marked as "%s"' % (bookname, authorname, bookmatch['AudioStatus'])) if bookmatch["AudioRequester"]: # Already on a wishlist if book["dispname"] not in bookmatch["AudioRequester"]: newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"AudioRequester": book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) elif auth_status in ['Paused', 'Ignored']: logger.info('Found book %s, but author is "%s"' % (bookname, auth_status)) elif reject_series: logger.info('Found book %s, but series "%s" is %s' % (bookname, reject_series['Name'], reject_series['Status'])) elif audio_status == 'Wanted': # skipped/ignored logger.info('Found audiobook %s by %s, marking as "Wanted"' % (bookname, authorname)) controlValueDict = {"BookID": bookid} newValueDict = {"AudioStatus": "Wanted"} myDB.upsert("books", newValueDict, controlValueDict) new_books += 1 if bookmatch["AudioRequester"]: # Already on a wishlist if book["dispname"] not in bookmatch["AudioRequester"]: newValueDict = {"AudioRequester": bookmatch["AudioRequester"] + book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) else: newValueDict = {"AudioRequester": book["dispname"] + ' '} controlValueDict = {"BookID": bookid} myDB.upsert("books", newValueDict, controlValueDict) else: # not in database yet if book['rss_isbn']: results = search_for(book['rss_isbn']) if results: result = results[0] # type: dict if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
logger.info("Found (%s%%) %s: %s" % (result['isbn_fuzz'], result['authorname'], result['bookname'])) import_book(result['bookid'], ebook_status, audio_status) new_books += 1 newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": result['bookid']} myDB.upsert("books", newValueDict, controlValueDict) newValueDict = {"AudioRequester": book["dispname"] + ' '} myDB.upsert("books", newValueDict, controlValueDict) bookmatch = True if not results: searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author'])) results = search_for(unaccented(searchterm)) if results: result = results[0] # type: dict if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \ and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90): logger.info("Found (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'], result['authorname'], result['bookname'])) import_book(result['bookid'], ebook_status, audio_status) new_books += 1 newValueDict = {"Requester": book["dispname"] + ' '} controlValueDict = {"BookID": result['bookid']} myDB.upsert("books", newValueDict, controlValueDict) newValueDict = {"AudioRequester": book["dispname"] + ' '} myDB.upsert("books", newValueDict, controlValueDict) bookmatch = True if not bookmatch: msg = "Skipping book %s by %s" % (item['Title'], book['rss_author']) if not results: msg += ', No results returned' logger.warn(msg) else: msg += ', No match found' logger.warn(msg) result = results[0] # type: dict msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'], result['authorname'], result['bookname']) logger.warn(msg) if new_books: logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books))) except Exception: logger.error('Unhandled exception in search_wishlist: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
def ZOO(book=None, test=False): errmsg = '' provider = "zooqle" host = lazylibrarian.CONFIG['ZOO_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/search") params = {"q": book['searchterm'], "category": "books", "fmt": "rss"} searchURL = providerurl + "?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) seeders = int(item['torrent_seeds']) link = item['links'][1]['href'] size = int(item['links'][1]['length']) magnet = item['torrent_magneturi'] url = None mode = 'torrent' if link: url = link mode = 'torrent' if magnet: if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet mode = 'magnet' if not url or not title: logger.debug('No url or title found') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': mode, 'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: if 'forbidden' in str(e).lower(): # looks like zooqle has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
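# A zooqle entry can carry both an http .torrent link and a magnet URI. The
# code above keeps the link by default and switches to the magnet when there
# is no link, or when PREFER_MAGNET is set. The same choice, isolated
# (pick_url is illustrative):
def pick_url(link, magnet, prefer_magnet):
    """ Return (url, mode) from an optional torrent link and magnet URI """
    url, mode = None, 'torrent'
    if link:
        url, mode = link, 'torrent'
    if magnet and (not url or prefer_magnet):
        url, mode = magnet, 'magnet'
    return url, mode

# pick_url('http://x/file.torrent', 'magnet:?xt=...', True) -> the magnet, 'magnet'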
def _Author(self, **kwargs): index = 0 if 'index' in kwargs: index = check_int(kwargs['index'], 0) myDB = database.DBConnection() if 'authorid' not in kwargs: self.data = self._error_with_message('No Author Provided') return links = [] entries = [] links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot, ftype='application/opensearchdescription+xml', rel='search', title='Search Books')) author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],)) author = makeUnicode(author['AuthorName']) cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE " if 'query' in kwargs: cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND " cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC" results = myDB.select(cmd, (kwargs['authorid'],)) page = results[index:(index + self.PAGE_SIZE)] for book in page: mime_type = None if book['BookFile']: mime_type = mimeType(book['BookFile']) elif book['AudioFile']: mime_type = mimeType(book['AudioFile']) if mime_type: entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])), 'id': escape('book:%s' % book['BookID']), 'updated': opdstime(book['BookAdded']), 'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot, book['BookID']), 'kind': 'acquisition', 'rel': 'file', 'type': mime_type} if lazylibrarian.CONFIG['OPDS_METAINFO']: entry['image'] = self.searchroot + '/' + book['BookImg'] entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc'])) entry['author'] = escape('%s' % author) else: entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded'])) entries.append(entry) feed = {} authorname = '%s (%s)' % (escape(author), len(entries)) feed['title'] = 'LazyLibrarian OPDS - %s' % authorname feed['id'] = 'author:%s' % escape(kwargs['authorid']) feed['updated'] = now() links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='start', title='Home')) links.append(getLink(href='%s?cmd=Authors' % self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self')) if len(results) > (index + self.PAGE_SIZE): links.append( getLink(href='%s?cmd=Author&authorid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['authorid']), index + self.PAGE_SIZE), ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next')) if index >= self.PAGE_SIZE: links.append( getLink(href='%s?cmd=Author&authorid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['authorid']), index - self.PAGE_SIZE), ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous')) feed['links'] = links feed['entries'] = entries self.data = feed logger.debug("Returning %s book%s" % (len(entries), plural(len(entries)))) return
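# Both OPDS feeds above emit an entry only when a mime type can be determined,
# preferring the ebook file and falling back to the audiobook file. A sketch of
# that selection using the standard mimetypes module as a stand-in for
# LazyLibrarian's own mimeType() helper (entry_mime_type is illustrative):
import mimetypes

def entry_mime_type(bookfile, audiofile):
    """ Mime type for an OPDS entry: ebook file first, audiobook fallback """
    target = bookfile or audiofile
    if not target:
        return None
    return mimetypes.guess_type(target)[0]

# entry_mime_type('book.txt', None) -> 'text/plain'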
def search_nzb_book(books=None, reset=False): threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "SEARCHNZB" if not lazylibrarian.USE_NZB(): logger.warn('No NEWZNAB/TORZNAB providers set, check config') return myDB = database.DBConnection() searchlist = [] if books is None: # We are performing a backlog search searchbooks = myDB.select( 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books WHERE Status="Wanted" order by BookAdded desc' ) else: # The user has added a new book searchbooks = [] for book in books: searchbook = myDB.select( 'SELECT BookID, AuthorName, BookName, BookSub from books WHERE BookID="%s" \ AND Status="Wanted"' % book['bookid']) for terms in searchbook: searchbooks.append(terms) if len(searchbooks) == 0: logger.debug("NZB search requested for no books or invalid BookID") return else: logger.info('NZB Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks)))) for searchbook in searchbooks: # searchterm is only used for display purposes searchterm = searchbook['AuthorName'] + ' "' + searchbook['BookName'] if searchbook['BookSub']: searchterm = searchterm + ': ' + searchbook['BookSub'] searchterm = searchterm + '"' searchlist.append({ "bookid": searchbook['BookID'], "bookName": searchbook['BookName'], "bookSub": searchbook['BookSub'], "authorName": searchbook['AuthorName'], "searchterm": searchterm }) if not lazylibrarian.SAB_HOST and not lazylibrarian.NZB_DOWNLOADER_BLACKHOLE and not lazylibrarian.NZBGET_HOST: logger.warn( 'No download method is set, use SABnzbd/NZBGet or blackhole, check config' ) nzb_count = 0 for book in searchlist: # first attempt, try author/title in category "book" resultlist, nproviders = providers.IterateOverNewzNabSites( book, 'book') if not nproviders: logger.warn( 'No NewzNab or TorzNab providers are set, check config') return # no point in continuing found = processResultList(resultlist, book, "book") # if you can't find the book, try author/title without any "(extended details, series etc)" if not found and '(' in book['bookName']: resultlist, nproviders = providers.IterateOverNewzNabSites( book, 'shortbook') found = processResultList(resultlist, book, "shortbook") # if you can't find the book under "books", you might find under general search if not found: resultlist, nproviders = providers.IterateOverNewzNabSites( book, 'general') found = processResultList(resultlist, book, "general") if not found: logger.debug("NZB Searches for %s returned no results." % book['searchterm']) if found > True: nzb_count = nzb_count + 1 # we found it logger.info("NZBSearch for Wanted items complete, found %s book%s" % (nzb_count, plural(nzb_count))) if reset: scheduleJob(action='Restart', target='search_nzb_book')
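# The display searchterm above is the author plus the quoted title, with any
# subtitle folded in after a colon: Author "Title: Subtitle". The assembly,
# isolated (display_searchterm is illustrative):
def display_searchterm(author, bookname, booksub=None):
    """ Build the quoted search term used in the log messages """
    term = author + ' "' + bookname
    if booksub:
        term += ': ' + booksub
    return term + '"'

# display_searchterm('Ursula K. Le Guin', 'The Dispossessed') ->
#     'Ursula K. Le Guin "The Dispossessed"'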
def TDL(book=None, test=False): errmsg = '' provider = "torrentdownloads" host = lazylibrarian.CONFIG['TDL_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host) params = {"type": "search", "cid": "2", "search": book['searchterm']} searchURL = providerurl + "/rss.xml?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = item['title'] seeders = int(item['seeders']) link = item['link'] size = int(item['size']) url = None if link and minimumseeders < int(seeders): # no point requesting the magnet link if not enough seeders # TDL gives us a relative link result, success = fetchURL(providerurl + link) if success: new_soup = BeautifulSoup(result, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output and output.startswith('magnet'): url = output break if not url or not title: logger.debug('Missing url or title') else: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'magnet', 'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
def TPB(book=None):
    provider = "TPB"
    host = lazylibrarian.TPB_HOST
    if not str(host)[:4] == "http":
        host = 'http://' + host
    providerurl = url_fix(host + "/s/?q=" + book['searchterm'])
    params = {"category": "601", "page": "0", "orderby": "99"}
    searchURL = providerurl + "&%s" % urllib.urlencode(params)
    result, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in result:
            logger.debug(u"No results found from %s for %s" % (provider, book['searchterm']))
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
        result = False
    results = []
    if result:
        logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1
        soup = BeautifulSoup(result)
        try:
            table = soup.findAll('table')[0]
            rows = table.findAll('tr')
        except Exception:
            # no results = no table in result page
            rows = []
        c1 = []
        c2 = []
        if len(rows) > 1:
            for row in rows[1:]:
                if len(row.findAll('td')) > 2:
                    c1.append(row.findAll('td')[1])
                    c2.append(row.findAll('td')[2])
        for col1, col2 in zip(c1, c2):
            try:
                title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0])
                magnet = str(col1).split('href="')[1].split('"')[0]
                size = unaccented(col1.text.split(', Size ')[1].split('iB')[0])
                mult = 1
                try:
                    if 'K' in size:
                        size = size.split('K')[0]
                        mult = 1024
                    elif 'M' in size:
                        size = size.split('M')[0]
                        mult = 1024 * 1024
                    size = int(float(size) * mult)
                except (ValueError, IndexError):
                    size = 0
                try:
                    seeders = int(col2.text)
                except ValueError:
                    seeders = 0
                if magnet and minimumseeders < seeders:
                    # no point in asking for magnet link if not enough seeders
                    magurl = '%s/%s' % (host, magnet)
                    result, success = fetchURL(magurl)
                    if not success:
                        logger.debug('Error fetching url %s, %s' % (magurl, result))
                    else:
                        magnet = None
                        new_soup = BeautifulSoup(result)
                        for link in new_soup.findAll('a'):
                            output = link.get('href')
                            if output and output.startswith('magnet'):
                                magnet = output
                                break
                    if not magnet or not title:
                        logger.debug('Missing magnet or title')
                    else:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': magnet,
                            'tor_size': str(size),
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e)))
    logger.debug(u"Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, book['searchterm']))
    return results
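# TPB reports sizes as display text such as "..., Size 1.2 MiB"; the parser
# above isolates the number, then scales by the K/M unit before storing bytes.
# The conversion on its own (tpb_size_to_bytes is illustrative):
def tpb_size_to_bytes(size_text):
    """ Convert '1.2 M' / '850 K' style size text to an integer byte count """
    mult = 1
    try:
        if 'K' in size_text:
            size_text = size_text.split('K')[0]
            mult = 1024
        elif 'M' in size_text:
            size_text = size_text.split('M')[0]
            mult = 1024 * 1024
        return int(float(size_text) * mult)
    except (ValueError, IndexError):
        return 0

# tpb_size_to_bytes('1.2 M') -> 1258291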
def find_results(self, searchterm=None, queue=None): """ GoogleBooks performs much better if we search for author OR title not both at once, so if searchterm is not isbn, two searches needed. Lazylibrarian searches use <ll> to separate title from author in searchterm If this token isn't present, it's an isbn or searchterm as supplied by user """ try: myDB = database.DBConnection() resultlist = [] # See if we should check ISBN field, otherwise ignore it api_strings = ['inauthor:', 'intitle:'] if is_valid_isbn(searchterm): api_strings = ['isbn:'] api_hits = 0 ignored = 0 total_count = 0 no_author_count = 0 title = '' authorname = '' if ' <ll> ' in searchterm: # special token separates title from author title, authorname = searchterm.split(' <ll> ') fullterm = searchterm.replace(' <ll> ', ' ') logger.debug('Now searching Google Books API with searchterm: %s' % fullterm) for api_value in api_strings: set_url = self.url if api_value == "isbn:": set_url = set_url + quote(api_value + searchterm) elif api_value == 'intitle:': searchterm = fullterm if title: # just search for title # noinspection PyUnresolvedReferences title = title.split(' (')[0] # without any series info searchterm = title searchterm = searchterm.replace("'", "").replace('"', '').strip() # and no quotes if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) set_url = set_url + quote(api_value + '"' + searchterm + '"') elif api_value == 'inauthor:': searchterm = fullterm if authorname: searchterm = authorname # just search for author searchterm = searchterm.strip() if PY2: searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING) set_url = set_url + quote_plus(api_value + '"' + searchterm + '"') startindex = 0 resultcount = 0 ignored = 0 number_results = 1 total_count = 0 no_author_count = 0 try: while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urlencode(self.params) try: jsonresults, in_cache = gb_json_request(URL) if not jsonresults: number_results = 0 else: if not in_cache: api_hits += 1 number_results = jsonresults['totalItems'] logger.debug('Searching url: ' + URL) if number_results == 0: logger.warn('Found no results for %s with value: %s' % (api_value, searchterm)) break else: pass except Exception as err: if hasattr(err, 'reason'): errmsg = err.reason else: errmsg = str(err) logger.warn( 'Google Books API Error [%s]: Check your API key or wait a while' % errmsg) break startindex += 40 for item in jsonresults['items']: total_count += 1 book = bookdict(item) if not book['author']: logger.debug('Skipped a result without authorfield.') no_author_count += 1 continue if not book['name']: logger.debug('Skipped a result without title.') continue valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG']) if "All" not in valid_langs: # don't care about languages, accept all try: # skip if language is not in valid list - booklang = book['lang'] if booklang not in valid_langs: logger.debug( 'Skipped %s with language %s' % (book['name'], booklang)) ignored += 1 continue except KeyError: ignored += 1 logger.debug('Skipped %s where no language is found' % book['name']) continue if authorname: author_fuzz = fuzz.ratio(book['author'], authorname) else: author_fuzz = fuzz.ratio(book['author'], fullterm) if title: if title.endswith(')'): title = title.rsplit('(', 1)[0] book_fuzz = fuzz.token_set_ratio(book['name'], title) # lose a point for each extra word in the fuzzy matches so we get the closest match words = len(getList(book['name'])) words -= len(getList(title)) book_fuzz -= 
abs(words) else: book_fuzz = fuzz.token_set_ratio(book['name'], fullterm) isbn_fuzz = 0 if is_valid_isbn(fullterm): isbn_fuzz = 100 highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz) dic = {':': '.', '"': '', '\'': ''} bookname = replace_all(book['name'], dic) bookname = unaccented(bookname) bookname = bookname.strip() # strip whitespace AuthorID = '' if book['author']: match = myDB.match( 'SELECT AuthorID FROM authors WHERE AuthorName=?', (book['author'],)) if match: AuthorID = match['AuthorID'] resultlist.append({ 'authorname': book['author'], 'authorid': AuthorID, 'bookid': item['id'], 'bookname': bookname, 'booksub': book['sub'], 'bookisbn': book['isbn'], 'bookpub': book['pub'], 'bookdate': book['date'], 'booklang': book['lang'], 'booklink': book['link'], 'bookrate': float(book['rate']), 'bookrate_count': book['rate_count'], 'bookimg': book['img'], 'bookpages': book['pages'], 'bookgenre': book['genre'], 'bookdesc': book['desc'], 'author_fuzz': author_fuzz, 'book_fuzz': book_fuzz, 'isbn_fuzz': isbn_fuzz, 'highest_fuzz': highest_fuzz, 'num_reviews': book['ratings'] }) resultcount += 1 except KeyError: break logger.debug("Returning %s result%s for (%s) with keyword: %s" % (resultcount, plural(resultcount), api_value, searchterm)) logger.debug("Found %s result%s" % (total_count, plural(total_count))) logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored))) logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count))) logger.debug('The Google Books API was hit %s time%s for searchterm: %s' % (api_hits, plural(api_hits), fullterm)) queue.put(resultlist) except Exception: logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
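# find_results keeps three scores per candidate and remembers the best: author
# name ratio, title token-set ratio (docked one point per word of length
# difference between the two titles), and a flat 100 when the search was an
# exact ISBN. A worked sketch of the combination (score_candidate is
# illustrative; fuzzywuzzy is the library used above):
from fuzzywuzzy import fuzz

def score_candidate(found_author, found_title, author, title, isbn_match=False):
    """ Return (author_fuzz, book_fuzz, isbn_fuzz, highest_fuzz) """
    author_fuzz = fuzz.ratio(found_author, author)
    book_fuzz = fuzz.token_set_ratio(found_title, title)
    # lose a point for each extra word so the closest-length match wins
    book_fuzz -= abs(len(found_title.split()) - len(title.split()))
    isbn_fuzz = 100 if isbn_match else 0
    highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)
    return author_fuzz, book_fuzz, isbn_fuzz, highest_fuzz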
def EXTRA(book=None): provider = "Extratorrent" host = lazylibrarian.EXTRA_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/rss") params = {"type": "search", "s_cat": "2", "search": book['searchterm']} searchURL = providerurl + "/?%s" % urllib.urlencode(params) try: request = urllib2.Request(searchURL) if lazylibrarian.PROXY_HOST: request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE) request.add_header('User-Agent', USER_AGENT) data = urllib2.urlopen(request, timeout=90) except (socket.timeout) as e: logger.debug('Timeout fetching data from %s' % provider) data = False except (urllib2.HTTPError, urllib2.URLError, ssl.SSLError) as e: # may return 404 if no results, not really an error if hasattr(e, 'code') and e.code == 404: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(searchURL) if hasattr(e, 'reason'): errmsg = e.reason else: errmsg = str(e) logger.debug('Error fetching data from %s: %s' % (provider, errmsg)) data = False results = [] minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 if data: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = int(item['seeders']) except ValueError: seeders = 0 try: size = int(item['size']) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['href'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def TDL(book=None, test=False): errmsg = '' provider = "torrentdownloads" host = lazylibrarian.CONFIG['TDL_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host) params = { "type": "search", "cid": "2", "search": book['searchterm'] } searchURL = providerurl + "/rss.xml?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = item['title'] seeders = int(item['seeders'].replace(',', '')) link = item['link'] size = int(item['size']) url = None try: pubdate = item['published'] except KeyError: pubdate = None if link and minimumseeders < seeders: # no point requesting the magnet link if not enough seeders # TDL gives us a relative link result, success = fetchURL(providerurl+link) if success: new_soup = BeautifulSoup(result, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output and output.startswith('magnet'): url = output break if not url or not title: logger.debug('Missing url or title') else: res = { 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'magnet', 'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY'] } if pubdate: res['tor_date'] = pubdate logger.debug('Found %s. Size: %s' % (title, size)) results.append(res) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
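# All three TDL revisions above share the seeder convention used across the
# torrent providers: NUMBEROFSEEDERS is the minimum acceptable count, so the
# code pre-subtracts one and then tests `minimumseeders < seeders`. Spelled
# out (enough_seeders is illustrative):
def enough_seeders(seeders, configured_minimum):
    """ True when a result meets the configured seeder minimum """
    minimumseeders = int(configured_minimum) - 1
    return minimumseeders < seeders  # same as seeders >= configured_minimum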
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ if not startdir: if not lazylibrarian.DESTINATION_DIR: return 0 else: startdir = lazylibrarian.DESTINATION_DIR if not os.path.isdir(startdir): logger.warn('Cannot find directory: %s. Not scanning' % startdir) return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == lazylibrarian.DESTINATION_DIR: myDB.action('DELETE from stats') logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 file_count = 0 author = "" if lazylibrarian.FULL_SCAN and startdir == lazylibrarian.DESTINATION_DIR: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"' ) status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookName = book['BookName'] bookAuthor = book['AuthorName'] bookID = book['BookID'] bookfile = book['BookFile'] if not (bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (bookAuthor, bookName)) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type # booktypes is an alternation (epub|mobi|...), so it needs a group, not a character class matchString = matchString.replace( "\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.(' + booktypes + ')' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine directories being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Skip if we've done this directory before; conditional on a config.ini switch # in case the user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and ( subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata.
User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join( r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING)) try: res = get_book_info(book_filename) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. # Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except: res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] logger.debug("file meta [%s] [%s] [%s] [%s]" % (isbn, language, author, book)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.action( 'SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if not match: myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug( "Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # "surname, forename" words = author.split(',') author = words[1].strip() + ' ' + words[0].strip( ) # "forename surname" if author[1] == ' ': author = author.replace(' ', '.') author = author.replace('..', '.') # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to # add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except: logger.warn( "Error finding author id for [%s]" % 
author) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', '_') match_auth = match_auth.replace(' ', '_') match_auth = match_auth.replace('__', '_') match_name = authorname.replace('.', '_') match_name = match_name.replace(' ', '_') match_name = match_name.replace('__', '_') match_name = unaccented(match_name) match_auth = unaccented(match_auth) # allow a degree of fuzziness to cater for different accented character handling. # some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio( match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug( "Failed to match author [%s] fuzz [%d]" % (author, match_fuzz)) logger.debug( "Failed to match author [%s] to authorname [%s]" % (match_auth, match_name)) # To save loading hundreds of books by unknown # authors at GR or GB, ignore if author "Unknown" if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() if not check_exist_author: logger.info("Adding new author [%s]" % author) try: addAuthorToDB(author) check_exist_author = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % author).fetchone() except: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug( "Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name book = book.replace('"', '').replace("'", "") bookid = find_book_in_db(myDB, author, book) if bookid: # check if book is already marked as "Open" (if so, # we already had it) check_status = myDB.action( 'SELECT Status from books where BookID="%s"' % bookid).fetchone() if check_status['Status'] != 'Open': # update status as we've got this book myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) book_filename = os.path.join(r, files) # update book location so we can check if it # gets removed, or allow click-to-open myDB.action( 'UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = book_filename.rsplit(os.sep, 1)[0] coverimg = os.path.join( bookdir, 'cover.jpg') cachedir = os.path.join( str(lazylibrarian.PROG_DIR), 'data' + os.sep + 'images' + os.sep + 'cache') cacheimg = os.path.join( cachedir, bookid + '.jpg') if os.path.isfile(coverimg): copyfile(coverimg, cacheimg) new_book_count += 1 else: logger.debug( "Failed to match book [%s] by [%s] in database" % (book, author)) logger.info("%s new/modified book%s found and added to the database" % (new_book_count, plural(new_book_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) # show statistics of full library scans if startdir == 
lazylibrarian.DESTINATION_DIR: stats = myDB.action( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats" ).fetchone() if stats['sum(GR_book_hits)'] is not None: # only show stats if new books added if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoogleBooks language was changed %s time%s" % (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)']))) if lazylibrarian.BOOK_API == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoodReads was hit %s time%s for languages" % (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)']))) logger.debug("LibraryThing was hit %s time%s for languages" % (stats['sum(LT_lang_hits)'], plural(stats['sum(LT_lang_hits)']))) logger.debug( "Language cache was hit %s time%s" % (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)']))) logger.debug( "Unwanted language removed %s book%s" % (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)']))) logger.debug( "Unwanted characters removed %s book%s" % (stats['sum(bad_char)'], plural(stats['sum(bad_char)']))) logger.debug( "Unable to cache %s book%s with missing ISBN" % (stats['sum(uncached)'], plural(stats['sum(uncached)']))) logger.debug( "Found %s duplicate book%s" % (stats['sum(duplicates)'], plural(stats['sum(duplicates)']))) logger.debug( "Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural( lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.action( "select count('ISBN') as counter from languages").fetchone() logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) nolang = len( myDB.select( 'select BookID from Books where status="Open" and BookLang="Unknown"' )) if nolang: logger.warn( "Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) authors = myDB.select('select AuthorID from authors') # Update bookcounts for all authors, not just new ones - refresh may have located # new books for existing authors especially if switched provider gb/gr else: # single author/book import authors = myDB.select( 'select AuthorID from authors where AuthorName = "%s"' % author) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) images = myDB.select( 'select bookid, bookimg, bookname from books where bookimg like "http%"' ) if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] bookname = item['bookname'] newimg = cache_cover(bookid, bookimg) if newimg is not None: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select( 'select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"' ) if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] authorname = item['authorname'] newimg = cache_cover(authorid, authorimg) if newimg is not None: myDB.action( 'update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) setWorkPages() 
    logger.info('Library scan complete')
    return new_book_count
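
# Illustrative sketch, not LazyLibrarian code: how the language-cache key used
# in the scan above is derived. is_valid_isbn() has already checked the length
# is 10 or 13, so the key is the first three digits of an ISBN-10, or digits
# 4-6 of an ISBN-13 (skipping the 978/979 EAN prefix); both forms of the same
# book yield the same key.
def isbn_lang_key(isbn):
    if len(isbn) == 10:
        return isbn[0:3]
    return isbn[3:6]  # ISBN-13: skip the EAN prefix

# isbn_lang_key('0261102389') == isbn_lang_key('9780261102385') == '026'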
def EXTRA(book=None): provider = "Extratorrent" host = lazylibrarian.EXTRA_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/rss") params = { "type": "search", "s_cat": "2", "search": book['searchterm'] } searchURL = providerurl + "/?%s" % urllib.urlencode(params) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug('Error fetching data from %s: %s' % (provider, data)) data = False results = [] minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 if data: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = int(item['seeders']) except ValueError: seeders = 0 try: size = int(item['size']) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['href'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
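
# Sketch of the enclosure scan in EXTRA() above (feed entry shape as returned
# by feedparser): the loop keeps the href of the last link whose MIME type
# contains 'x-bittorrent'. A result is then accepted when seeders exceed
# NUMBEROFSEEDERS - 1, i.e. seeders >= NUMBEROFSEEDERS.
def torrent_href(links):
    url = None
    for link in links:
        if 'x-bittorrent' in link.get('type', ''):
            url = link.get('href')  # no break: the last match wins
    return url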
def RSS(host=None, feednr=None, priority=0): """ Generic RSS query function, just return all the results from the RSS feed in a list """ results = [] if not str(host)[:4] == "http": host = 'http://' + host URL = host result, success = fetchURL(URL) if success: data = feedparser.parse(result) else: logger.error('Error fetching data from %s: %s' % (host, result)) BlockProvider(host, result) data = None if data: # to debug because of api logger.debug('Parsing results from %s' % URL) provider = data['feed']['link'] logger.debug("RSS %s returned %i result%s" % (provider, len(data.entries), plural(len(data.entries)))) for post in data.entries: title = None magnet = None size = None torrent = None nzb = None url = None tortype = 'torrent' if 'title' in post: title = post.title if 'links' in post: for f in post.links: if 'x-bittorrent' in f['type']: size = f['length'] torrent = f['href'] break if 'x-nzb' in f['type']: size = f['length'] nzb = f['href'] break if 'torrent_magneturi' in post: magnet = post.torrent_magneturi if torrent: url = torrent tortype = 'torrent' if magnet: if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']): url = magnet tortype = 'magnet' if nzb: # prefer nzb over torrent/magnet url = nzb tortype = 'nzb' if not url: if 'link' in post: url = post.link tor_date = 'Fri, 01 Jan 1970 00:00:00 +0100' if 'newznab_attr' in post: if post.newznab_attr['name'] == 'usenetdate': tor_date = post.newznab_attr['value'] if not size: size = 1000 if title and url: results.append({ 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_date': tor_date, 'tor_feed': feednr, 'tor_type': tortype, 'priority': priority }) else: logger.debug('No data returned from %s' % host) return results
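
# Sketch of the download-link preference implemented in RSS() above: an nzb
# link always wins, a magnet replaces a torrent link only if there is no
# torrent or PREFER_MAGNET is set, otherwise the torrent link is used.
def pick_url(torrent=None, magnet=None, nzb=None, prefer_magnet=False):
    url, tortype = None, 'torrent'
    if torrent:
        url, tortype = torrent, 'torrent'
    if magnet and (not url or prefer_magnet):
        url, tortype = magnet, 'magnet'
    if nzb:
        url, tortype = nzb, 'nzb'
    return url, tortype

# pick_url(torrent='t', magnet='m', prefer_magnet=True) -> ('m', 'magnet')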
def TPB(book=None): provider = "TPB" host = lazylibrarian.TPB_HOST if not str(host)[:4] == "http": host = 'http://' + host providerurl = url_fix(host + "/s/?q=" + book['searchterm']) params = { "category": "601", "page": "0", "orderby": "99" } searchURL = providerurl + "&%s" % urllib.urlencode(params) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) result = False else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) minimumseeders = int(lazylibrarian.NUMBEROFSEEDERS) - 1 soup = BeautifulSoup(result) try: table = soup.findAll('table')[0] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 2: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) for col1, col2 in zip(c1, c2): try: title = unaccented(str(col1).split('title=')[1].split('>')[1].split('<')[0]) magnet = str(col1).split('href="')[1].split('"')[0] size = unaccented(col1.text.split(', Size ')[1].split('iB')[0]) mult = 1 try: if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 try: seeders = int(col2.text) except ValueError: seeders = 0 if magnet and minimumseeders < seeders: # no point in asking for magnet link if not enough seeders magurl = '%s/%s' % (host, magnet) result, success = fetchURL(magurl) if not success: logger.debug('Error fetching url %s, %s' % (magurl, result)) else: magnet = None new_soup = BeautifulSoup(result) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('magnet'): magnet = output break if not magnet or not title: logger.debug('Missing magnet or title') else: if minimumseeders < seeders: results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': magnet, 'tor_size': str(size), }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
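
# Sketch of the size parsing in TPB() above: the listing text contains
# ', Size 1.4 MiB,' and the code keeps the number before 'iB', scaling a
# K or M suffix to bytes; anything unparseable falls back to 0.
def parse_tpb_size(text):
    try:
        size = text.split(', Size ')[1].split('iB')[0]  # e.g. '1.4 M'
        mult = 1
        if 'K' in size:
            size, mult = size.split('K')[0], 1024
        elif 'M' in size:
            size, mult = size.split('M')[0], 1024 * 1024
        return int(float(size) * mult)
    except (ValueError, IndexError):
        return 0

# parse_tpb_size('Uploaded 05-11 2013, Size 1.4 MiB, ULed by x') == 1468006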
def NewzNabPlus(book=None, provider=None, searchType=None, searchMode=None): """ Generic NewzNabplus query function takes in host+key+type and returns the result set regardless of who based on site running NewzNab+ ref http://usenetreviewz.com/nzb-sites/ """ host = provider['HOST'] api_key = provider['API'] logger.debug( '[NewzNabPlus] searchType [%s] with Host [%s] mode [%s] using api [%s] for item [%s]' % (searchType, host, searchMode, api_key, str(book))) results = [] params = ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode) if params: if not str(host)[:4] == "http": host = 'http://' + host if host[-1:] == '/': host = host[:-1] URL = host + '/api?' + urllib.urlencode(params) sterm = book['searchterm'] if isinstance(sterm, str) and hasattr(sterm, "decode"): sterm = sterm.decode('utf-8') rootxml = None logger.debug("[NewzNabPlus] URL = %s" % URL) result, success = fetchURL(URL) if success: try: rootxml = ElementTree.fromstring(result) except Exception as e: logger.error('Error parsing data from %s: %s %s' % (host, type(e).__name__, str(e))) rootxml = None else: if not result or result == "''": result = "Got an empty response" logger.error('Error reading data from %s: %s' % (host, result)) BlockProvider(host, result) if rootxml is not None: # to debug because of api logger.debug('Parsing results from <a href="%s">%s</a>' % (URL, host)) if rootxml.tag == 'error': errormsg = rootxml.get('description', default='unknown error') logger.error("%s - %s" % (host, errormsg)) # maybe the host doesn't support the search type match = False if (provider['BOOKSEARCH'] and searchType in ["book", "shortbook"]) or \ (provider['AUDIOSEARCH'] and searchType in ["audio", "shortaudio"]): errorlist = [ 'no such function', 'unknown parameter', 'unknown function', 'bad request', 'incorrect parameter', 'does not support' ] for item in errorlist: if item in errormsg.lower(): match = True if match: count = 0 if searchType in ["book", "shortbook"]: msg = 'BOOKSEARCH' elif searchType in ["audio", "shortaudio"]: msg = 'AUDIOSEARCH' else: msg = '' if not msg: logger.error( 'Error trying to disable searchtype [%s] for %s' % (searchType, host)) else: while count < len(lazylibrarian.NEWZNAB_PROV): if lazylibrarian.NEWZNAB_PROV[count][ 'HOST'] == provider['HOST']: if str(provider['MANUAL']) == 'False': logger.error("Disabled %s=%s for %s" % (msg, provider[msg], provider['HOST'])) lazylibrarian.NEWZNAB_PROV[count][ msg] = "" threadname = threading.currentThread( ).name lazylibrarian.config_write() threading.currentThread( ).name = threadname else: logger.error( "Unable to disable %s for %s [MANUAL=%s]" % (msg, provider['HOST'], provider['MANUAL'])) count += 1 if not match: BlockProvider(provider['HOST'], errormsg) else: resultxml = rootxml.getiterator('item') nzbcount = 0 maxage = check_int(lazylibrarian.CONFIG['USENET_RETENTION'], 0) for nzb in resultxml: try: thisnzb = ReturnResultsFieldsBySearchType( book, nzb, host, searchMode, provider['DLPRIORITY']) if not maxage: nzbcount += 1 results.append(thisnzb) else: # example nzbdate format: Mon, 27 May 2013 02:12:09 +0200 nzbdate = thisnzb['nzbdate'] try: parts = nzbdate.split(' ') nzbdate = ' '.join( parts[:5]) # strip the +0200 dt = datetime.datetime.strptime( nzbdate, "%a, %d %b %Y %H:%M:%S").timetuple() nzbage = age( '%04d-%02d-%02d' % (dt.tm_year, dt.tm_mon, dt.tm_mday)) except Exception as e: logger.debug( 'Unable to get age from [%s] %s %s' % (thisnzb['nzbdate'], type(e).__name__, str(e))) nzbage = 0 if nzbage <= maxage: nzbcount += 1 
                                    results.append(thisnzb)
                                else:
                                    logger.debug('%s is too old (%s day%s)' %
                                                 (thisnzb['nzbtitle'], nzbage, plural(nzbage)))
                    except IndexError:
                        logger.debug('No results from %s for %s' % (host, sterm))
                logger.debug('Found %s nzb at %s for: %s' % (nzbcount, host, sterm))
        else:
            logger.debug('No data returned from %s for %s' % (host, sterm))
    return results
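
# Sketch of the retention check in NewzNabPlus() above: newznab dates look
# like 'Mon, 27 May 2013 02:12:09 +0200'; the numeric offset is dropped
# before strptime, then the age in days is compared against USENET_RETENTION.
import datetime

def nzb_age_days(nzbdate, today=None):
    stripped = ' '.join(nzbdate.split(' ')[:5])  # drop the '+0200'
    dt = datetime.datetime.strptime(stripped, "%a, %d %b %Y %H:%M:%S")
    return ((today or datetime.date.today()) - dt.date()).days

# nzb_age_days('Mon, 27 May 2013 02:12:09 +0200', datetime.date(2013, 5, 30)) == 3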
def GEN(book=None): provider = "libgen" host = lazylibrarian.GEN_HOST if not str(host)[:4] == "http": host = 'http://' + host searchURL = url_fix(host + "/search.php?view=simple&open=0&phrase=0&column=def&res=100&req=" + book['searchterm']) result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) elif '111' in result: # looks like libgen has ip based access limits logger.error('Access forbidden. Please wait a while before trying %s again.' % provider) else: logger.debug(searchURL) logger.debug('Error fetching data from %s: %s' % (provider, result)) result = False results = [] if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) soup = BeautifulSoup(result) try: table = soup.findAll('table')[2] rows = table.findAll('tr') except Exception: # no results = no table in result page rows = [] c1 = [] c2 = [] c7 = [] c8 = [] if len(rows) > 1: for row in rows[1:]: if len(row.findAll('td')) > 8: c1.append(row.findAll('td')[1]) c2.append(row.findAll('td')[2]) c7.append(row.findAll('td')[7]) c8.append(row.findAll('td')[8]) for col1, col2, col7, col8 in zip(c1, c2, c7, c8): try: author = unaccented(col1.text) title = unaccented(str(col2).split('>')[2].split('<')[0].strip()) link = str(col2).split('href="')[1].split('?')[1].split('"')[0] size = unaccented(col7.text).upper() extn = col8.text try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError) as e: size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn bookURL = url_fix(host + "/ads.php?" + link) bookresult, success = fetchURL(bookURL) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(bookURL) logger.debug('Error fetching data from %s: %s' % (provider, bookresult)) bookresult = False if bookresult: url = None new_soup = BeautifulSoup(bookresult) for link in new_soup.findAll('a'): output = link.get('href') if output and output.startswith('/get.php'): url = output break if url: url = url_fix(host + url) results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), }) logger.debug('Found %s, Size %s' % (title, size)) except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug(u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results
def GEN(book=None, prov=None, test=False): errmsg = '' provider = "libgen.io" if prov is None: prov = 'GEN' host = lazylibrarian.CONFIG[prov + '_HOST'] if not host.startswith('http'): host = 'http://' + host search = lazylibrarian.CONFIG[prov + '_SEARCH'] if not search or not search.endswith('.php'): search = 'search.php' if 'index.php' not in search and 'search.php' not in search: search = 'search.php' if search[0] == '/': search = search[1:] sterm = makeUnicode(book['searchterm']) page = 1 results = [] next_page = True while next_page: if 'index.php' in search: params = { "s": book['searchterm'], "f_lang": "All", "f_columns": 0, "f_ext": "All" } else: params = { "view": "simple", "open": 0, "phrase": 0, "column": "def", "res": 100, "req": book['searchterm'] } if page > 1: params['page'] = page providerurl = url_fix(host + "/%s" % search) searchURL = providerurl + "?%s" % urllib.urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True elif '111' in result: # looks like libgen has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) errmsg = result else: logger.debug(searchURL) logger.debug('Error fetching page data from %s: %s' % (provider, result)) errmsg = result result = False if test: return success if result: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) try: soup = BeautifulSoup(result, 'html5lib') try: table = soup.find_all('table')[2] # un-named table if table: rows = table.find_all('tr') except IndexError: # no results table in result page rows = [] if 'search.php' in search and len(rows) > 1: rows = rows[1:] for row in rows: author = '' title = '' size = '' extn = '' link = '' td = row.find_all('td') if 'index.php' in search and len(td) > 3: try: author = formatAuthorName(td[0].text) title = td[2].text newsoup = BeautifulSoup(str(td[4]), 'html5lib') data = newsoup.find('a') link = data.get('href') extn = data.text.split('(')[0] size = data.text.split('(')[1].split(')')[0] size = size.upper() except IndexError as e: logger.debug( 'Error parsing libgen index.php results: %s' % str(e)) elif 'search.php' in search and len(td) > 8: try: author = formatAuthorName(td[1].text) title = td[2].text size = td[7].text.upper() extn = td[8].text newsoup = BeautifulSoup(str(td[2]), 'html5lib') link = newsoup.get('href') except IndexError as e: logger.debug( 'Error parsing libgen search.php results; %s' % str(e)) if not size: size = 0 else: try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn if not link.startswith('http'): if "/ads.php?" in link: url = url_fix(host + link) else: url = url_fix(host + "/ads.php?" 
+ link) else: url = redirect_url(host, link) bookresult, success = fetchURL(url) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug( "No results found from %s for %s" % (provider, sterm)) else: logger.debug(url) logger.debug( 'Error fetching link data from %s: %s' % (provider, bookresult)) errmsg = bookresult bookresult = False if bookresult: url = None try: new_soup = BeautifulSoup( bookresult, 'html5lib') for link in new_soup.find_all('a'): output = link.get('href') if output: if output.startswith( 'http' ) and '/get.php' in output: url = output break elif '/get.php' in output: url = '/get.php' + output.split( '/get.php')[1] break elif '/download/book' in output: url = '/download/book' + output.split( '/download/book')[1] break if url and not url.startswith('http'): url = url_fix(host + url) else: url = redirect_url(host, url) except Exception as e: logger.debug( '%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e))) url = None if url: results.append({ 'bookid': book['bookid'], 'tor_prov': provider + '/' + search, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'direct', 'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY'] }) logger.debug('Found %s, Size %s' % (title, size)) next_page = True except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn( 'Maximum results page search reached, still more results available' ) next_page = False logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
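
# Sketch of the link normalisation in the GEN() parser above: libgen mirrors
# return absolute download links, site-relative '/get.php?...' links, or
# '/download/book...' links; relative forms are re-rooted on the configured
# host (the redirect_url() handling is omitted here for brevity).
def normalise_libgen_link(host, output):
    if output.startswith('http') and '/get.php' in output:
        return output
    if '/get.php' in output:
        return host + '/get.php' + output.split('/get.php')[1]
    if '/download/book' in output:
        return host + '/download/book' + output.split('/download/book')[1]
    return None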
def search_rss_book(books=None, library=None): """ books is a list of new books to add, or None for backlog search library is "eBook" or "AudioBook" or None to search all book types """ if not (lazylibrarian.USE_RSS()): logger.warn('RSS search is disabled') scheduleJob(action='Stop', target='search_rss_book') return try: threadname = threading.currentThread().name if "Thread-" in threadname: if not books: threading.currentThread().name = "SEARCHALLRSS" else: threading.currentThread().name = "SEARCHRSS" myDB = database.DBConnection() searchbooks = [] if not books: # We are performing a backlog search cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus ' cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") ' cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc' results = myDB.select(cmd) for terms in results: searchbooks.append(terms) else: # The user has added a new book for book in books: cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus ' cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID' results = myDB.select(cmd, (book['bookid'],)) for terms in results: searchbooks.append(terms) if len(searchbooks) == 0: logger.debug("SearchRSS - No books to search for") return resultlist, nproviders, _ = IterateOverRSSSites() if not nproviders: logger.warn('No rss providers are available') scheduleJob(action='Stop', target='search_rss_book') return # No point in continuing logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks)))) searchlist = [] for searchbook in searchbooks: # searchterm is only used for display purposes searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName'] if searchbook['BookSub']: searchterm = searchterm + ': ' + searchbook['BookSub'] if library is None or library == 'eBook': if searchbook['Status'] == "Wanted": cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="eBook" and Status="Snatched"' snatched = myDB.match(cmd, (searchbook["BookID"],)) if snatched: logger.warn('eBook %s %s already marked snatched in wanted table' % (searchbook['AuthorName'], searchbook['BookName'])) else: searchlist.append( {"bookid": searchbook['BookID'], "bookName": searchbook['BookName'], "bookSub": searchbook['BookSub'], "authorName": searchbook['AuthorName'], "library": "eBook", "searchterm": searchterm}) if library is None or library == 'AudioBook': if searchbook['AudioStatus'] == "Wanted": cmd = 'SELECT BookID from wanted WHERE BookID=? and AuxInfo="AudioBook" and Status="Snatched"' snatched = myDB.match(cmd, (searchbook["BookID"],)) if snatched: logger.warn('AudioBook %s %s already marked snatched in wanted table' % (searchbook['AuthorName'], searchbook['BookName'])) else: searchlist.append( {"bookid": searchbook['BookID'], "bookName": searchbook['BookName'], "bookSub": searchbook['BookSub'], "authorName": searchbook['AuthorName'], "library": "AudioBook", "searchterm": searchterm}) rss_count = 0 for book in searchlist: if book['library'] == 'AudioBook': searchtype = 'audio' else: searchtype = 'book' found = processResultList(resultlist, book, searchtype, 'rss') # if you can't find the book, try title without any "(extended details, series etc)" if not found and '(' in book['bookName']: # anything to shorten? searchtype = 'short' + searchtype found = processResultList(resultlist, book, searchtype, 'rss') if not found: logger.info("RSS Searches for %s %s returned no results." 
                        % (book['library'], book['searchterm']))
            if found:
                rss_count += 1
        logger.info("RSS Search for Wanted items complete, found %s book%s" %
                    (rss_count, plural(rss_count)))
    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
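
# Sketch of the 'short' searchtype fallback in search_rss_book() above: when a
# full-title search finds nothing and the title carries a bracketed qualifier,
# the retry is assumed to search on the trimmed title,
# e.g. 'Dune (Dune, #1)' -> 'Dune'.
def short_title(bookname):
    return bookname.split('(')[0].strip() if '(' in bookname else bookname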
def cleanCache(): """ Remove unused files from the cache - delete if expired or unused. Check JSONCache WorkCache XMLCache SeriesCache Author Book Check covers and authorimages referenced in the database exist and change database entry if missing """ myDB = database.DBConnection() result = [] cache = os.path.join(lazylibrarian.CACHEDIR, "JSONCache") # ensure directory is unicode so we get unicode results from listdir if isinstance(cache, str): cache = cache.decode(lazylibrarian.SYS_ENCODING) cleaned = 0 kept = 0 if os.path.isdir(cache): for cached_file in os.listdir(cache): target = os.path.join(cache, cached_file) cache_modified_time = os.stat(target).st_mtime time_now = time.time() if cache_modified_time < time_now - ( lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60): # expire after this many seconds # Cache is old, delete entry os.remove(target) cleaned += 1 else: kept += 1 msg = "Cleaned %i file%s from JSONCache, kept %i" % (cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) cache = os.path.join(lazylibrarian.CACHEDIR, "XMLCache") # ensure directory is unicode so we get unicode results from listdir if isinstance(cache, str): cache = cache.decode(lazylibrarian.SYS_ENCODING) cleaned = 0 kept = 0 if os.path.isdir(cache): for cached_file in os.listdir(cache): target = os.path.join(cache, cached_file) cache_modified_time = os.stat(target).st_mtime time_now = time.time() if cache_modified_time < time_now - ( lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60): # expire after this many seconds # Cache is old, delete entry os.remove(target) cleaned += 1 else: kept += 1 msg = "Cleaned %i file%s from XMLCache, kept %i" % (cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) cache = os.path.join(lazylibrarian.CACHEDIR, "WorkCache") # ensure directory is unicode so we get unicode results from listdir if isinstance(cache, str): cache = cache.decode(lazylibrarian.SYS_ENCODING) cleaned = 0 kept = 0 if os.path.isdir(cache): for cached_file in os.listdir(cache): target = os.path.join(cache, cached_file) try: bookid = cached_file.split('.')[0] except IndexError: logger.error('Clean Cache: Error splitting %s' % cached_file) continue item = myDB.match('select BookID from books where BookID="%s"' % bookid) if not item: # WorkPage no longer referenced in database, delete cached_file os.remove(target) cleaned += 1 else: kept += 1 msg = "Cleaned %i file%s from WorkCache, kept %i" % (cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) cache = os.path.join(lazylibrarian.CACHEDIR, "SeriesCache") # ensure directory is unicode so we get unicode results from listdir if isinstance(cache, str): cache = cache.decode(lazylibrarian.SYS_ENCODING) cleaned = 0 kept = 0 if os.path.isdir(cache): for cached_file in os.listdir(cache): target = os.path.join(cache, cached_file) try: seriesid = cached_file.split('.')[0] except IndexError: logger.error('Clean Cache: Error splitting %s' % cached_file) continue item = myDB.match( 'select SeriesID from series where SeriesID="%s"' % seriesid) if not item: # SeriesPage no longer referenced in database, delete cached_file os.remove(target) cleaned += 1 else: kept += 1 msg = "Cleaned %i file%s from SeriesCache, kept %i" % ( cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) cache = lazylibrarian.CACHEDIR cleaned = 0 kept = 0 cachedir = os.path.join(cache, 'author') if os.path.isdir(cachedir): for cached_file in os.listdir(cachedir): target = os.path.join(cachedir, cached_file) if os.path.isfile(target): try: imgid = 
cached_file.split('.')[0].rsplit(os.sep)[-1] except IndexError: logger.error('Clean Cache: Error splitting %s' % cached_file) continue item = myDB.match( 'select AuthorID from authors where AuthorID="%s"' % imgid) if not item: # Author Image no longer referenced in database, delete cached_file os.remove(target) cleaned += 1 else: kept += 1 cachedir = os.path.join(cache, 'book') if os.path.isdir(cachedir): for cached_file in os.listdir(cachedir): target = os.path.join(cachedir, cached_file) if os.path.isfile(target): try: imgid = cached_file.split('.')[0].rsplit(os.sep)[-1] except IndexError: logger.error('Clean Cache: Error splitting %s' % cached_file) continue item = myDB.match( 'select BookID from books where BookID="%s"' % imgid) if not item: # Book Image no longer referenced in database, delete cached_file os.remove(target) cleaned += 1 else: kept += 1 # at this point there should be no more .jpg files in the root of the cachedir # any that are still there are for books/authors deleted from database for cached_file in os.listdir(cache): if cached_file.endswith('.jpg'): os.remove(os.path.join(cache, cached_file)) cleaned += 1 msg = "Cleaned %i file%s from ImageCache, kept %i" % ( cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) # verify the cover images referenced in the database are present images = myDB.action('select BookImg,BookName,BookID from books') cachedir = os.path.join(lazylibrarian.CACHEDIR, 'book') cleaned = 0 kept = 0 for item in images: keep = True imgfile = '' if item['BookImg'] is None or item['BookImg'] == '': keep = False if keep and not item['BookImg'].startswith( 'http') and not item['BookImg'] == "images/nocover.png": # html uses '/' as separator, but os might not imgname = item['BookImg'].rsplit('/')[-1] imgfile = os.path.join(cachedir, imgname) if not os.path.isfile(imgfile): keep = False if keep: kept += 1 else: cleaned += 1 logger.debug('Cover missing for %s %s' % (item['BookName'], imgfile)) myDB.action( 'update books set BookImg="images/nocover.png" where Bookid="%s"' % item['BookID']) msg = "Cleaned %i missing cover file%s, kept %i" % (cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) # verify the author images referenced in the database are present images = myDB.action('select AuthorImg,AuthorName,AuthorID from authors') cachedir = os.path.join(lazylibrarian.CACHEDIR, 'author') cleaned = 0 kept = 0 for item in images: keep = True imgfile = '' if item['AuthorImg'] is None or item['AuthorImg'] == '': keep = False if keep and not item['AuthorImg'].startswith( 'http') and not item['AuthorImg'] == "images/nophoto.png": # html uses '/' as separator, but os might not imgname = item['AuthorImg'].rsplit('/')[-1] imgfile = os.path.join(cachedir, imgname) if not os.path.isfile(imgfile): keep = False if keep: kept += 1 else: cleaned += 1 logger.debug('Image missing for %s %s' % (item['AuthorName'], imgfile)) myDB.action( 'update authors set AuthorImg="images/nophoto.png" where AuthorID="%s"' % item['AuthorID']) msg = "Cleaned %i missing author image%s, kept %i" % ( cleaned, plural(cleaned), kept) result.append(msg) logger.debug(msg) return result
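
# Sketch of the mtime-based expiry used for JSONCache and XMLCache in
# cleanCache() above: a cached file is deleted once it is older than
# CACHE_AGE days.
import os
import time

def is_expired(path, max_age_days):
    return os.stat(path).st_mtime < time.time() - max_age_days * 24 * 60 * 60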
def searchItem(item=None, bookid=None, cat=None): """ Call all active search providers to search for item return a list of results, each entry in list containing percentage_match, title, provider, size, url item = searchterm to use for general search bookid = link to data for book/audio searches cat = category to search [general, book, audio] """ results = [] if not item: return results book = {} searchterm = unaccented_str(item) book['searchterm'] = searchterm if bookid: book['bookid'] = bookid else: book['bookid'] = searchterm if cat in ['book', 'audio']: myDB = database.DBConnection() cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID' cmd += ' and bookID=?' match = myDB.match(cmd, (bookid,)) if match: book['authorName'] = match['authorName'] book['bookName'] = match['bookName'] book['bookSub'] = match['bookSub'] else: logger.debug('Forcing general search') cat = 'general' nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT() logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm)) if lazylibrarian.USE_NZB(): resultlist, nprov = IterateOverNewzNabSites(book, cat) if nprov: results += resultlist if lazylibrarian.USE_TOR(): resultlist, nprov = IterateOverTorrentSites(book, cat) if nprov: results += resultlist if lazylibrarian.USE_DIRECT(): resultlist, nprov = IterateOverDirectSites(book, cat) if nprov: results += resultlist if lazylibrarian.USE_RSS(): resultlist, nprov = IterateOverRSSSites() if nprov: results += resultlist # reprocess to get consistent results searchresults = [] for item in results: provider = '' title = '' url = '' size = '' date = '' mode = '' if 'nzbtitle' in item: title = item['nzbtitle'] if 'nzburl' in item: url = item['nzburl'] if 'nzbprov' in item: provider = item['nzbprov'] if 'nzbsize' in item: size = item['nzbsize'] if 'nzbdate' in item: date = item['nzbdate'] if 'nzbmode' in item: mode = item['nzbmode'] if 'tor_title' in item: title = item['tor_title'] if 'tor_url' in item: url = item['tor_url'] if 'tor_prov' in item: provider = item['tor_prov'] if 'tor_size' in item: size = item['tor_size'] if 'tor_date' in item: date = item['tor_date'] if 'tor_type' in item: mode = item['tor_type'] if title and provider and mode and url: # Not all results have a date or a size if not date: date = 'Fri, 01 Jan 1970 00:00:00 +0100' if not size: size = '1000' # calculate match percentage - torrents might have words_with_underscore_separator score = fuzz.token_set_ratio(searchterm, title.replace('_', ' ')) # lose a point for each extra word in the title so we get the closest match words = len(getList(searchterm)) words -= len(getList(title)) score -= abs(words) if score >= 40: # ignore wildly wrong results? if not url.startswith('magnet'): if not mode == 'torznab' and not mode == 'direct': # what is this split for?? url = url.split('?')[0] result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date, 'url': urllib.quote_plus(url), 'mode': mode} searchresults.append(result) # from operator import itemgetter # searchresults = sorted(searchresults, key=itemgetter('score'), reverse=True) logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm)) return searchresults
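
# Sketch of the match scoring in searchItem() above (assuming the fuzzywuzzy
# package; getList() is approximated with str.split): token_set_ratio on the
# underscore-cleaned title, minus a point for each word of length difference
# between the title and the search term.
from fuzzywuzzy import fuzz

def match_score(searchterm, title):
    score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
    return score - abs(len(searchterm.split()) - len(title.split()))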
def EXTRA(book=None, test=False): errmsg = '' provider = "Extratorrent" host = lazylibrarian.CONFIG['EXTRA_HOST'] if not host.startswith('http'): host = 'http://' + host providerurl = url_fix(host + "/rss") params = {"type": "search", "s_cat": "2", "search": book['searchterm']} searchURL = providerurl + "/?%s" % urlencode(params) sterm = makeUnicode(book['searchterm']) data, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in data: logger.debug("No results found from %s for %s" % (provider, sterm)) success = True else: logger.debug('Error fetching data from %s: %s' % (provider, data)) errmsg = data data = False if test: return success results = [] minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1 if data: logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) d = feedparser.parse(data) if len(d.entries): for item in d.entries: try: title = unaccented(item['title']) try: seeders = int(item['seeders']) except ValueError: seeders = 0 try: size = int(item['size']) except ValueError: size = 0 url = None for link in item['links']: if 'x-bittorrent' in link['type']: url = link['href'] if not url or not title: logger.debug('No url or title found') elif minimumseeders < int(seeders): results.append({ 'bookid': book['bookid'], 'tor_prov': provider, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'torrent', 'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY'] }) logger.debug('Found %s. Size: %s' % (title, size)) else: logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders))) except Exception as e: logger.error("An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm)) return results, errmsg
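
# Sketch of the query construction in EXTRA() above; the urlencode import is
# hedged for Python 2/3 since this codebase spans both, and the host below is
# a placeholder, not a real mirror.
try:
    from urllib import urlencode        # Python 2
except ImportError:
    from urllib.parse import urlencode  # Python 3

params = {"type": "search", "s_cat": "2", "search": "foundation isaac asimov"}
example_url = 'http://extratorrent.example/rss' + "/?%s" % urlencode(params)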