def _notify(self, message=None, event=None, pushbullet_token=None, pushbullet_deviceid=None, force=False):
    """
    Sends a pushbullet notification based on the provided info or LL config

    message: The message string to send
    event: The title of the notification to send
    pushbullet_token: The access token to use (optional, defaults to the token in the config)
    pushbullet_deviceid: The device to send the notification to (optional)
    force: If True then the notification will be sent even if pushbullet is disabled in the config
    """
    try:
        message = unaccented(message)
    except Exception as e:
        logger.warn("Pushbullet: could not convert message: %s" % e)

    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_PUSHBULLET'] and not force:
        return False

    logger.debug("Pushbullet: Sending notification " + str(message))
    return self._sendPushbullet(message, event, pushbullet_token, pushbullet_deviceid, force=force)
def setBookAuthors(book):
    myDB = database.DBConnection()
    newauthors = 0
    newrefs = 0
    try:
        authorlist = getBookAuthors(book['bookid'])
        for author in authorlist:
            authtype = author['type']
            if authtype in ['primary author', 'main author', 'secondary author']:
                if author['role'] in ['Author', '—'] and author['work'] == 'all editions':
                    name = formatAuthorName(unaccented(author['name']))
                    exists = myDB.match('select authorid from authors where authorname=?', (name,))
                    if exists:
                        authorid = exists['authorid']
                    else:
                        # try to add new author to database by name
                        name, authorid, new = lazylibrarian.importer.addAuthorNameToDB(name, False, False)
                        if new and authorid:
                            newauthors += 1
                    if authorid:
                        # suppress duplicates in bookauthors
                        myDB.action('INSERT into bookauthors (AuthorID, BookID) VALUES (?, ?)',
                                    (authorid, book['bookid']), suppress='UNIQUE')
                        newrefs += 1
    except Exception as e:
        logger.debug("Error parsing authorlist for %s: %s" % (book['bookname'], str(e)))
    return newauthors, newrefs
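# A minimal sketch (not LazyLibrarian code) of the duplicate-suppression idea setBookAuthors
# relies on: the bookauthors table has a UNIQUE constraint, and the DB layer's suppress='UNIQUE'
# swallows the constraint error so re-linking the same author/book pair is a no-op.
# Table and column names below mirror the call above but the helper itself is illustrative.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE bookauthors (AuthorID TEXT, BookID TEXT, UNIQUE(AuthorID, BookID))')

def link_author_to_book(db, authorid, bookid):
    before = db.total_changes
    # INSERT OR IGNORE is sqlite's native way to suppress UNIQUE violations
    db.execute('INSERT OR IGNORE INTO bookauthors (AuthorID, BookID) VALUES (?, ?)',
               (authorid, bookid))
    return db.total_changes > before  # False when the pair already existed

assert link_author_to_book(conn, 'a1', 'b1')
assert not link_author_to_book(conn, 'a1', 'b1')  # duplicate suppressed, no new reference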
def log(message, level):
    logger = logging.getLogger('lazylibrarian')
    threadname = threading.currentThread().getName()

    # Get the frame data of the method that made the original logger call
    if len(inspect.stack()) > 2:
        frame = inspect.getframeinfo(inspect.stack()[2][0])
        program = os.path.basename(frame.filename)
        method = frame.function
        lineno = frame.lineno
    else:
        program = ""
        method = ""
        lineno = ""

    if 'windows' in platform.system().lower():  # windows cp1252 can't handle some accents
        message = formatter.unaccented(message)
    elif PY2:
        message = formatter.safe_unicode(message)
        message = message.encode(lazylibrarian.SYS_ENCODING)

    if level != 'DEBUG' or lazylibrarian.LOGLEVEL >= 2:
        # Limit the size of the "in-memory" log, as it gets slow if too long
        lazylibrarian.LOGLIST.insert(0, (formatter.now(), level, threadname, program, method, lineno, message))
        if len(lazylibrarian.LOGLIST) > formatter.check_int(lazylibrarian.CONFIG['LOGLIMIT'], 500):
            del lazylibrarian.LOGLIST[-1]

    message = "%s : %s:%s:%s : %s" % (threadname, program, method, lineno, message)
    if level == 'DEBUG':
        logger.debug(message)
    elif level == 'INFO':
        logger.info(message)
    elif level == 'WARNING':
        logger.warning(message)
    else:
        logger.error(message)
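# Design note: the insert-at-front / delete-from-end pattern above keeps LOGLIST bounded, but
# inserting at index 0 of a plain list is O(n). A collections.deque with maxlen gives the same
# bounded, newest-first buffer with O(1) appends; a rough sketch of the equivalent behaviour:
from collections import deque

loglist = deque(maxlen=500)  # oldest entries fall off the end automatically

def remember(entry):
    loglist.appendleft(entry)  # newest first, like LOGLIST.insert(0, ...)

for n in range(600):
    remember(('timestamp', 'DEBUG', 'thread', 'prog', 'method', n, 'message %d' % n))
assert len(loglist) == 500 and loglist[0][5] == 599  # only the newest 500 kept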
def _notify(self, message=None, event=None, slack_token=None, method=None, force=False):
    """
    Sends a slack incoming-webhook notification based on the provided info or LL config

    message: The message string to send
    force: If True then the notification will be sent even if slack is disabled in the config
    """
    try:
        message = unaccented(message)
    except Exception as e:
        logger.warn("Slack: could not convert message: %s" % e)

    # suppress notifications if the notifier is disabled but the notify options are checked
    if not lazylibrarian.CONFIG['USE_SLACK'] and not force:
        return False

    return self._sendSlack(message, event, slack_token, method, force)
def safe_move(src, dst, action='move'):
    """ Move or copy src to dst
        Retry without accents if unicode error as some file systems can't handle (some) accents
        Retry with some characters stripped if bad filename
        eg windows can't handle <>?":| (and maybe others) in filenames
        Return (new) dst if success
    """
    while action:  # might have more than one problem...
        try:
            if action == 'copy':
                shutil.copy(src, dst)
            else:
                shutil.move(src, dst)
            return dst
        except UnicodeEncodeError:
            newdst = unaccented(dst)
            if newdst != dst:
                dst = newdst
            else:
                raise
        except IOError as e:
            if e.errno == 22:  # bad mode or filename
                drive, path = os.path.splitdrive(dst)
                # strip some characters windows can't handle
                newpath = replace_all(path, __dic__)
                # windows filenames can't end in space or dot
                while newpath and newpath[-1] in '. ':
                    newpath = newpath[:-1]
                # anything left? has it changed?
                if newpath and newpath != path:
                    dst = os.path.join(drive, newpath)
                else:
                    raise
            else:
                raise
        except Exception:
            raise
    return dst
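# Example usage of safe_move: the caller only sees the returned destination, which may differ
# from the requested one if accents or characters like <>?":| had to be stripped for the target
# filesystem. A small, self-contained demonstration using temp files (the filenames are made up):
import os
import tempfile

def demo_safe_move():
    srcdir = tempfile.mkdtemp()
    dstdir = tempfile.mkdtemp()
    src = os.path.join(srcdir, 'book.epub')
    open(src, 'w').close()
    # request an accented destination; on most filesystems this succeeds unchanged,
    # on others safe_move retries with the unaccented form and returns that instead
    wanted = os.path.join(dstdir, u'Les Misérables.epub')
    final = safe_move(src, wanted)
    return final == wanted  # False means the name was adjusted during the retries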
def find_author_id(self, refresh=False):
    author = self.name
    author = formatAuthorName(unaccented(author))
    # googlebooks gives us author names with long form unicode characters
    if isinstance(author, str) and hasattr(author, "decode"):
        author = author.decode('utf-8')  # make unicode
    author = unicodedata.normalize('NFC', author)  # normalize to short form
    URL = 'https://www.goodreads.com/api/author_url/' + urllib.quote(author) + '?' + urllib.urlencode(self.params)
    logger.debug("Searching for author with name: %s" % author)
    authorlist = []
    try:
        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
    except Exception as e:
        logger.error("%s finding authorid: %s, %s" % (type(e).__name__, URL, str(e)))
        return authorlist
    if rootxml is None:
        logger.debug("Error requesting authorid")
        return authorlist

    resultxml = rootxml.getiterator('author')
    if not resultxml:
        logger.warn('No authors found with name: %s' % author)
    else:
        # In spite of how this looks, goodreads only returns one result, even if there are multiple
        # matches. We just have to hope we get the right one. eg search for "James Lovelock" returns
        # "James E. Lovelock", who only has one book listed under googlebooks, the rest are under
        # "James Lovelock". goodreads has all his books under "James E. Lovelock".
        # Can't come up with a good solution yet.
        # For now we'll have to let the user handle this by selecting/adding the author manually
        for author in resultxml:
            authorid = author.attrib.get("id")
            authorlist = self.get_author_info(authorid)
    return authorlist
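# A small illustration of why find_author_id normalizes to NFC first: googlebooks can return
# decomposed (long form) unicode, where an accented letter is a base letter plus a combining
# mark, and that form neither matches database entries nor url-quotes the same way.
import unicodedata

decomposed = u'Bronte\u0308'            # 'e' + combining diaeresis (long form)
composed = unicodedata.normalize('NFC', decomposed)
assert composed == u'Bront\xeb'         # single precomposed 'ë' (short form)
assert decomposed != composed and len(decomposed) == len(composed) + 1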
def find_book(self, bookid=None, queue=None):
    myDB = database.DBConnection()
    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % e)
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        if 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError):
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    booksub = ''
    bookname = unaccented(bookname)
    if ': ' in bookname:
        parts = bookname.split(': ', 1)
        bookname = parts[0]
        booksub = parts[1]

    dic = {':': '', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = bookname.strip()  # strip whitespace
    booksub = replace_all(booksub, dic)
    booksub = booksub.strip()  # strip whitespace
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today(),
        "Series": series,
        "SeriesNum": seriesNum
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        link = cache_cover(bookid, bookimg)
        if link is not None:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"Series": series, "SeriesNum": seriesNum}
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

    # Author is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    books_dict = []
    try:
        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
    except Exception as e:
        logger.error("Error fetching author books: %s" % e)
        return books_dict
    if rootxml is None:
        logger.debug("Error requesting author books")
        return books_dict
    if not in_cache:
        api_hits = api_hits + 1
    resultxml = rootxml.getiterator('book')

    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

    if not len(resultxml):
        logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
    else:
        logger.debug("[%s] Now processing books with GoodReads API" % authorname)
        resultsCount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        logger.debug(u"url " + URL)

        authorNameResult = rootxml.find('./author/name').text
        logger.debug(u"author name " + authorNameResult)
        loopCount = 1

        while resultxml is not None:
            for book in resultxml:
                total_count = total_count + 1

                if book.find('publication_year').text is None:
                    pubyear = "0000"
                else:
                    pubyear = book.find('publication_year').text

                try:
                    bookimg = book.find('image_url').text
                    if 'nocover' in bookimg:
                        bookimg = 'images/nocover.png'
                except (KeyError, AttributeError):
                    bookimg = 'images/nocover.png'

                # PAB this next section tries to get the book language using the isbn13 to look it up.
                # If no isbn13 we used to skip the book entirely, rather than including it with an
                # "Unknown" language. Changed this so we can still include the book with language set
                # to "Unknown". There is a setting in config.ini to allow or skip books with "Unknown"
                # language if you really don't want to include them.
                # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've already
                # got, so use that.
                # Also, with GR API rules we can only call the API once per second, which slows us down
                # a lot when all we want is the language. We sleep for one second per book that GR knows
                # about for each author in your library. The libraryThing API has the same 1 second
                # restriction, and is limited to 1000 hits per day, but has fewer books with unknown
                # language. To get around this and speed up the process, see if we already have a book
                # in the database with a similar start to the ISBN. The way ISBNs work, digits 3-5 of a
                # 13 char ISBN or digits 0-2 of a 10 digit ISBN indicate the region/language, so if two
                # books have the same 3 digit isbn code, they _should_ be the same language.
                # I ran a simple python script on my library of 1500 books, and these codes were 100%
                # correct on matching book languages, no mis-matches. It did result in a small number of
                # books with "unknown" language being wrongly matched, but most "unknown" were matched
                # to the correct language.
                # We could look up ISBNs we already know about in the database, but this only holds books
                # in the languages we want to keep, which reduces the number of cache hits, so we create
                # a new database table holding ALL results, including the ISBNs for languages we don't
                # want and books we reject.
                # The new table is created (if not exists) in init.py, so by the time we get here there
                # is an existing table.
                # If we haven't an already matching partial ISBN, look up the language code from
                # libraryThing: "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
                # If we find a matching language, add it to the database. If "unknown" or "invalid",
                # try GR as maybe GR can provide a match.
                # If both LT and GR return unknown, add the isbn to the db as "unknown". No point in
                # repeatedly asking LT for a code it's told you it doesn't know.
                # As an extra option, if language includes "All" in config.ini, we can skip this whole
                # section and process everything much faster by not querying for language at all.
                # It does mean we include a lot of unwanted foreign translations in the database,
                # but it's _much_ faster.

                bookLanguage = "Unknown"
                find_field = "id"
                isbn = ""
                isbnhead = ""
                if "All" not in valid_langs:  # do we care about language
                    if book.find('isbn').text is not None:
                        find_field = "isbn"
                        isbn = book.find('isbn').text
                        isbnhead = isbn[0:3]
                    elif book.find('isbn13').text is not None:
                        find_field = "isbn13"
                        isbn = book.find('isbn13').text
                        isbnhead = isbn[3:6]

                    if find_field != 'id':  # isbn or isbn13 found
                        match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                            isbnhead).fetchone()
                        if match:
                            bookLanguage = match['lang']
                            cache_hits = cache_hits + 1
                            logger.debug("Found cached language [%s] for %s [%s]" %
                                         (bookLanguage, find_field, isbnhead))
                        else:
                            # no match in cache, try searching librarything for a language code using
                            # the isbn. If no language found, librarything return value is "invalid"
                            # or "unknown". Returns plain text, not xml
                            BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                            try:
                                librarything_wait()
                                resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                lt_lang_hits = lt_lang_hits + 1
                                logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                if 'invalid' in resp or 'Unknown' in resp:
                                    find_field = "id"  # reset the field to force search on goodreads
                                else:
                                    bookLanguage = resp  # found a language code
                                    myDB.action('insert into languages values ("%s", "%s")' %
                                                (isbnhead, bookLanguage))
                                    logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                            except Exception as e:
                                logger.error("Error finding LT language result for [%s], %s" % (isbn, e))
                                find_field = "id"  # reset the field to search on goodreads

                    if find_field == 'id':
                        # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                        try:
                            if book.find(find_field).text is not None:
                                BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                           book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                logger.debug(u"Book URL: " + BOOK_URL)
                                try:
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)
                                    BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                    if BOOK_rootxml is None:
                                        logger.debug('Error requesting book language code')
                                        bookLanguage = ""
                                    else:
                                        if not in_cache:
                                            # only update last_goodreads if the result wasn't found in the cache
                                            lazylibrarian.LAST_GOODREADS = time_now
                                        bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                except Exception as e:
                                    logger.error("Error finding book results: %s" % e)
                                if not in_cache:
                                    gr_lang_hits = gr_lang_hits + 1
                                if not bookLanguage:
                                    bookLanguage = "Unknown"

                                if isbnhead != "":
                                    myDB.action('insert into languages values ("%s", "%s")' %
                                                (isbnhead, bookLanguage))
                                    logger.debug("GoodReads reports language [%s] for %s" %
                                                 (bookLanguage, isbnhead))
                                else:
                                    # GR didn't give us an isbn, so we can't cache it,
                                    # just use the language for this book
                                    not_cached = not_cached + 1

                                logger.debug(u"GR language: " + bookLanguage)
                            else:
                                logger.debug("No %s provided for [%s]" %
                                             (find_field, book.find('title').text))
                                # continue
                        except Exception as e:
                            logger.debug(u"An error has occurred: %s" % e)

                    if bookLanguage not in valid_langs:
                        logger.debug('Skipped a book with language %s' % bookLanguage)
                        ignored = ignored + 1
                        continue

                bookname = book.find('title').text
                bookid = book.find('id').text
                bookdesc = book.find('description').text
                bookisbn = book.find('isbn').text
                bookpub = book.find('publisher').text
                booklink = book.find('link').text
                bookrate = float(book.find('average_rating').text)
                bookpages = book.find('num_pages').text
                bookname = unaccented(bookname)

                if ': ' in bookname:
                    parts = bookname.split(': ', 1)
                    bookname = parts[0]
                    booksub = parts[1]
                else:
                    booksub = ''

                dic = {':': '', '"': '', '\'': ''}
                bookname = replace_all(bookname, dic)
                bookname = bookname.strip()  # strip whitespace
                booksub = replace_all(booksub, dic)
                booksub = booksub.strip()  # strip whitespace
                if booksub:
                    series, seriesNum = bookSeries(booksub)
                else:
                    series, seriesNum = bookSeries(bookname)

                # GoodReads sometimes has multiple bookids for the same book (same author/title,
                # different editions) and sometimes uses the same bookid if the book is the same
                # but the title is slightly different.
                # We use bookid, then reject if another author/title has a different bookid,
                # so we just keep one...
                find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                        locked = resulted['Manual']
                else:
                    book_status = lazylibrarian.NEWBOOK_STATUS
                    locked = False

                rejected = False
                if re.match('[^\w-]', bookname):  # reject books with bad characters in title
                    logger.debug(u"removed result [" + bookname + "] for bad characters")
                    removedResults = removedResults + 1
                    rejected = True

                if not rejected and not bookname:
                    logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorNameResult))
                    removedResults = removedResults + 1
                    rejected = True

                if not rejected:
                    find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' %
                                             (bookname, authorNameResult))
                    if find_books:
                        for find_book in find_books:
                            if find_book['BookID'] != bookid:
                                # we have a book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (find_book['BookID'], authorNameResult, bookname, bookid))
                                duplicates = duplicates + 1
                                rejected = True
                                break

                if not rejected:
                    find_books = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                    if find_books:
                        # we have a book with this bookid already
                        logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' %
                                     (bookid, authorNameResult, bookname))
                        duplicates = duplicates + 1
                        rejected = True

                if not rejected:
                    if book_status != "Ignored":
                        if not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorName": authorNameResult,
                                "AuthorID": authorid,
                                "AuthorLink": None,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": None,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": today(),
                                "Series": series,
                                "SeriesNum": seriesNum
                            }
                            resultsCount = resultsCount + 1
                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link is not None:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"Series": series, "SeriesNum": seriesNum}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                added_count = added_count + 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                updated_count = updated_count + 1
                    else:
                        book_ignore_count = book_ignore_count + 1

            loopCount = loopCount + 1
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                  urllib.urlencode(self.params) + '&page=' + str(loopCount)
            resultxml = None
            try:
                rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                if rootxml is None:
                    logger.debug('Error requesting next page of results')
                else:
                    resultxml = rootxml.getiterator('book')
                    if not in_cache:
                        api_hits = api_hits + 1
            except Exception as e:
                resultxml = None
                logger.error("Error finding next page of results: %s" % e)

            if resultxml is not None:
                if all(False for book in resultxml):  # returns True if iterator is empty
                    resultxml = None

        lastbook = myDB.action('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" '
                               'AND Status != "Ignored" order by BookDate DESC' % authorid).fetchone()
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
        else:
            lastbookname = None
            lastbooklink = None
            lastbookdate = None

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Ignored %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count)))

        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits,
                     ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    return books_dict
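# A minimal sketch (hypothetical helpers, not LazyLibrarian code) of the ISBN prefix trick
# described in the comments above: take digits 3-5 of an ISBN-13 (after the 978/979 prefix)
# or digits 0-2 of an ISBN-10 as a rough region/language key, and cache lookups under it so
# two books sharing the prefix never trigger a second remote language query.
def isbn_lang_prefix(isbn):
    isbn = isbn.replace('-', '')
    if len(isbn) == 13:
        return isbn[3:6]
    if len(isbn) == 10:
        return isbn[0:3]
    return ''

lang_cache = {}  # prefix -> language code, mirrors the "languages" table

def cached_language(isbn, lookup):
    """lookup is any callable that resolves a full isbn to a language code."""
    head = isbn_lang_prefix(isbn)
    if not head:
        return 'Unknown'
    if head not in lang_cache:
        lang_cache[head] = lookup(isbn)
    return lang_cache[head]

# e.g. both isbns share prefix '000', so the second call is answered from the cache:
assert cached_language('9780001234567', lambda i: 'eng') == 'eng'
assert cached_language('9780007654321', lambda i: 'fre') == 'eng'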
def search_rss_book(books=None, library=None):
    """
    books is a list of new books to add, or None for backlog search
    library is "eBook" or "AudioBook" or None to search all book types
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"

        if not lazylibrarian.USE_RSS():
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return

        myDB = database.DBConnection()

        resultlist, wishproviders = IterateOverWishLists()
        new_books = 0
        if not wishproviders:
            logger.debug('No wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
            for book in resultlist:
                # we get rss_author, rss_title, maybe rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on
                # author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and book['rss_bookid']:
                    bookmatch = myDB.match('select Status,BookName from books where bookid=?',
                                           (book['rss_bookid'],))
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:
                        import_book(book['rss_bookid'])
                        new_books += 1
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                            new_books += 1
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]  # type: dict
                            if result['isbn_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                new_books += 1
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]  # type: dict
                                if result['author_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90) \
                                        and result['book_fuzz'] > check_int(lazylibrarian.CONFIG['MATCH_RATIO'], 90):
                                    logger.info("Found (%s%% %s%%) %s: %s" %
                                                (result['author_fuzz'], result['book_fuzz'],
                                                 result['authorname'], result['bookname']))
                                    import_book(result['bookid'])
                                    new_books += 1
                                    bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % \
                                      (result['author_fuzz'], result['book_fuzz'],
                                       result['authorname'], result['bookname'])
                                logger.warn(msg)
        if new_books:
            logger.info("Wishlist marked %s book%s as Wanted" % (new_books, plural(new_books)))

        searchbooks = []
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded, books.Status, AudioStatus '
            cmd += 'from books,authors WHERE (books.Status="Wanted" OR AudioStatus="Wanted") '
            cmd += 'and books.AuthorID = authors.AuthorID order by BookAdded desc'
            results = myDB.select(cmd)
            for terms in results:
                searchbooks.append(terms)
        else:
            # The user has added a new book
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub, books.Status, AudioStatus '
                cmd += 'from books,authors WHERE BookID=? AND books.AuthorID = authors.AuthorID'
                results = myDB.select(cmd, (book['bookid'],))
                for terms in results:
                    searchbooks.append(terms)

        if len(searchbooks) == 0:
            logger.debug("SearchRSS - No books to search for")
            return

        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders and not wishproviders:
            logger.warn('No rss providers are available')
            return  # No point in continuing

        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))

        searchlist = []
        for searchbook in searchbooks:
            # searchterm is only used for display purposes
            searchterm = searchbook['AuthorName'] + ' ' + searchbook['BookName']
            if searchbook['BookSub']:
                searchterm = searchterm + ': ' + searchbook['BookSub']

            if library is None or library == 'eBook':
                if searchbook['Status'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "eBook",
                         "searchterm": searchterm})

            if library is None or library == 'AudioBook':
                if searchbook['AudioStatus'] == "Wanted":
                    searchlist.append(
                        {"bookid": searchbook['BookID'],
                         "bookName": searchbook['BookName'],
                         "bookSub": searchbook['BookSub'],
                         "authorName": searchbook['AuthorName'],
                         "library": "AudioBook",
                         "searchterm": searchterm})

        rss_count = 0
        for book in searchlist:
            if book['library'] == 'AudioBook':
                searchtype = 'audio'
            else:
                searchtype = 'book'
            found = processResultList(resultlist, book, searchtype, 'rss')

            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in book['bookName']:  # anything to shorten?
                searchtype = 'short' + searchtype
                found = processResultList(resultlist, book, searchtype, 'rss')

            if not found:
                logger.info("RSS Searches for %s %s returned no results." %
                            (book['library'], book['searchterm']))
            if found > True:
                rss_count += 1

        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))

    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
    finally:
        threading.currentThread().name = "WEBSERVER"
def addAuthorNameToDB(author=None, refresh=False, addbooks=True):
    # get the author name in a consistent format, look them up in the database
    # if not in database, try to import them.
    # return author, authorid, new where new=False if author already in db, new=True if added
    # authorname returned is our preferred name, or empty string if not found or unable to add
    myDB = database.DBConnection()
    new = False

    if len(author) < 2:
        logger.debug('Invalid Author Name [%s]' % author)
        return "", "", False

    author = formatAuthorName(author)
    # Check if the author exists, and import the author if not
    check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorName="%s"' %
                                    author.replace('"', '""'))
    if not check_exist_author and lazylibrarian.CONFIG['ADD_AUTHOR']:
        logger.debug('Author %s not found in database, trying to add' % author)
        # no match for supplied author, but we're allowed to add new ones
        GR = GoodReads(author)
        try:
            author_gr = GR.find_author_id()
        except Exception as e:
            logger.warn("Error finding author id for [%s] %s" % (author, str(e)))
            return "", "", False

        # only try to add if GR data matches found author data
        if author_gr:
            authorname = author_gr['authorname']
            # authorid = author_gr['authorid']
            # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien"
            match_auth = author.replace('.', ' ')
            match_auth = ' '.join(match_auth.split())
            match_name = authorname.replace('.', ' ')
            match_name = ' '.join(match_name.split())
            match_name = unaccented(match_name)
            match_auth = unaccented(match_auth)

            # allow a degree of fuzziness to cater for different accented character handling.
            # some author names have accents, and the filename may have the accented or
            # un-accented version of the character
            # The currently non-configurable value of fuzziness might need to go in config
            # We stored the unmodified GoodReads author name in author_gr, so store in LL db under that
            # fuzz.ratio doesn't lowercase for us
            match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower())
            if match_fuzz < 90:
                logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" %
                             (author, match_name, match_fuzz))

            # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown
            if author != "Unknown" and match_fuzz >= 90:
                # use the "intact" name for the author that we stored in the GR author_dict,
                # not one of the various mangled versions,
                # otherwise the books appear to be by a different author!
                author = author_gr['authorname']
                authorid = author_gr['authorid']
                # this new authorname may already be in the database, so check again
                check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorID="%s"' % authorid)
                if check_exist_author:
                    logger.debug('Found goodreads authorname %s in database' % author)
                else:
                    logger.info("Adding new author [%s]" % author)
                    try:
                        addAuthorToDB(authorname=author, refresh=refresh, authorid=authorid, addbooks=addbooks)
                        check_exist_author = myDB.match('SELECT AuthorID FROM authors where AuthorID="%s"' %
                                                        authorid)
                        if check_exist_author:
                            new = True
                    except Exception:
                        logger.debug('Failed to add author [%s] to db' % author)

    # check author exists in db, either newly loaded or already there
    if not check_exist_author:
        logger.debug("Failed to match author [%s] in database" % author)
        return "", "", False

    return author, check_exist_author['AuthorID'], new
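# A short worked example of the normalize-then-fuzz match used above. fuzz is the
# fuzzywuzzy/thefuzz ratio scorer the module already imports; the 90 threshold is the same
# currently non-configurable value used in addAuthorNameToDB.
def normalized(name):
    name = name.replace('.', ' ')
    return ' '.join(name.split()).lower()

# "J.R.R. Tolkien", "J. R. R. Tolkien" and "J R R Tolkien" all normalize to "j r r tolkien",
# so fuzz.ratio scores them 100 and any of the three spellings matches the GoodReads form:
assert normalized("J.R.R. Tolkien") == normalized("J R R Tolkien") == "j r r tolkien"
assert fuzz.ratio(normalized("J.R.R. Tolkien"), normalized("J. R. R. Tolkien")) == 100
# whereas a genuinely different author stays below the 90 cutoff:
assert fuzz.ratio(normalized("J.R.R. Tolkien"), normalized("Christopher Tolkien")) < 90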
def ReturnSearchTypeStructure(provider, api_key, book, searchType, searchMode):
    params = None
    if searchType in ["book", "shortbook"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['BOOKSEARCH'] and provider['BOOKCAT']:  # if specific booksearch, use it
            params = {
                "t": provider['BOOKSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['BOOKCAT']
            }
        elif provider['GENERALSEARCH'] and provider['BOOKCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['BOOKCAT']
            }
    elif searchType in ["audio", "shortaudio"]:
        authorname, bookname = get_searchterm(book, searchType)
        if provider['AUDIOSEARCH'] and provider['AUDIOCAT']:  # if specific audiosearch, use it
            params = {
                "t": provider['AUDIOSEARCH'],
                "apikey": api_key,
                "title": bookname,
                "author": authorname,
                "cat": provider['AUDIOCAT']
            }
        elif provider['GENERALSEARCH'] and provider['AUDIOCAT']:  # if not, try general search
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": authorname + ' ' + bookname,
                "cat": provider['AUDIOCAT']
            }
    elif searchType == "mag":
        if provider['MAGSEARCH'] and provider['MAGCAT']:  # if specific magsearch, use it
            params = {
                "t": provider['MAGSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
        elif provider['GENERALSEARCH'] and provider['MAGCAT']:
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "cat": provider['MAGCAT'],
                "q": unaccented(book['searchterm'].replace(':', '')),
                "extended": provider['EXTENDED'],
            }
    else:
        if provider['GENERALSEARCH']:
            if searchType == "shortgeneral":
                searchterm = unaccented(book['searchterm'].split('(')[0].replace(':', ''))
            else:
                searchterm = unaccented(book['searchterm'].replace(':', ''))
            params = {
                "t": provider['GENERALSEARCH'],
                "apikey": api_key,
                "q": searchterm,
                "extended": provider['EXTENDED'],
            }
    if params:
        logger.debug('[NewzNabPlus] - %s Search parameters set to %s' % (searchMode, str(params)))
    else:
        logger.debug('[NewzNabPlus] - %s No matching search parameters for %s' % (searchMode, searchType))

    return params
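# Illustration (made-up provider values, not defaults) of what the structure above produces for
# a typical newznab "book" search, and how it becomes the query string sent to the indexer:
example_provider = {'BOOKSEARCH': 'book', 'BOOKCAT': '7020'}
example_params = {
    "t": example_provider['BOOKSEARCH'],
    "apikey": "KEY",
    "title": "The Hobbit",
    "author": "Tolkien",
    "cat": example_provider['BOOKCAT'],
}
# urllib.urlencode(example_params) yields something like
# 't=book&apikey=KEY&title=The+Hobbit&author=Tolkien&cat=7020'
# which is appended to the provider's api endpoint url.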
def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'])
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
def find_book(bookid=None, queue=None):
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
          str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = get_json_request(URL)

    if not jsonresults:
        logger.debug('No results found for %s' % bookid)
        return

    bookname = jsonresults['volumeInfo']['title']
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = jsonresults['volumeInfo']['authors'][0]
    except KeyError:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted this book
        booklang = jsonresults['volumeInfo']['language']
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if booklang not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, booklang))
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    try:
        bookpub = jsonresults['volumeInfo']['publisher']
    except KeyError:
        bookpub = ""

    series = ""
    seriesNum = ""
    try:
        booksub = jsonresults['volumeInfo']['subtitle']
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = ""
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = ""
    except KeyError:
        booksub = ""

    try:
        bookdate = jsonresults['volumeInfo']['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'

    try:
        bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookrate = jsonresults['volumeInfo']['averageRating']
    except KeyError:
        bookrate = 0

    try:
        bookpages = jsonresults['volumeInfo']['pageCount']
    except KeyError:
        bookpages = 0

    try:
        bookgenre = jsonresults['volumeInfo']['categories'][0]
    except KeyError:
        bookgenre = ""

    try:
        bookdesc = jsonresults['volumeInfo']['description']
    except KeyError:
        bookdesc = ""

    try:
        if jsonresults['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
            bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0]['identifier']
        else:
            bookisbn = ""
    except KeyError:
        bookisbn = ""

    booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID="%s"' % AuthorID)
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName="%s"' % author['authorname'])
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:
                # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def import_CSV(search_dir=None):
    """ Find a csv file in the search_dir and process all the books in it,
        adding authors to the database if not found and marking the books as "Wanted"
    """
    try:
        if not search_dir:
            msg = "Alternate Directory not configured"
            logger.warn(msg)
            return msg
        elif not os.path.isdir(search_dir):
            msg = "Alternate Directory [%s] not found" % search_dir
            logger.warn(msg)
            return msg

        csvFile = csv_file(search_dir)

        headers = None
        content = {}

        if not csvFile:
            msg = "No CSV file found in %s" % search_dir
            logger.warn(msg)
            return msg
        else:
            logger.debug(u'Reading file %s' % csvFile)
            reader = csv.reader(open(csvFile))
            for row in reader:
                if reader.line_num == 1:
                    # If we are on the first line, create the headers list from the first row
                    headers = row
                else:
                    # Otherwise, the key in the content dictionary is the first item in the
                    # row and we can create the sub-dictionary by using the zip() function.
                    # we include the key in the dictionary as our exported csv files use
                    # bookid as the key
                    content[row[0]] = dict(zip(headers, row))

        # We can now get to the content by using the resulting dictionary, so to see
        # the list of lines, we can do: print content.keys() to get a list of keys
        # To see the list of fields available for each book: print headers

        if 'Author' not in headers or 'Title' not in headers:
            msg = 'Invalid CSV file found %s' % csvFile
            logger.warn(msg)
            return msg

        myDB = database.DBConnection()
        bookcount = 0
        authcount = 0
        skipcount = 0
        logger.debug(u"CSV: Found %s book%s in csv file" % (len(content.keys()), plural(len(content.keys()))))
        for item in content.keys():
            authorname = content[item]['Author']
            if isinstance(authorname, str) and hasattr(authorname, "decode"):
                authorname = authorname.decode(lazylibrarian.SYS_ENCODING)
            authorname = formatAuthorName(authorname)
            title = content[item]['Title']
            if isinstance(title, str) and hasattr(title, "decode"):
                title = title.decode(lazylibrarian.SYS_ENCODING)

            authmatch = myDB.match('SELECT * FROM authors where AuthorName=?', (authorname,))

            if authmatch:
                logger.debug(u"CSV: Author %s found in database" % authorname)
            else:
                logger.debug(u"CSV: Author %s not found" % authorname)
                newauthor, authorid, new = addAuthorNameToDB(author=authorname,
                                                             addbooks=lazylibrarian.CONFIG['NEWAUTHOR_BOOKS'])
                if len(newauthor) and newauthor != authorname:
                    logger.debug("Preferred authorname changed from [%s] to [%s]" % (authorname, newauthor))
                    authorname = newauthor
                if new:
                    authcount += 1

            bookmatch = finditem(content[item], authorname, headers)
            result = ''
            if bookmatch:
                authorname = bookmatch['AuthorName']
                bookname = bookmatch['BookName']
                bookid = bookmatch['BookID']
                bookstatus = bookmatch['Status']
                if bookstatus in ['Open', 'Wanted', 'Have']:
                    logger.info(u'Found book %s by %s, already marked as "%s"' %
                                (bookname, authorname, bookstatus))
                else:  # skipped/ignored
                    logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                    controlValueDict = {"BookID": bookid}
                    newValueDict = {"Status": "Wanted"}
                    myDB.upsert("books", newValueDict, controlValueDict)
                    bookcount += 1
            else:
                searchterm = "%s <ll> %s" % (title, authorname)
                results = search_for(unaccented(searchterm))
                if results:
                    result = results[0]
                    if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                            and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                        logger.info("Found (%s%% %s%%) %s: %s" %
                                    (result['author_fuzz'], result['book_fuzz'],
                                     result['authorname'], result['bookname']))
                        import_book(result['bookid'])
                        bookcount += 1
                        bookmatch = True

            if not bookmatch:
                msg = "Skipping book %s by %s" % (title, authorname)
                if not result:
                    msg += ', No results returned'
                    logger.warn(msg)
                else:
                    msg += ', No match found'
                    logger.warn(msg)
                    msg = "Closest match (%s%% %s%%) %s: %s" % \
                          (result['author_fuzz'], result['book_fuzz'],
                           result['authorname'], result['bookname'])
                    logger.warn(msg)
                skipcount += 1

        msg = "Added %i new author%s, marked %i book%s as 'Wanted', %i book%s not found" % \
              (authcount, plural(authcount), bookcount, plural(bookcount), skipcount, plural(skipcount))
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in importCSV: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
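# A self-contained illustration of the header/zip pattern import_CSV uses to turn csv rows
# into a dict keyed by bookid (the file contents here are made up for the example):
import csv

example_lines = ["BookID,Author,Title",
                 "1234,Terry Pratchett,Mort",
                 "5678,Ursula K. Le Guin,The Dispossessed"]
reader = csv.reader(example_lines)
headers = None
content = {}
for line_num, row in enumerate(reader, 1):
    if line_num == 1:
        headers = row  # first row provides the field names
    else:
        content[row[0]] = dict(zip(headers, row))  # keyed by the BookID column

assert content['1234']['Title'] == 'Mort'
assert content['5678']['Author'] == 'Ursula K. Le Guin'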
def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"search": book['searchterm'], "page": page, "cat": cat}
        searchURL = providerurl + "/?%s" % urlencode(params)
        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')

            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                  str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace(' ', '').upper()
                            mult = 1
                            if 'K' in size:
                                size = size.split('K')[0]
                                mult = 1024
                            elif 'M' in size:
                                size = size.split('M')[0]
                                mult = 1024 * 1024
                            elif 'G' in size:
                                size = size.split('G')[0]
                                mult = 1024 * 1024 * 1024
                            size = int(float(size) * mult)
                        except (ValueError, IndexError):
                            size = 0

                        try:
                            seeders = int(td[2].text)
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < int(seeders):
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
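# The size-string handling above, pulled out as a standalone sketch (hypothetical helper,
# not part of the provider code): torrent listings report sizes like '1.4GB' or '700 MB',
# and the parser reduces them to bytes with a power-of-1024 multiplier.
def human_size_to_bytes(text):
    size = str(text).replace(' ', '').upper()
    mult = 1
    if 'K' in size:
        size, mult = size.split('K')[0], 1024
    elif 'M' in size:
        size, mult = size.split('M')[0], 1024 * 1024
    elif 'G' in size:
        size, mult = size.split('G')[0], 1024 * 1024 * 1024
    try:
        return int(float(size) * mult)
    except ValueError:
        return 0

assert human_size_to_bytes('700MB') == 700 * 1024 * 1024
assert human_size_to_bytes('1.4 GB') == int(1.4 * 1024 * 1024 * 1024)
assert human_size_to_bytes('unknown') == 0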
def search_magazines(mags=None, reset=False):
    # produce a list of magazines to search for, tor, nzb, torznab, rss
    # noinspection PyBroadException
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if mags is None:
                threading.currentThread().name = "SEARCHALLMAG"
            else:
                threading.currentThread().name = "SEARCHMAG"

        myDB = database.DBConnection()
        searchlist = []

        if mags is None:  # backlog search
            searchmags = myDB.select('SELECT Title, Regex, LastAcquired, '
                                     'IssueDate from magazines WHERE Status="Active"')
        else:
            searchmags = []
            for magazine in mags:
                searchmags_temp = myDB.select('SELECT Title, Regex, LastAcquired, IssueDate from magazines '
                                              'WHERE Title=? AND Status="Active"', (magazine['bookid'],))
                for terms in searchmags_temp:
                    searchmags.append(terms)

        if len(searchmags) == 0:
            threading.currentThread().name = "WEBSERVER"
            return

        # should clear old search results as they might not be available any more
        # ie torrent not available, changed providers, out of news server retention etc.
        # Only delete the "skipped" ones, not wanted/snatched/processed/ignored
        logger.debug("Removing old magazine search results")
        myDB.action('DELETE from pastissues WHERE Status="Skipped"')

        logger.info('Searching for %i magazine%s' % (len(searchmags), plural(len(searchmags))))

        for searchmag in searchmags:
            bookid = searchmag['Title']
            searchterm = searchmag['Regex']

            if not searchterm:
                dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ',
                       '"': '', ',': '', '*': ''}
                # strip accents from the magazine title for easier name-matching
                searchterm = unaccented_str(searchmag['Title'])
                if not searchterm:
                    # unless there are no ascii characters left
                    searchterm = searchmag['Title']
                searchterm = replace_all(searchterm, dic)
                searchterm = re.sub('[.\-/]', ' ', searchterm)
                searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)

            searchlist.append({"bookid": bookid, "searchterm": searchterm})

        if not searchlist:
            logger.warn('There is nothing to search for. Mark some magazines as active.')

        for book in searchlist:
            resultlist = []

            if lazylibrarian.USE_NZB():
                resultlist, nproviders = IterateOverNewzNabSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_NZB_MSG, 0) + 1200 < timenow:
                        logger.warn('No nzb providers are available. Check config and blocklist')
                        lazylibrarian.NO_NZB_MSG = timenow

            if lazylibrarian.USE_DIRECT():
                dir_resultlist, nproviders = IterateOverDirectSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_DIRECT_MSG, 0) + 1200 < timenow:
                        logger.warn('No direct providers are available. Check config and blocklist')
                        lazylibrarian.NO_DIRECT_MSG = timenow

                if dir_resultlist:
                    for item in dir_resultlist:  # reformat the results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_TOR():
                tor_resultlist, nproviders = IterateOverTorrentSites(book, 'mag')
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_TOR_MSG, 0) + 1200 < timenow:
                        logger.warn('No tor providers are available. Check config and blocklist')
                        lazylibrarian.NO_TOR_MSG = timenow

                if tor_resultlist:
                    for item in tor_resultlist:  # reformat the torrent results so they look like nzbs
                        resultlist.append({
                            'bookid': item['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': 'Fri, 01 Jan 1970 00:00:00 +0100',  # fake date as none returned from torrents
                            'nzbsize': item['tor_size'],
                            'nzbmode': 'torrent'
                        })

            if lazylibrarian.USE_RSS():
                rss_resultlist, nproviders = IterateOverRSSSites()
                if not nproviders:
                    # don't nag. Show warning message no more than every 20 mins
                    timenow = int(time.time())
                    if check_int(lazylibrarian.NO_RSS_MSG, 0) + 1200 < timenow:
                        logger.warn('No rss providers are available. Check config and blocklist')
                        lazylibrarian.NO_RSS_MSG = timenow

                if rss_resultlist:
                    for item in rss_resultlist:  # reformat the rss results so they look like nzbs
                        resultlist.append({
                            'bookid': book['bookid'],
                            'nzbprov': item['tor_prov'],
                            'nzbtitle': item['tor_title'],
                            'nzburl': item['tor_url'],
                            'nzbdate': item['tor_date'],  # may be fake date as none returned from rss torrents, only rss nzb
                            'nzbsize': item['tor_size'],
                            'nzbmode': item['tor_type']
                        })

            if not resultlist:
                logger.debug("No results for magazine %s" % book['searchterm'])
            else:
                bad_name = 0
                bad_date = 0
                old_date = 0
                rejects = 0
                total_nzbs = 0
                new_date = 0
                maglist = []
                issues = []
                bookid = ''
                for nzb in resultlist:
                    total_nzbs += 1
                    bookid = nzb['bookid']
                    # strip accents from the magazine title for easier name-matching
                    nzbtitle = unaccented_str(nzb['nzbtitle'])
                    if not nzbtitle:
                        # unless it's not a latin-1 encodable name
                        nzbtitle = nzb['nzbtitle']
                    nzbtitle = nzbtitle.replace('"', '').replace("'", "")  # suppress quotes in titles
                    nzburl = nzb['nzburl']
                    nzbprov = nzb['nzbprov']
                    nzbdate_temp = nzb['nzbdate']
                    nzbsize_temp = nzb['nzbsize']
                    nzbsize_temp = check_int(nzbsize_temp, 1000)  # not all torrents returned by torznab have a size
                    nzbsize = round(float(nzbsize_temp) / 1048576, 2)
                    nzbdate = nzbdate2format(nzbdate_temp)
                    nzbmode = nzb['nzbmode']

                    results = myDB.match('SELECT * from magazines WHERE Title=?', (bookid,))
                    if not results:
                        logger.debug('Magazine [%s] does not match search term [%s].' % (nzbtitle, bookid))
                        bad_name += 1
                    else:
                        rejected = False
                        maxsize = check_int(lazylibrarian.CONFIG['REJECT_MAGSIZE'], 0)
                        if maxsize and nzbsize > maxsize:
                            logger.debug("Rejecting %s, too large" % nzbtitle)
                            rejected = True

                        if not rejected:
                            minsize = check_int(lazylibrarian.CONFIG['REJECT_MAGMIN'], 0)
                            if minsize and nzbsize < minsize:
                                logger.debug("Rejecting %s, too small" % nzbtitle)
                                rejected = True

                        if not rejected:
                            dic = {'.': ' ', '-': ' ', '/': ' ', '+': ' ', '_': ' ', '(': '', ')': ''}
                            nzbtitle_formatted = replace_all(nzbtitle, dic).strip()
                            # Need to make sure that substrings of magazine titles don't get found
                            # (e.g. Maxim USA will find Maximum PC USA)
                            # remove extra spaces if they're in a row
                            if nzbtitle_formatted and nzbtitle_formatted[0] == '[' and nzbtitle_formatted[-1] == ']':
                                nzbtitle_formatted = nzbtitle_formatted[1:-1]
                            nzbtitle_exploded_temp = " ".join(nzbtitle_formatted.split())
                            nzbtitle_exploded = nzbtitle_exploded_temp.split(' ')

                            if ' ' in bookid:
                                bookid_exploded = bookid.split(' ')
                            else:
                                bookid_exploded = [bookid]

                            # check nzb has magazine title and a date/issue nr
                            # eg The MagPI July 2015
                            if len(nzbtitle_exploded) > len(bookid_exploded):
                                # needs to be longer as it has to include a date
                                # check all the words in the mag title are in the nzbtitle
                                rejected = False
                                wlist = []
                                for word in nzbtitle_exploded:
                                    wlist.append(unaccented(word).lower())
                                for word in bookid_exploded:
                                    if unaccented(word).lower() not in wlist:
                                        rejected = True
                                        break

                                if rejected:
                                    logger.debug(u"Magazine title match failed " + bookid +
                                                 " for " + nzbtitle_formatted)
                                else:
                                    logger.debug(u"Magazine matched " + bookid +
                                                 " for " + nzbtitle_formatted)
                            else:
                                logger.debug("Magazine name too short (%s)" % len(nzbtitle_exploded))
                                rejected = True

                        if not rejected:
                            blocked = myDB.match('SELECT * from wanted WHERE NZBurl=? and Status="Failed"',
                                                 (nzburl,))
                            if blocked:
                                logger.debug("Rejecting %s, blacklisted at %s" %
                                             (nzbtitle_formatted, blocked['NZBprov']))
                                rejected = True

                        if not rejected:
                            reject_list = getList(str(results['Reject']).lower())
                            reject_list += getList(lazylibrarian.CONFIG['REJECT_MAGS'])
                            lower_title = unaccented(nzbtitle_formatted).lower()
                            lower_bookid = unaccented(bookid).lower()
                            if reject_list:
                                if lazylibrarian.LOGLEVEL > 2:
                                    logger.debug('Reject: %s' % str(reject_list))
                                    logger.debug('Title: %s' % lower_title)
                                    logger.debug('Bookid: %s' % lower_bookid)
                            for word in reject_list:
                                if word in lower_title and word not in lower_bookid:
                                    rejected = True
                                    logger.debug("Rejecting %s, contains %s" % (nzbtitle_formatted, word))
                                    break

                        regex_pass = 0
                        if not rejected:
                            # Magazine names have many different styles of date:
                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            # MonthName DD YYYY or MonthName DD, YYYY
                            # YYYY MM or YYYY MM DD
                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            # nn YYYY issue number without "Nr" before it
                            # issue and year as a single 6 digit string eg 222015
                            newdatish = "none"

                            # DD MonthName YYYY OR MonthName YYYY or Issue nn, MonthName YYYY
                            pos = 0
                            while pos < len(nzbtitle_exploded):
                                year = check_year(nzbtitle_exploded[pos])
                                if year and pos:
                                    month = month2num(nzbtitle_exploded[pos - 1])
                                    if month:
                                        if pos - 1:
                                            day = check_int(nzbtitle_exploded[pos - 2], 1)
                                            if day > 31:  # probably issue number nn
                                                day = 1
                                        else:
                                            day = 1
                                        newdatish = "%04d-%02d-%02d" % (year, month, day)
                                        try:
                                            _ = datetime.date(year, month, day)
                                            regex_pass = 1
                                            break
                                        except ValueError:
                                            regex_pass = 0
                                pos += 1

                            # MonthName DD YYYY or MonthName DD, YYYY
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and (pos - 1):
                                        month = month2num(nzbtitle_exploded[pos - 2])
                                        if month:
                                            day = check_int(nzbtitle_exploded[pos - 1].rstrip(','), 1)
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 2
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # YYYY MM or YYYY MM DD
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year and pos + 1 < len(nzbtitle_exploded):
                                        month = check_int(nzbtitle_exploded[pos + 1], 0)
                                        if month:
                                            if pos + 2 < len(nzbtitle_exploded):
                                                day = check_int(nzbtitle_exploded[pos + 2], 1)
                                            else:
                                                day = 1
                                            try:
                                                _ = datetime.date(year, month, day)
                                                newdatish = "%04d-%02d-%02d" % (year, month, day)
                                                regex_pass = 3
                                                break
                                            except ValueError:
                                                regex_pass = 0
                                    pos += 1

                            # Issue/No/Nr/Vol nn, YYYY or Issue/No/Nr/Vol nn
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    if nzbtitle_exploded[pos].lower() in ["issue", "no", "nr", "vol"]:
                                        if pos + 1 < len(nzbtitle_exploded):
                                            issue = check_int(nzbtitle_exploded[pos + 1], 0)
                                            if issue:
                                                newdatish = str(issue)  # 4 == 04 == 004
                                                if pos + 2 < len(nzbtitle_exploded):
                                                    year = check_year(nzbtitle_exploded[pos + 2])
                                                    if year and year < int(datetime.date.today().year):
                                                        newdatish = '0'  # it's old
                                                    regex_pass = 4  # Issue/No/Nr/Vol nn, YYYY
                                                else:
                                                    regex_pass = 5  # Issue/No/Nr/Vol nn
                                                break
                                    pos += 1

                            # nn YYYY issue number without "Nr" before it
                            if not regex_pass:
                                pos = 1
                                while pos < len(nzbtitle_exploded):
                                    year = check_year(nzbtitle_exploded[pos])
                                    if year:
                                        issue = check_int(nzbtitle_exploded[pos - 1], 0)
                                        if issue:
                                            newdatish = str(issue)  # 4 == 04 == 004
                                            regex_pass = 6
                                            if year < int(datetime.date.today().year):
                                                newdatish = '0'  # it's old
                                            break
                                    pos += 1

                            # issue and year as a single 6 digit string eg 222015
                            if not regex_pass:
                                pos = 0
                                while pos < len(nzbtitle_exploded):
                                    issue = nzbtitle_exploded[pos]
                                    if issue.isdigit() and len(issue) == 6:
                                        year = int(issue[2:])
                                        issue = int(issue[:2])
                                        newdatish = str(issue)  # 4 == 04 == 004
                                        regex_pass = 7
                                        if year < int(datetime.date.today().year):
                                            newdatish = '0'  # it's old
                                        break
                                    pos += 1

                            if not regex_pass:
                                logger.debug('Magazine %s not in a recognised date format.' %
                                             nzbtitle_formatted)
                                bad_date += 1
                                # allow issues with good name but bad date to be included
                                # so user can manually select them, incl those with issue numbers
                                newdatish = "1970-01-01"  # provide a fake date for bad-date issues
                                regex_pass = 99

                        if rejected:
                            rejects += 1
                        else:
                            if lazylibrarian.LOGLEVEL > 2:
                                logger.debug("regex %s [%s] %s" % (regex_pass, nzbtitle_formatted, newdatish))
                            # wanted issues go into wanted table marked "Wanted"
                            # the rest into pastissues table marked "Skipped"
                            insert_table = "pastissues"
                            insert_status = "Skipped"

                            control_date = results['IssueDate']
                            if control_date is None:  # we haven't got any copies of this magazine yet
                                # get a rough time just over a month ago to compare to, in format yyyy-mm-dd
                                # could perhaps calc differently for weekly, biweekly etc
                                # or for magazines with only an issue number, use zero
                                if str(newdatish).isdigit():
                                    logger.debug('Magazine comparing issue numbers (%s)' % newdatish)
                                    control_date = 0
                                elif re.match('\d+-\d\d-\d\d', str(newdatish)):
                                    start_time = time.time()
                                    start_time -= int(lazylibrarian.CONFIG['MAG_AGE']) * 24 * 60 * 60  # number of seconds in days
                                    if start_time < 0:  # limit of unixtime (1st Jan 1970)
                                        start_time = 0
                                    control_date = time.strftime("%Y-%m-%d", time.localtime(start_time))
                                    logger.debug('Magazine date comparing to %s' % control_date)
                                else:
                                    logger.debug('Magazine unable to find comparison type [%s]' % newdatish)
                                    control_date = 0

                            if str(control_date).isdigit() and str(newdatish).isdigit():
                                # for issue numbers, check if later than last one we have
                                comp_date = int(newdatish) - int(control_date)
                                newdatish = "%s" % newdatish
                                newdatish = newdatish.zfill(4)  # pad so we sort correctly
                            elif re.match('\d+-\d\d-\d\d', str(control_date)) and \
                                    re.match('\d+-\d\d-\d\d', str(newdatish)):
                                # only grab a copy if it's newer than the most recent we have,
                                # or newer than a month ago if we have none
                                comp_date = datecompare(newdatish, control_date)
                            else:
                                # invalid
comparison of date and issue number if re.match('\d+-\d\d-\d\d', str(control_date)): logger.debug( 'Magazine %s failed: Expecting a date' % nzbtitle_formatted) else: logger.debug( 'Magazine %s failed: Expecting issue number' % nzbtitle_formatted) bad_date += 1 newdatish = "1970-01-01" # this is our fake date for ones we can't decipher comp_date = 0 if comp_date > 0: # keep track of what we're going to download so we don't download dupes new_date += 1 issue = bookid + ',' + newdatish if issue not in issues: maglist.append({ 'bookid': bookid, 'nzbprov': nzbprov, 'nzbtitle': nzbtitle, 'nzburl': nzburl, 'nzbmode': nzbmode }) logger.debug( 'This issue of %s is new, downloading' % nzbtitle_formatted) issues.append(issue) logger.debug('Magazine request number %s' % len(issues)) if lazylibrarian.LOGLEVEL > 2: logger.debug(str(issues)) insert_table = "wanted" insert_status = "Wanted" nzbdate = now() # when we asked for it else: logger.debug( 'This issue of %s is already flagged for download' % issue) else: if newdatish != "1970-01-01": # this is our fake date for ones we can't decipher logger.debug( 'This issue of %s is old; skipping.' % nzbtitle_formatted) old_date += 1 # store only the _new_ matching results # Don't add a new entry if this issue has been found on an earlier search # and status has been user-set ( we only delete the "Skipped" ones ) # In "wanted" table it might be already snatched/downloading/processing mag_entry = myDB.match( 'SELECT * from %s WHERE NZBtitle=? and NZBprov=?' % insert_table, (nzbtitle, nzbprov)) if mag_entry: if lazylibrarian.LOGLEVEL > 2: logger.debug( '%s is already in %s marked %s' % (nzbtitle, insert_table, insert_status)) else: controlValueDict = { "NZBtitle": nzbtitle, "NZBprov": nzbprov } newValueDict = { "NZBurl": nzburl, "BookID": bookid, "NZBdate": nzbdate, "AuxInfo": newdatish, "Status": insert_status, "NZBsize": nzbsize, "NZBmode": nzbmode } myDB.upsert(insert_table, newValueDict, controlValueDict) if lazylibrarian.LOGLEVEL > 2: logger.debug('Added %s to %s marked %s' % (nzbtitle, insert_table, insert_status)) msg = 'Found %i result%s for %s. %i new,' % ( total_nzbs, plural(total_nzbs), bookid, new_date) msg += ' %i old, %i fail date, %i fail name,' % ( old_date, bad_date, bad_name) msg += ' %i rejected: %i to download' % (rejects, len(maglist)) logger.info(msg) for magazine in maglist: if magazine['nzbmode'] in ["torznab", "torrent", "magnet"]: snatch = TORDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') else: snatch = NZBDownloadMethod(magazine['bookid'], magazine['nzbtitle'], magazine['nzburl'], 'magazine') if snatch: logger.info( 'Downloading %s from %s' % (magazine['nzbtitle'], magazine["nzbprov"])) notify_snatch("Magazine %s from %s at %s" % (unaccented(magazine['nzbtitle']), magazine["nzbprov"], now())) custom_notify_snatch(magazine['bookid']) scheduleJob(action='Start', target='processDir') if reset: scheduleJob(action='Restart', target='search_magazines') logger.info("Search for magazines complete") except Exception: logger.error('Unhandled exception in search_magazines: %s' % traceback.format_exc()) finally: threading.currentThread().name = "WEBSERVER"
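# The regex_pass cascade above recognises several magazine date styles in turn.
# Below is a minimal, self-contained sketch of the first two ideas ("MonthName
# YYYY" with an optional leading day, then a bare "Issue/No/Nr/Vol nn"); the
# helpers _check_year and _month2num are hypothetical stand-ins that behave
# like the real formatter functions, not lazylibrarian's own code.
import datetime

_MONTHS = ['january', 'february', 'march', 'april', 'may', 'june', 'july',
           'august', 'september', 'october', 'november', 'december']

def _check_year(word):
    # a 4-digit token in a plausible range is treated as a year
    return int(word) if word.isdigit() and 1900 < int(word) < 2100 else 0

def _month2num(word):
    word = word.lower().rstrip(',')
    return _MONTHS.index(word) + 1 if word in _MONTHS else 0

def guess_issue_date(title):
    words = title.split()
    # style 1: [DD] MonthName YYYY  ->  ISO date string
    for pos, word in enumerate(words):
        year = _check_year(word)
        if year and pos:
            month = _month2num(words[pos - 1])
            if month:
                day = 1
                if pos > 1 and words[pos - 2].isdigit() and int(words[pos - 2]) <= 31:
                    day = int(words[pos - 2])
                try:
                    return str(datetime.date(year, month, day))
                except ValueError:
                    pass
    # style 2: Issue/No/Nr/Vol nn  ->  zero-padded issue number so it sorts
    for pos, word in enumerate(words[:-1]):
        if word.lower() in ('issue', 'no', 'nr', 'vol') and words[pos + 1].isdigit():
            return words[pos + 1].zfill(4)
    return None

# guess_issue_date("The MagPi July 2015")    -> '2015-07-01'
# guess_issue_date("Linux Format Issue 222") -> '0222'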
def find_book(self, bookid=None, queue=None):
    myDB = database.DBConnection()
    if not lazylibrarian.GB_API:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults, in_cache = get_json_request(URL)
    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return
    bookname = jsonresults['volumeInfo']['title']
    dic = {':': '', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace
    try:
        authorname = jsonresults['volumeInfo']['authors'][0]
    except KeyError:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    try:
        # warn if language is in ignore list, but user said they wanted this book
        booklang = jsonresults['volumeInfo']['language']
        valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')])
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"
    try:
        bookpub = jsonresults['volumeInfo']['publisher']
    except KeyError:
        bookpub = None
    series = None
    seriesNum = None
    try:
        booksub = jsonresults['volumeInfo']['subtitle']
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = None
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = None
    except KeyError:
        booksub = None
    try:
        bookdate = jsonresults['volumeInfo']['publishedDate']
    except KeyError:
        bookdate = '0000-00-00'
    try:
        bookimg = jsonresults['volumeInfo']['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'
    try:
        bookrate = jsonresults['volumeInfo']['averageRating']
    except KeyError:
        bookrate = 0
    try:
        bookpages = jsonresults['volumeInfo']['pageCount']
    except KeyError:
        bookpages = 0
    try:
        bookgenre = jsonresults['volumeInfo']['categories'][0]
    except KeyError:
        bookgenre = None
    try:
        bookdesc = jsonresults['volumeInfo']['description']
    except KeyError:
        bookdesc = None
    try:
        if jsonresults['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
            bookisbn = jsonresults['volumeInfo']['industryIdentifiers'][0]['identifier']
        else:
            bookisbn = None
    except KeyError:
        bookisbn = None
    booklink = jsonresults['volumeInfo']['canonicalVolumeLink']
    bookrate = float(bookrate)
    name = jsonresults['volumeInfo']['authors'][0]
    GR = GoodReads(name)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        controlValueDict = {"BookID": bookid}
        newValueDict = {
            "AuthorName": authorname,
            "AuthorID": AuthorID,
            "AuthorLink": "",
            "BookName": bookname,
            "BookSub": booksub,
            "BookDesc": bookdesc,
            "BookIsbn": bookisbn,
            "BookPub": bookpub,
            "BookGenre": bookgenre,
            "BookImg": bookimg,
            "BookLink": booklink,
            "BookRate": bookrate,
            "BookPages": bookpages,
            "BookDate": bookdate,
            "BookLang": booklang,
            "Status": "Wanted",
            "BookAdded": today(),
            "Series": series,
            "SeriesNum": seriesNum
        }
        myDB.upsert("books", newValueDict, controlValueDict)
        logger.debug("%s added to the books database" % bookname)
        if 'nocover' in bookimg or 'nophoto' in bookimg:
            # try to get a cover from librarything
            workcover = getBookCover(bookid)
            if workcover:
                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": workcover}
                myDB.upsert("books", newValueDict, controlValueDict)
        elif bookimg and bookimg.startswith('http'):
            link = cache_cover(bookid, bookimg)
            if link is not None:
                controlValueDict = {"BookID": bookid}
                newValueDict = {"BookImg": link}
                myDB.upsert("books", newValueDict, controlValueDict)
        if seriesNum is None:
            # try to get series info from librarything
            series, seriesNum = getWorkSeries(bookid)
            if seriesNum:
                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                controlValueDict = {"BookID": bookid}
                newValueDict = {"Series": series, "SeriesNum": seriesNum}
                myDB.upsert("books", newValueDict, controlValueDict)
        worklink = getWorkPage(bookid)
        if worklink:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"WorkPage": worklink}
            myDB.upsert("books", newValueDict, controlValueDict)
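# The long run of try/except KeyError blocks in find_book (and in find_results
# below) can be condensed with dict.get() defaults; a sketch of the same
# lookups, assuming jsonresults is the parsed Google Books volume as above:
def _volume_field(jsonresults, field, default=None):
    return jsonresults.get('volumeInfo', {}).get(field, default)

# bookpub  = _volume_field(jsonresults, 'publisher')
# bookdate = _volume_field(jsonresults, 'publishedDate', '0000-00-00')
# bookrate = float(_volume_field(jsonresults, 'averageRating', 0))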
def find_results(self, authorname=None, queue=None): myDB = database.DBConnection() resultlist = [] # See if we should check ISBN field, otherwise ignore it api_strings = ['inauthor:', 'intitle:'] if is_valid_isbn(authorname): api_strings = ['isbn:'] api_hits = 0 logger.debug( 'Now searching Google Books API with keyword: ' + self.name) for api_value in api_strings: startindex = 0 if api_value == "isbn:": set_url = self.url + urllib.quote(api_value + self.name.encode(lazylibrarian.SYS_ENCODING)) else: set_url = self.url + \ urllib.quote(api_value + '"' + self.name.encode(lazylibrarian.SYS_ENCODING) + '"') try: startindex = 0 resultcount = 0 ignored = 0 number_results = 1 total_count = 0 no_author_count = 0 while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urllib.urlencode(self.params) try: jsonresults, in_cache = get_json_request(URL) if jsonresults is None: number_results = 0 else: if not in_cache: api_hits = api_hits + 1 number_results = jsonresults['totalItems'] logger.debug('Searching url: ' + URL) if number_results == 0: logger.warn( 'Found no results for %s with value: %s' % (api_value, self.name)) break else: pass except HTTPError as err: logger.warn( 'Google Books API Error [%s]: Check your API key or wait a while' % err.reason) break startindex = startindex + 40 for item in jsonresults['items']: total_count = total_count + 1 # skip if no author, no author is no book. try: Author = item['volumeInfo']['authors'][0] except KeyError: logger.debug( 'Skipped a result without authorfield.') no_author_count = no_author_count + 1 continue valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]) if "All" not in valid_langs: # don't care about languages, accept all try: # skip if language is not in valid list - booklang = item['volumeInfo']['language'] if booklang not in valid_langs: logger.debug( 'Skipped a book with language %s' % booklang) ignored = ignored + 1 continue except KeyError: ignored = ignored + 1 logger.debug( 'Skipped a result where no language is found') continue try: bookpub = item['volumeInfo']['publisher'] except KeyError: bookpub = None try: booksub = item['volumeInfo']['subtitle'] except KeyError: booksub = None try: bookdate = item['volumeInfo']['publishedDate'] except KeyError: bookdate = '0000-00-00' bookdate = bookdate[:4] try: bookimg = item['volumeInfo']['imageLinks']['thumbnail'] except KeyError: bookimg = 'images/nocover.png' try: bookrate = item['volumeInfo']['averageRating'] except KeyError: bookrate = 0 try: bookpages = item['volumeInfo']['pageCount'] except KeyError: bookpages = '0' try: bookgenre = item['volumeInfo']['categories'][0] except KeyError: bookgenre = None try: bookdesc = item['volumeInfo']['description'] except KeyError: bookdesc = 'Not available' try: num_reviews = item['volumeInfo']['ratingsCount'] except KeyError: num_reviews = 0 try: if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10': bookisbn = item['volumeInfo'][ 'industryIdentifiers'][0]['identifier'] else: bookisbn = 0 except KeyError: bookisbn = 0 author_fuzz = fuzz.token_set_ratio(Author, authorname) book_fuzz = fuzz.token_set_ratio( item['volumeInfo']['title'], authorname) isbn_fuzz = 0 if is_valid_isbn(authorname): isbn_fuzz = 100 highest_fuzz = max(author_fuzz, book_fuzz, isbn_fuzz) bookname = item['volumeInfo']['title'] dic = {':': '', '"': '', '\'': ''} bookname = replace_all(bookname, dic) bookname = unaccented(bookname) bookname = bookname.strip() # strip whitespace bookid = 
item['id'] author = myDB.select( 'SELECT AuthorID FROM authors WHERE AuthorName = "%s"' % Author.replace('"', '""')) if author: AuthorID = author[0]['authorid'] else: AuthorID = '' resultlist.append({ 'authorname': Author, 'authorid': AuthorID, 'bookid': bookid, 'bookname': bookname, 'booksub': booksub, 'bookisbn': bookisbn, 'bookpub': bookpub, 'bookdate': bookdate, 'booklang': booklang, 'booklink': item['volumeInfo']['canonicalVolumeLink'], 'bookrate': float(bookrate), 'bookimg': bookimg, 'bookpages': bookpages, 'bookgenre': bookgenre, 'bookdesc': bookdesc, 'author_fuzz': author_fuzz, 'book_fuzz': book_fuzz, 'isbn_fuzz': isbn_fuzz, 'highest_fuzz': highest_fuzz, 'num_reviews': num_reviews }) resultcount = resultcount + 1 except KeyError: break logger.debug("Found %s total result%s" % (total_count, plural(total_count))) logger.debug("Removed %s bad language result%s" % (ignored, plural(ignored))) logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count))) logger.debug( "Showing %s result%s for (%s) with keyword: %s" % (resultcount, plural(resultcount), api_value, authorname)) logger.debug( 'The Google Books API was hit %s time%s for keyword %s' % (api_hits, plural(api_hits), self.name)) queue.put(resultlist)
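# find_results builds its author lookup by doubling any '"' characters into a
# string-formatted query. The database layer already accepts sqlite parameter
# placeholders (see the Title=? magazine queries in search_magazines above),
# which sidesteps the escaping entirely; a standalone sqlite3 sketch of the
# same lookup:
import sqlite3

def author_id_for(conn, author_name):
    # bound parameters handle quotes and unicode without manual escaping
    row = conn.execute('SELECT AuthorID FROM authors WHERE AuthorName=?',
                       (author_name,)).fetchone()
    return row[0] if row else ''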
def get_author_books(self, authorid=None, authorname=None, refresh=False): logger.debug('[%s] Now processing books with Google Books API' % authorname) # google doesnt like accents in author names set_url = self.url + urllib.quote('inauthor:"%s"' % unaccented_str(authorname)) URL = set_url + '&' + urllib.urlencode(self.params) books_dict = [] api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 # Artist is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) try: startindex = 0 resultcount = 0 removedResults = 0 duplicates = 0 ignored = 0 added_count = 0 updated_count = 0 book_ignore_count = 0 total_count = 0 number_results = 1 valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]) while startindex < number_results: self.params['startIndex'] = startindex URL = set_url + '&' + urllib.urlencode(self.params) try: jsonresults, in_cache = get_json_request(URL, useCache=not refresh) if jsonresults is None: number_results = 0 else: if not in_cache: api_hits = api_hits + 1 number_results = jsonresults['totalItems'] except HTTPError as err: logger.warn( 'Google Books API Error [%s]: Check your API key or wait a while' % err.reason) break if number_results == 0: logger.warn('Found no results for %s' % authorname) break else: logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname)) startindex = startindex + 40 for item in jsonresults['items']: total_count = total_count + 1 # skip if no author, no author is no book. try: Author = item['volumeInfo']['authors'][0] except KeyError: logger.debug('Skipped a result without authorfield.') continue try: if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10': bookisbn = item['volumeInfo'][ 'industryIdentifiers'][0]['identifier'] else: bookisbn = "" except KeyError: bookisbn = "" isbnhead = "" if len(bookisbn) == 10: isbnhead = bookisbn[0:3] try: booklang = item['volumeInfo']['language'] except KeyError: booklang = "Unknown" # do we care about language? 
if "All" not in valid_langs: if bookisbn != "": # seems google lies to us, sometimes tells us books # are in english when they are not if booklang == "Unknown" or booklang == "en": googlelang = booklang match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)) if (match): booklang = match['lang'] cache_hits = cache_hits + 1 logger.debug( "Found cached language [%s] for [%s]" % (booklang, isbnhead)) else: # no match in cache, try searching librarything for a language code using the isbn # if no language found, librarything return value is "invalid" or "unknown" # librarything returns plain text, not xml BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + \ bookisbn try: librarything_wait() resp = urllib2.urlopen(BOOK_URL, timeout=30).read() lt_lang_hits = lt_lang_hits + 1 logger.debug( "LibraryThing reports language [%s] for %s" % (resp, isbnhead)) if (resp != 'invalid' and resp != 'unknown'): booklang = resp # found a language code myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, booklang)) logger.debug(u"LT language: " + booklang) except Exception as e: booklang = "" logger.error("Error finding language: %s" % str(e)) if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]: # these are all english, may need to expand # this list booknamealt = item['volumeInfo']['title'] logger.debug("%s Google thinks [%s], we think [%s]" % (booknamealt, googlelang, booklang)) gb_lang_change = gb_lang_change + 1 else: match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)) if (not match): myDB.action( 'insert into languages values ("%s", "%s")' % (isbnhead, booklang)) logger.debug(u"GB language: " + booklang) # skip if language is in ignore list if booklang not in valid_langs: booknamealt = item['volumeInfo']['title'] logger.debug( 'Skipped [%s] with language %s' % (booknamealt, booklang)) ignored = ignored + 1 continue try: bookpub = item['volumeInfo']['publisher'] except KeyError: bookpub = None try: booksub = item['volumeInfo']['subtitle'] except KeyError: booksub = None if booksub is None: series = None seriesNum = None else: try: series = booksub.split('(')[1].split(' Series ')[0] except IndexError: series = None try: seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0] if seriesNum[0] == '#': seriesNum = seriesNum[1:] except IndexError: seriesNum = None try: bookdate = item['volumeInfo']['publishedDate'] except KeyError: bookdate = '0000-00-00' try: bookimg = item['volumeInfo']['imageLinks']['thumbnail'] except KeyError: bookimg = 'images/nocover.png' try: bookrate = item['volumeInfo']['averageRating'] except KeyError: bookrate = 0 try: bookpages = item['volumeInfo']['pageCount'] except KeyError: bookpages = 0 try: bookgenre = item['volumeInfo']['categories'][0] except KeyError: bookgenre = None try: bookdesc = item['volumeInfo']['description'] except KeyError: bookdesc = None bookname = item['volumeInfo']['title'] bookname = unaccented(bookname) dic = {':': '', '"': '', '\'': ''} bookname = replace_all(bookname, dic) bookname = bookname.strip() # strip whitespace booklink = item['volumeInfo']['canonicalVolumeLink'] bookrate = float(bookrate) bookid = item['id'] # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions) # and sometimes uses the same bookid if the book is the same but the title is slightly different # # Not sure if googlebooks does too, but we only want one... 
find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid) if find_book_status: for resulted in find_book_status: book_status = resulted['Status'] locked = resulted['Manual'] else: book_status = lazylibrarian.NEWBOOK_STATUS locked = False rejected = False if re.match('[^\w-]', bookname): # remove books with bad characters in title logger.debug("[%s] removed book for bad characters" % bookname) removedResults = removedResults + 1 rejected = True if not rejected and not bookname: logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname)) removedResults = removedResults + 1 rejected = True if not rejected: find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' % (bookname.replace('"', '""'), authorname.replace('"', '""'))) if find_books: for find_book in find_books: if find_book['BookID'] != bookid: # we have a book with this author/title already logger.debug('Rejecting bookid %s for [%s][%s] already got %s' % (find_book['BookID'], authorname, bookname, bookid)) rejected = True duplicates = duplicates + 1 if not rejected: find_books = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid) if find_books: # we have a book with this bookid already logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' % (bookid, authorname, bookname)) duplicates = duplicates + 1 rejected = True if not rejected: if book_status != "Ignored" and not locked: controlValueDict = {"BookID": bookid} newValueDict = { "AuthorName": authorname, "AuthorID": authorid, "AuthorLink": "", "BookName": bookname, "BookSub": booksub, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": bookgenre, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": bookdate, "BookLang": booklang, "Status": book_status, "BookAdded": today(), "Series": series, "SeriesNum": seriesNum } resultcount = resultcount + 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug(u"Book found: " + bookname + " " + bookdate) if 'nocover' in bookimg or 'nophoto' in bookimg: # try to get a cover from librarything workcover = getBookCover(bookid) if workcover: logger.debug(u'Updated cover for %s to %s' % (bookname, workcover)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) elif bookimg and bookimg.startswith('http'): link = cache_cover(bookid, bookimg) if link is not None: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) if seriesNum is None: # try to get series info from librarything series, seriesNum = getWorkSeries(bookid) if seriesNum: logger.debug(u'Updated series: %s [%s]' % (series, seriesNum)) controlValueDict = {"BookID": bookid} newValueDict = { "Series": series, "SeriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) worklink = getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not find_book_status: logger.debug("[%s] Added book: %s [%s]" % (authorname, bookname, booklang)) added_count = added_count + 1 else: updated_count = updated_count + 1 logger.debug("[%s] Updated book: %s" % (authorname, bookname)) else: book_ignore_count = book_ignore_count + 1 except KeyError: pass logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' % (authorname, 
api_hits, plural(api_hits))) lastbook = myDB.match('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \ AND Status != "Ignored" order by BookDate DESC' % authorid) if lastbook: # maybe there are no books [remaining] for this author lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] else: lastbookname = None lastbooklink = None lastbookdate = None controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": "Active", "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate } myDB.upsert("authors", newValueDict, controlValueDict) logger.debug("Found %s total book%s for author" % (total_count, plural(total_count))) logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored))) logger.debug( "Removed %s bad character or no-name result%s for author" % (removedResults, plural(removedResults))) logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates))) logger.debug("Ignored %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count))) logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount))) myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' % (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates)) if refresh: logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" % (authorname, added_count, plural(added_count), updated_count, plural(updated_count))) else: logger.info("[%s] Book processing complete: Added %s book%s to the database" % (authorname, added_count, plural(added_count))) return books_dict
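# get_author_books resolves a language per ISBN in two steps: the local
# "languages" cache table first, then LibraryThing's thingLang API, writing any
# definite answer back to the cache. A condensed sketch of that flow; whether
# myDB.match/myDB.action accept placeholder tuples exactly like this is an
# assumption, and librarything_wait() is the same rate-limit helper used above.
import urllib2  # python2, as elsewhere in this module

def lookup_language(myDB, bookisbn):
    isbnhead = bookisbn[0:3]
    match = myDB.match('SELECT lang FROM languages WHERE isbn=?', (isbnhead,))
    if match:
        return match['lang']  # cache hit, no network call
    librarything_wait()
    resp = urllib2.urlopen('http://www.librarything.com/api/thingLang.php?isbn='
                           + bookisbn, timeout=30).read()
    if resp not in ('invalid', 'unknown'):
        # cache the definite answer for the whole isbn prefix
        myDB.action('INSERT INTO languages VALUES (?, ?)', (isbnhead, resp))
        return resp
    return 'Unknown'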
def search_rss_book(books=None, reset=False):
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            if books is None:
                threading.currentThread().name = "SEARCHALLRSS"
            else:
                threading.currentThread().name = "SEARCHRSS"
        if not lazylibrarian.USE_RSS():
            logger.warn('RSS search is disabled')
            scheduleJob(action='Stop', target='search_rss_book')
            return
        if not internet():
            logger.warn('Search RSS Book: No internet connection')
            return
        myDB = database.DBConnection()
        resultlist, wishproviders = IterateOverGoodReads()
        if not wishproviders:
            logger.debug('No rss wishlists are set')
        else:
            # for each item in resultlist, add to database if necessary, and mark as wanted
            for book in resultlist:
                # we get rss_author, rss_title, rss_isbn, rss_bookid (goodreads bookid)
                # we can just use bookid if goodreads, or try isbn and name matching on author/title if googlebooks
                # not sure if anyone would use a goodreads wishlist if not using goodreads interface...
                logger.debug('Processing %s item%s in wishlists' % (len(resultlist), plural(len(resultlist))))
                if book['rss_bookid'] and lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                    bookmatch = myDB.match('select Status,BookName from books where bookid="%s"' % book['rss_bookid'])
                    if bookmatch:
                        bookstatus = bookmatch['Status']
                        bookname = bookmatch['BookName']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s, already marked as "%s"' % (bookname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s, marking as "Wanted"' % bookname)
                            controlValueDict = {"BookID": book['rss_bookid']}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:
                        import_book(book['rss_bookid'])
                else:
                    item = {}
                    headers = []
                    item['Title'] = book['rss_title']
                    if book['rss_bookid']:
                        item['BookID'] = book['rss_bookid']
                        headers.append('BookID')
                    if book['rss_isbn']:
                        item['ISBN'] = book['rss_isbn']
                        headers.append('ISBN')
                    bookmatch = finditem(item, book['rss_author'], headers)
                    if bookmatch:  # it's already in the database
                        authorname = bookmatch['AuthorName']
                        bookname = bookmatch['BookName']
                        bookid = bookmatch['BookID']
                        bookstatus = bookmatch['Status']
                        if bookstatus in ['Open', 'Wanted', 'Have']:
                            logger.info(u'Found book %s by %s, already marked as "%s"' %
                                        (bookname, authorname, bookstatus))
                        else:  # skipped/ignored
                            logger.info(u'Found book %s by %s, marking as "Wanted"' % (bookname, authorname))
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"Status": "Wanted"}
                            myDB.upsert("books", newValueDict, controlValueDict)
                    else:  # not in database yet
                        results = ''
                        if book['rss_isbn']:
                            results = search_for(book['rss_isbn'])
                        if results:
                            result = results[0]
                            if result['isbn_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                logger.info("Found (%s%%) %s: %s" %
                                            (result['isbn_fuzz'], result['authorname'], result['bookname']))
                                import_book(result['bookid'])
                                bookmatch = True
                        if not results:
                            searchterm = "%s <ll> %s" % (item['Title'], formatAuthorName(book['rss_author']))
                            results = search_for(unaccented(searchterm))
                            if results:
                                result = results[0]
                                if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                        and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                                    logger.info("Found (%s%% %s%%) %s: %s" %
                                                (result['author_fuzz'], result['book_fuzz'],
                                                 result['authorname'], result['bookname']))
                                    import_book(result['bookid'])
                                    bookmatch = True
                        if not bookmatch:
                            msg = "Skipping book %s by %s" % (item['Title'], book['rss_author'])
                            # noinspection PyUnboundLocalVariable
                            if not results:
                                msg += ', No results returned'
                                logger.warn(msg)
                            else:
                                msg += ', No match found'
                                logger.warn(msg)
                                msg = "Closest match (%s%% %s%%) %s: %s" % (result['author_fuzz'], result['book_fuzz'],
                                                                            result['authorname'], result['bookname'])
                                logger.warn(msg)
        if books is None:
            # We are performing a backlog search
            cmd = 'SELECT BookID, AuthorName, Bookname, BookSub, BookAdded from books,authors '
            cmd += 'WHERE books.AuthorID = authors.AuthorID and books.Status="Wanted" order by BookAdded desc'
            searchbooks = myDB.select(cmd)
        else:
            # The user has added a new book
            searchbooks = []
            for book in books:
                cmd = 'SELECT BookID, AuthorName, BookName, BookSub from books,authors '
                cmd += 'WHERE books.AuthorID = authors.AuthorID and BookID="%s" ' % book['bookid']
                cmd += 'AND books.Status="Wanted"'
                searchbook = myDB.select(cmd)
                for terms in searchbook:
                    searchbooks.append(terms)
        if len(searchbooks) == 0:
            return
        resultlist, nproviders = IterateOverRSSSites()
        if not nproviders:
            if not wishproviders:
                logger.warn('No rss providers are set, check config')
            return  # No point in continuing
        logger.info('RSS Searching for %i book%s' % (len(searchbooks), plural(len(searchbooks))))
        rss_count = 0
        for book in searchbooks:
            authorname, bookname = get_searchterm(book, "book")
            found = processResultList(resultlist, authorname, bookname, book, 'book')
            # if you can't find the book, try title without any "(extended details, series etc)"
            if not found and '(' in bookname:  # anything to shorten?
                authorname, bookname = get_searchterm(book, "shortbook")
                found = processResultList(resultlist, authorname, bookname, book, 'shortbook')
            if not found:
                logger.debug("Searches returned no results. Adding book %s - %s to queue." % (authorname, bookname))
            if found > True:
                rss_count += 1
        logger.info("RSS Search for Wanted items complete, found %s book%s" % (rss_count, plural(rss_count)))
        if reset:
            scheduleJob(action='Restart', target='search_rss_book')
    except Exception:
        logger.error('Unhandled exception in search_rss_book: %s' % traceback.format_exc())
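# search_rss_book accepts a fuzzy search result only when its scores beat
# CONFIG['MATCH_RATIO']. A self-contained sketch of that gate using
# fuzzywuzzy's token_set_ratio (the same scorer find_results uses); the
# default of 80 here is illustrative, not lazylibrarian's configured value.
from fuzzywuzzy import fuzz

def is_confident_match(result, want_author, want_title, match_ratio=80):
    author_fuzz = fuzz.token_set_ratio(result['authorname'], want_author)
    book_fuzz = fuzz.token_set_ratio(result['bookname'], want_title)
    return author_fuzz > match_ratio and book_fuzz > match_ratio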
def GEN(book=None, prov=None): errmsg = '' provider = "libgen.io" if prov is None: prov = 'GEN' host = lazylibrarian.CONFIG[prov + '_HOST'] if not host.startswith('http'): host = 'http://' + host search = lazylibrarian.CONFIG[prov + '_SEARCH'] if not search or not search.endswith('.php'): search = 'search.php' if 'index.php' not in search and 'search.php' not in search: search = 'search.php' if search[0] == '/': search = search[1:] page = 1 results = [] next_page = True while next_page: if 'index.php' in search: params = { "s": book['searchterm'], "f_lang": "All", "f_columns": 0, "f_ext": "All" } else: params = { "view": "simple", "open": 0, "phrase": 0, "column": "def", "res": 100, "req": book['searchterm'] } if page > 1: params['page'] = page providerurl = url_fix(host + "/%s" % search) searchURL = providerurl + "?%s" % urllib.urlencode(params) next_page = False result, success = fetchURL(searchURL) if not success: # may return 404 if no results, not really an error if '404' in result: logger.debug(u"No results found from %s for %s" % (provider, book['searchterm'])) elif '111' in result: # looks like libgen has ip based access limits logger.error( 'Access forbidden. Please wait a while before trying %s again.' % provider) errmsg = result else: logger.debug(searchURL) logger.debug('Error fetching page data from %s: %s' % (provider, result)) errmsg = result result = False if result: logger.debug(u'Parsing results from <a href="%s">%s</a>' % (searchURL, provider)) try: soup = BeautifulSoup(result) try: table = soup.findAll('table')[2] # un-named table if table: rows = table.findAll('tr') except IndexError: # no results table in result page rows = [] if 'search.php' in search and len(rows) > 1: rows = rows[1:] for row in rows: author = '' title = '' size = '' extn = '' link = '' td = row.findAll('td') if 'index.php' in search and len(td) > 3: try: res = str( BeautifulStoneSoup( td[0].text, convertEntities=BeautifulStoneSoup. HTML_ENTITIES)) author = formatAuthorName(res) title = str( BeautifulStoneSoup( td[2].text, convertEntities=BeautifulStoneSoup. HTML_ENTITIES)) temp = str(td[4]) temp = temp.split('onmouseout')[1] extn = temp.split('">')[1].split('(')[0] size = temp.split('">')[1].split('(')[1].split( ')')[0] size = size.upper() link = temp.split('href=')[1].split('"')[1] except IndexError as e: logger.debug( 'Error parsing libgen index.php results: %s' % str(e)) elif 'search.php' in search and len(td) > 8: try: res = str( BeautifulStoneSoup( td[1].text, convertEntities=BeautifulStoneSoup. HTML_ENTITIES)) author = formatAuthorName(res) title = str( td[2]).split('>')[2].split('<')[0].strip() title = str( BeautifulStoneSoup( title, convertEntities=BeautifulStoneSoup. HTML_ENTITIES)) link = str(td[2]).split('href="')[1].split( '?')[1].split('"')[0] size = unaccented(td[7].text).upper() extn = td[8].text except IndexError as e: logger.debug( 'Error parsing libgen search.php results; %s' % str(e)) if not size: size = 0 else: try: mult = 1 if 'K' in size: size = size.split('K')[0] mult = 1024 elif 'M' in size: size = size.split('M')[0] mult = 1024 * 1024 elif 'G' in size: size = size.split('G')[0] mult = 1024 * 1024 * 1024 size = int(float(size) * mult) except (ValueError, IndexError): size = 0 if link and title: if author: title = author.strip() + ' ' + title.strip() if extn: title = title + '.' + extn if not link.startswith('http'): if "/ads.php?" in link: url = url_fix(host + link) else: url = url_fix(host + "/ads.php?" 
+ link) else: url = redirect_url(host, link) bookresult, success = fetchURL(url) if not success: # may return 404 if no results, not really an error if '404' in bookresult: logger.debug( u"No results found from %s for %s" % (provider, book['searchterm'])) else: logger.debug(url) logger.debug( 'Error fetching link data from %s: %s' % (provider, bookresult)) errmsg = bookresult bookresult = False if bookresult: url = None try: new_soup = BeautifulSoup(bookresult) for link in new_soup.findAll('a'): output = link.get('href') if output: if output.startswith( 'http' ) and '/get.php' in output: url = output break elif '/get.php' in output: url = '/get.php' + output.split( '/get.php')[1] break elif '/download/book' in output: url = '/download/book' + output.split( '/download/book')[1] break if url and not url.startswith('http'): url = url_fix(host + url) else: url = redirect_url(host, url) except Exception as e: logger.debug( 'Error parsing bookresult for %s: %s' % (link, str(e))) url = None if url: results.append({ 'bookid': book['bookid'], 'tor_prov': provider + '/' + search, 'tor_title': title, 'tor_url': url, 'tor_size': str(size), 'tor_type': 'direct', 'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY'] }) logger.debug('Found %s, Size %s' % (title, size)) next_page = True except Exception as e: logger.error(u"An error occurred in the %s parser: %s" % (provider, str(e))) logger.debug('%s: %s' % (provider, traceback.format_exc())) page += 1 if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page: logger.warn( 'Maximum results page search reached, still more results available' ) next_page = False logger.debug( u"Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, book['searchterm'])) return results, errmsg
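# The GEN parser converts display sizes like "1.4 MB" to bytes before storing
# them. The same conversion as a standalone helper (input is assumed already
# upper-cased, as the parser does before the suffix checks):
def size_to_bytes(size):
    try:
        mult = 1
        if 'K' in size:
            size, mult = size.split('K')[0], 1024
        elif 'M' in size:
            size, mult = size.split('M')[0], 1024 * 1024
        elif 'G' in size:
            size, mult = size.split('G')[0], 1024 * 1024 * 1024
        return int(float(size) * mult)
    except (ValueError, IndexError):
        return 0

# size_to_bytes('1.4 MB') -> 1468006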
def dbupgrade(db_current_version): try: myDB = database.DBConnection() db_version = 0 result = myDB.match('PRAGMA user_version') if result and result[0]: value = str(result[0]) if value.isdigit(): db_version = int(value) check = myDB.match('PRAGMA integrity_check') if check and check[0]: result = check[0] if result == 'ok': logger.debug('Database integrity check: %s' % result) else: logger.error('Database integrity check: %s' % result) # should probably abort now if db_version < db_current_version: myDB = database.DBConnection() if db_version < 1: if not has_column(myDB, "authors", "AuthorID"): # it's a new database. Create tables but no need for any upgrading db_version = db_current_version lazylibrarian.UPDATE_MSG = 'Creating new database, version %s' % db_version else: lazylibrarian.UPDATE_MSG = 'Updating database to version %s, current version is %s' % ( db_current_version, db_version) logger.info(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE IF NOT EXISTS authors (AuthorID TEXT UNIQUE, AuthorName TEXT UNIQUE, \ AuthorImg TEXT, AuthorLink TEXT, DateAdded TEXT, Status TEXT, LastBook TEXT, LastBookImg TEXT, \ LastLink Text, LastDate TEXT, HaveBooks INTEGER, TotalBooks INTEGER, AuthorBorn TEXT, \ AuthorDeath TEXT, UnignoredBooks INTEGER, Manual TEXT)') myDB.action('CREATE TABLE IF NOT EXISTS books (AuthorID TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, BookFile TEXT, \ BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, Manual TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS wanted (BookID TEXT, NZBurl TEXT, NZBtitle TEXT, NZBdate TEXT, \ NZBprov TEXT, Status TEXT, NZBsize TEXT, AuxInfo TEXT, NZBmode TEXT, Source TEXT, DownloadID TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS pastissues AS SELECT * FROM wanted WHERE 0' ) # same columns myDB.action( 'CREATE TABLE IF NOT EXISTS magazines (Title TEXT UNIQUE, Regex TEXT, Status TEXT, \ MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT, \ LatestCover TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS languages (isbn TEXT, lang TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS issues (Title TEXT, IssueID TEXT UNIQUE, IssueAcquired TEXT, \ IssueDate TEXT, IssueFile TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS stats (authorname text, GR_book_hits int, GR_lang_hits int, \ LT_lang_hits int, GB_lang_change, cache_hits int, bad_lang int, bad_char int, uncached int, \ duplicates int)') myDB.action( 'CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \ Status TEXT)') myDB.action( 'CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)' ) myDB.action( 'CREATE TABLE IF NOT EXISTS seriesauthors (SeriesID INTEGER, AuthorID TEXT)' ) # These are the incremental changes before database versioning was introduced. # Old database tables might already have these incorporated depending on version, so we need to check... if db_version < 1: if not has_column(myDB, "books", "BookSub"): lazylibrarian.UPDATE_MSG = 'Updating database to hold book subtitles.' 
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN BookSub TEXT')
            if not has_column(myDB, "books", "BookPub"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold book publisher'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN BookPub TEXT')
            if not has_column(myDB, "books", "BookGenre"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold bookgenre'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN BookGenre TEXT')
            if not has_column(myDB, "books", "BookFile"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold book filename'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN BookFile TEXT')
            if not has_column(myDB, "wanted", "AuxInfo"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold AuxInfo'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE wanted ADD COLUMN AuxInfo TEXT')
            if not has_column(myDB, "wanted", "NZBsize"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBsize'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE wanted ADD COLUMN NZBsize TEXT')
            if not has_column(myDB, "wanted", "NZBmode"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold NZBmode'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE wanted ADD COLUMN NZBmode TEXT')
            if not has_column(myDB, "authors", "UnignoredBooks"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold UnignoredBooks'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE authors ADD COLUMN UnignoredBooks INTEGER')
            if not has_column(myDB, "magazines", "IssueStatus"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold IssueStatus'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE magazines ADD COLUMN IssueStatus TEXT')
            addedWorkPage = False
            if not has_column(myDB, "books", "WorkPage"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold WorkPage'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN WorkPage TEXT')
                addedWorkPage = True
            addedSeries = False
            if not has_column(myDB, "series", "SeriesID") and not has_column(myDB, "books", "Series"):
                lazylibrarian.UPDATE_MSG = 'Updating database to hold Series'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN Series TEXT')
                addedSeries = True
            # SeriesOrder shouldn't be an integer, some later written books
            # and novellas logically go inbetween books of the main series,
            # and their SeriesOrder is not an integer, eg 1.5
            # so we need to update SeriesOrder to store as text.
# Because sqlite can't drop columns we create a new column SeriesNum, # inherit the old column values, and use SeriesNum instead if not has_column(myDB, "books", "SeriesNum") and has_column( myDB, "books", "SeriesOrder"): # no SeriesNum column, so create one lazylibrarian.UPDATE_MSG = 'Updating books to hold SeriesNum' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE books ADD COLUMN SeriesNum TEXT') myDB.action('UPDATE books SET SeriesNum = SeriesOrder') myDB.action('UPDATE books SET SeriesOrder = Null') addedIssues = False if not has_column(myDB, "issues", "Title"): lazylibrarian.UPDATE_MSG = 'Updating database to hold Issues table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE issues (Title TEXT, IssueID TEXT, IssueAcquired TEXT, IssueDate TEXT, IssueFile TEXT)' ) addedIssues = True if not has_column(myDB, "issues", "IssueID"): lazylibrarian.UPDATE_MSG = 'Updating Issues table to hold IssueID' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action('ALTER TABLE issues ADD COLUMN IssueID TEXT') addedIssues = True myDB.action('DROP TABLE if exists capabilities') if addedIssues: try: magazinescan.magazineScan() except Exception as e: logger.debug("Failed to scan magazines, %s" % str(e)) if addedWorkPage: try: lazylibrarian.UPDATE_MSG = 'Adding WorkPage to existing books' logger.debug(lazylibrarian.UPDATE_MSG) threading.Thread(target=bookwork.setWorkPages, name="ADDWORKPAGE", args=[]).start() except Exception as e: logger.debug("Failed to update WorkPages, %s" % str(e)) if addedSeries: try: books = myDB.select( 'SELECT BookID, BookName FROM books') if books: lazylibrarian.UPDATE_MSG = 'Adding series to existing books' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(books) cnt = 0 for book in books: cnt += 1 lazylibrarian.UPDATE_MSG = 'Adding series to existing books: %s of %s' % ( cnt, tot) series, seriesNum = bookSeries( book["BookName"]) if series: controlValueDict = { "BookID": book["BookID"] } newValueDict = { "series": series, "seriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) except Exception as e: logger.error('Error: ' + str(e)) if db_version < 2: try: results = myDB.select( 'SELECT BookID,NZBsize FROM wanted WHERE NZBsize LIKE "% MB"' ) if results: lazylibrarian.UPDATE_MSG = 'Removing units from wanted table' logger.debug(lazylibrarian.UPDATE_MSG) tot = len(results) cnt = 0 for units in results: cnt += 1 lazylibrarian.UPDATE_MSG = 'Removing units from wanted table: %s of %s' % ( cnt, tot) nzbsize = units["NZBsize"] nzbsize = nzbsize.split(' ')[0] myDB.action( 'UPDATE wanted SET NZBsize = "%s" WHERE BookID = "%s"' % (nzbsize, units["BookID"])) except Exception as e: logger.error('Error: ' + str(e)) if db_version < 3: if has_column(myDB, "books", "SeriesOrder"): lazylibrarian.UPDATE_MSG = 'Removing SeriesOrder from books table' logger.debug(lazylibrarian.UPDATE_MSG) myDB.action( 'CREATE TABLE IF NOT EXISTS temp_books (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \ BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \ BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \ BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \ SeriesNum TEXT, WorkPage TEXT)') myDB.action( 'INSERT INTO temp_books SELECT AuthorID,AuthorName,AuthorLink,BookName,BookSub, \ BookDesc,BookGenre,BookIsbn,BookPub,BookRate,BookImg,BookPages,BookLink,BookID, \ BookFile,BookDate,BookLang,BookAdded,Status,Series,SeriesNum,WorkPage FROM books' ) 
            myDB.action('DROP TABLE books')
            myDB.action('ALTER TABLE temp_books RENAME TO books')

            if not has_column(myDB, "pastissues", "BookID"):
                lazylibrarian.UPDATE_MSG = 'Moving magazine past issues into new table'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('CREATE TABLE pastissues AS SELECT * FROM wanted WHERE Status="Skipped" \
                    AND length(AuxInfo) > 0')
                myDB.action('DELETE FROM wanted WHERE Status="Skipped" AND length(AuxInfo) > 0')

        if db_version < 4:
            if not has_column(myDB, "stats", "duplicates"):
                lazylibrarian.UPDATE_MSG = 'Updating stats table to hold duplicates'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE stats ADD COLUMN duplicates INT')

        if db_version < 5:
            issues = myDB.select('SELECT IssueID,IssueDate from issues WHERE length(IssueDate) < 4 \
                and length(IssueDate) > 0')
            if issues:
                lazylibrarian.UPDATE_MSG = 'Updating issues table to hold 4 digit issue numbers'
                logger.debug(lazylibrarian.UPDATE_MSG)
                tot = len(issues)
                cnt = 0
                for issue in issues:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = 'Updating issues table 4 digits: %s of %s' % (cnt, tot)
                    issueid = issue['IssueID']
                    issuedate = str(issue['IssueDate'])
                    issuedate = issuedate.zfill(4)
                    myDB.action('UPDATE issues SET IssueDate="%s" WHERE IssueID="%s"' % (issuedate, issueid))

            mags = myDB.select('SELECT Title,IssueDate from magazines WHERE length(IssueDate) < 4 \
                and length(IssueDate) > 0')
            if mags:
                lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits'
                logger.debug(lazylibrarian.UPDATE_MSG)
                tot = len(mags)
                cnt = 0
                for mag in mags:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = 'Updating magazines table to 4 digits: %s of %s' % (cnt, tot)
                    title = mag['Title']
                    issuedate = str(mag['IssueDate'])
                    issuedate = issuedate.zfill(4)
                    myDB.action('UPDATE magazines SET IssueDate="%s" WHERE Title="%s"' % (issuedate, title))

        if db_version < 6:
            if not has_column(myDB, "books", "Manual"):
                lazylibrarian.UPDATE_MSG = 'Updating books table to hold Manual setting'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE books ADD COLUMN Manual TEXT')

        if db_version < 7:
            if not has_column(myDB, "wanted", "Source"):
                lazylibrarian.UPDATE_MSG = 'Updating wanted table to hold Source and DownloadID'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE wanted ADD COLUMN Source TEXT')
                myDB.action('ALTER TABLE wanted ADD COLUMN DownloadID TEXT')

        if db_version < 8:
            src = os.path.join(lazylibrarian.PROG_DIR, 'data/images/cache/')
            dst = lazylibrarian.CACHEDIR
            images = myDB.select('SELECT AuthorID, AuthorImg FROM authors WHERE AuthorImg LIKE "images/cache/%"')
            if images:
                logger.debug('Moving author images to new location')
                tot = len(images)
                cnt = 0
                for image in images:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (cnt, tot)
                    img = image['AuthorImg']
                    img = img[7:]
                    myDB.action('UPDATE authors SET AuthorImg="%s" WHERE AuthorID="%s"' % (img, image['AuthorID']))
                    img = img[6:]
                    srcfile = os.path.join(src, img)
                    if os.path.isfile(srcfile):
                        try:
                            shutil.move(os.path.join(src, img), os.path.join(dst, img))
                        except Exception as e:
                            logger.warn("dbupgrade: %s" % str(e))
                logger.debug("Author Image cache updated")

            images = myDB.select('SELECT BookID, BookImg FROM books WHERE BookImg LIKE "images/cache/%"')
            if images:
                logger.debug('Moving book images to new location')
                tot = len(images)
                cnt = 0
                for image in images:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (cnt, tot)
                    img = image['BookImg']
                    img = img[7:]
                    myDB.action('UPDATE books SET BookImg="%s" WHERE BookID="%s"' % (img, image['BookID']))
                    img = img[6:]
                    srcfile = os.path.join(src, img)
                    if os.path.isfile(srcfile):
                        try:
                            shutil.move(srcfile, os.path.join(dst, img))
                        except Exception as e:
                            logger.warn("dbupgrade: %s" % str(e))
                logger.debug("Book Image cache updated")

        if db_version < 9:
            if not has_column(myDB, "magazines", "Reject"):
                # remove frequency column, rename regex to reject, add new regex column for searches
                lazylibrarian.UPDATE_MSG = 'Updating magazines table'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('CREATE TABLE IF NOT EXISTS temp_table (Title TEXT, Regex TEXT, Status TEXT, \
                    MagazineAdded TEXT, LastAcquired TEXT, IssueDate TEXT, IssueStatus TEXT, Reject TEXT)')
                myDB.action('INSERT INTO temp_table SELECT Title, Regex, Status, MagazineAdded, LastAcquired, \
                    IssueDate, IssueStatus, Regex FROM magazines')
                myDB.action('DROP TABLE magazines')
                myDB.action('ALTER TABLE temp_table RENAME TO magazines')
                myDB.action('UPDATE magazines SET Regex = Null')

        if db_version < 10:
            # make sure columns in pastissues match those in wanted table
            # needed when upgrading from old 3rd party packages (eg freenas)
            myDB.action('DROP TABLE pastissues')
            myDB.action('CREATE TABLE pastissues AS SELECT * FROM wanted WHERE 0')  # same columns, but empty table

        if db_version < 11:
            # keep last book image
            if not has_column(myDB, "authors", "LastBookImg"):
                lazylibrarian.UPDATE_MSG = 'Updating author table to hold last book image'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE authors ADD COLUMN LastBookImg TEXT')
                books = myDB.select('SELECT AuthorID, AuthorName, LastBook from authors')
                if books:
                    for book in books:
                        lazylibrarian.UPDATE_MSG = 'Updating last book image for %s' % book['AuthorName']
                        if book['LastBook']:
                            match = myDB.match('SELECT BookImg from books WHERE AuthorID="%s" AND BookName="%s"' %
                                               (book['AuthorID'], book['LastBook']))
                            if match:
                                myDB.action('UPDATE authors SET LastBookImg="%s" WHERE AuthorID=%s' %
                                            (match['BookImg'], book['AuthorID']))

        if db_version < 12:
            # keep last magazine issue image
            if not has_column(myDB, "magazines", "LatestCover"):
                lazylibrarian.UPDATE_MSG = 'Updating magazine table to hold last issue image'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE magazines ADD COLUMN LatestCover TEXT')
                mags = myDB.select('SELECT Title, LastAcquired from magazines')
                if mags:
                    for mag in mags:
                        lazylibrarian.UPDATE_MSG = 'Updating last issue image for %s' % mag['Title']
                        match = myDB.match('SELECT IssueFile from issues WHERE IssueAcquired="%s" AND Title="%s"' %
                                           (mag['LastAcquired'], mag['Title']))
                        if match:
                            coverfile = os.path.splitext(match['IssueFile'])[0] + '.jpg'
                            if os.path.exists(coverfile):
                                myDB.action('UPDATE magazines SET LatestCover="%s" WHERE Title="%s"' %
                                            (coverfile, mag['Title']))

        if db_version < 13:
            if not has_column(myDB, "authors", "Manual"):
                lazylibrarian.UPDATE_MSG = 'Updating authors table to hold Manual setting'
                logger.debug(lazylibrarian.UPDATE_MSG)
                myDB.action('ALTER TABLE authors ADD COLUMN Manual TEXT')

        if db_version < 14:
            src = lazylibrarian.CACHEDIR
            try:
                os.mkdir(os.path.join(src, 'author'))
            except OSError as e:
                if e.errno != 17:  # already exists is ok
                    logger.debug('mkdir author cache reports: %s' % str(e))

            query = 'SELECT AuthorName, AuthorID, AuthorImg FROM authors '
            query += 'WHERE AuthorImg LIKE "cache/%" '
            query += 'AND AuthorImg NOT LIKE "cache/author/%"'
            images = myDB.select(query)
            if images:
                tot = len(images)
                logger.debug('Moving %s author images to new location' % tot)
                cnt = 0
                for image in images:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Moving author images to new location: %s of %s" % (cnt, tot)
                    try:
                        img = image['AuthorImg']
                        img = img.rsplit('/', 1)[1]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(srcfile, os.path.join(src, "author", img))
                                myDB.action('UPDATE authors SET AuthorImg="cache/author/%s" WHERE AuthorID="%s"' %
                                            (img, image['AuthorID']))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    except Exception as e:
                        logger.warn('Failed to update author image for %s: %s' % (image['AuthorName'], str(e)))
                logger.debug("Author Image cache updated")

            try:
                os.mkdir(os.path.join(src, 'book'))
            except OSError as e:
                if e.errno != 17:  # already exists is ok
                    logger.debug('mkdir book cache reports: %s' % str(e))

            query = 'SELECT BookName, BookID, BookImg FROM books '
            query += 'WHERE BookImg LIKE "cache/%" '
            query += 'AND BookImg NOT LIKE "cache/book/%"'
            images = myDB.select(query)
            if images:
                tot = len(images)
                logger.debug('Moving %s book images to new location' % tot)
                cnt = 0
                for image in images:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Moving book images to new location: %s of %s" % (cnt, tot)
                    try:
                        img = image['BookImg']
                        img = img.rsplit('/', 1)[1]
                        srcfile = os.path.join(src, img)
                        if os.path.isfile(srcfile):
                            try:
                                shutil.move(srcfile, os.path.join(src, "book", img))
                                myDB.action('UPDATE books SET BookImg="cache/book/%s" WHERE BookID="%s"' %
                                            (img, image['BookID']))
                            except Exception as e:
                                logger.warn("dbupgrade: %s" % str(e))
                    except Exception as e:
                        logger.warn('Failed to update book image for %s: %s' % (image['BookName'], str(e)))
                logger.debug("Book Image cache updated")

            # at this point there should be no more .jpg files in the root of the cachedir
            # any that are still there are for books/authors deleted from database
            # or magazine latest issue cover files that get copied as required
            for image in os.listdir(src):
                if image.endswith('.jpg'):
                    os.remove(os.path.join(src, image))

        if db_version < 15:
            myDB.action('CREATE TABLE IF NOT EXISTS series (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                AuthorID TEXT, Status TEXT)')
            myDB.action('CREATE TABLE IF NOT EXISTS member (SeriesID INTEGER, BookID TEXT, SeriesNum TEXT)')
            if has_column(myDB, "books", "SeriesNum"):
                lazylibrarian.UPDATE_MSG = 'Populating series and member tables'
                books = myDB.select('SELECT BookID, Series, SeriesNum from books')
                if books:
                    tot = len(books)
                    logger.debug("Updating book series for %s book%s" % (tot, plural(tot)))
                    cnt = 0
                    for book in books:
                        cnt += 1
                        lazylibrarian.UPDATE_MSG = "Updating book series: %s of %s" % (cnt, tot)
                        seriesdict = getWorkSeries(book['BookID'])
                        if not seriesdict:  # no workpage series, use the current values if present
                            if book['Series'] and book['SeriesNum']:
                                seriesdict = {cleanName(unaccented(book['Series'])): book['SeriesNum']}
                        setSeries(seriesdict, book['BookID'], seriesauthors=False)
                    # deleteEmptySeries  # shouldn't be any on first run?
                    lazylibrarian.UPDATE_MSG = "Book series update complete"
                    logger.debug(lazylibrarian.UPDATE_MSG)

                lazylibrarian.UPDATE_MSG = 'Removing seriesnum from books table'
                myDB.action('CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, AuthorName TEXT, AuthorLink TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, Series TEXT, \
                    WorkPage TEXT, Manual TEXT)')
                myDB.action('INSERT INTO temp_table SELECT AuthorID, AuthorName, AuthorLink, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, Series, WorkPage, Manual from books')
                myDB.action('DROP TABLE books')
                myDB.action('ALTER TABLE temp_table RENAME TO books')
                lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

        if db_version < 16:
            if has_column(myDB, "books", "AuthorLink"):
                lazylibrarian.UPDATE_MSG = 'Removing series, authorlink and authorname from books table'
                myDB.action('CREATE TABLE IF NOT EXISTS temp_table (AuthorID TEXT, \
                    BookName TEXT, BookSub TEXT, BookDesc TEXT, BookGenre TEXT, BookIsbn TEXT, BookPub TEXT, \
                    BookRate INTEGER, BookImg TEXT, BookPages INTEGER, BookLink TEXT, BookID TEXT UNIQUE, \
                    BookFile TEXT, BookDate TEXT, BookLang TEXT, BookAdded TEXT, Status TEXT, WorkPage TEXT, \
                    Manual TEXT)')
                myDB.action('INSERT INTO temp_table SELECT AuthorID, BookName, BookSub, \
                    BookDesc, BookGenre, BookIsbn, BookPub, BookRate, BookImg, BookPages, BookLink, BookID, \
                    BookFile, BookDate, BookLang, BookAdded, Status, WorkPage, Manual from books')
                myDB.action('DROP TABLE books')
                myDB.action('ALTER TABLE temp_table RENAME TO books')
                lazylibrarian.UPDATE_MSG = 'Reorganisation of books table complete'

        if db_version < 17:
            if has_column(myDB, "series", "AuthorID"):
                lazylibrarian.UPDATE_MSG = 'Creating seriesauthors table'
                # In this version of the database there is only one author per series so use that as starting point
                myDB.action('CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))')
                series = myDB.select('SELECT SeriesID,AuthorID from series')
                cnt = 0
                tot = len(series)
                for item in series:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (cnt, tot)
                    myDB.action('insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)' %
                                (item['SeriesID'], item['AuthorID']), suppress='UNIQUE')
                myDB.action('DROP TABLE temp_table')
                myDB.action('CREATE TABLE temp_table (SeriesID INTEGER PRIMARY KEY, SeriesName TEXT, \
                    Status TEXT)')
                myDB.action('INSERT INTO temp_table SELECT SeriesID, SeriesName, Status FROM series')
                myDB.action('DROP TABLE series')
                myDB.action('ALTER TABLE temp_table RENAME TO series')
                lazylibrarian.UPDATE_MSG = 'Reorganisation of series table complete'

        if db_version < 18:
            data = myDB.match('pragma index_list(seriesauthors)')
            if not data:
                lazylibrarian.UPDATE_MSG = 'Adding unique constraint to seriesauthors table'
                myDB.action('DROP TABLE IF EXISTS temp_table')
                myDB.action('ALTER TABLE seriesauthors RENAME to temp_table')
                myDB.action('CREATE TABLE seriesauthors (SeriesID INTEGER, AuthorID TEXT, UNIQUE (SeriesID,AuthorID))')
                series = myDB.select('SELECT SeriesID,AuthorID from temp_table')
                cnt = 0
                tot = len(series)
                for item in series:
                    cnt += 1
                    lazylibrarian.UPDATE_MSG = "Updating seriesauthors: %s of %s" % (cnt, tot)
                    myDB.action('insert into seriesauthors (SeriesID, AuthorID) values (%s, %s)' %
                                (item['SeriesID'], item['AuthorID']), suppress='UNIQUE')
                myDB.action('DROP TABLE temp_table')
                lazylibrarian.UPDATE_MSG = 'Reorganisation of seriesauthors complete'

        # Now do any non-version-specific tidying
        try:
            authors = myDB.select('SELECT AuthorID FROM authors WHERE AuthorName IS NULL')
            if authors:
                logger.debug('Removing %s un-named author%s from database' % (len(authors), plural(len(authors))))
                for author in authors:
                    authorid = author["AuthorID"]
                    myDB.action('DELETE from authors WHERE AuthorID="%s"' % authorid)
                    myDB.action('DELETE from books WHERE AuthorID="%s"' % authorid)
        except Exception as e:
            logger.error('Error: ' + str(e))

        myDB.action('PRAGMA user_version = %s' % db_current_version)
        lazylibrarian.UPDATE_MSG = 'Cleaning Database after upgrade'
        myDB.action('vacuum')
        lazylibrarian.UPDATE_MSG = 'Database updated to version %s' % db_current_version
        logger.info(lazylibrarian.UPDATE_MSG)

        restartJobs(start='Start')
        lazylibrarian.UPDATE_MSG = ''

    except Exception:
        logger.error('Unhandled exception in database update: %s' % traceback.format_exc())
        lazylibrarian.UPDATE_MSG = ''
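# Illustrative sketch only: the upgrade steps above lean on a has_column()
# helper that is not shown in this file. Assuming it wraps sqlite's
# "PRAGMA table_info" (an assumption, not the project's actual helper),
# a minimal version might look like this:
def _has_column_sketch(myDB, table, column):
    # table_info returns one row per column; row['name'] is the column name
    columns = myDB.select('PRAGMA table_info(%s)' % table)
    if not columns:  # no such table
        return False
    for item in columns:
        if item['name'] == column:
            return True
    return False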
def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass
                    if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace(' ', '').upper()
                        mult = 1
                        if 'K' in size:
                            size = size.split('K')[0]
                            mult = 1024
                        elif 'M' in size:
                            size = size.split('M')[0]
                            mult = 1024 * 1024
                        elif 'G' in size:
                            size = size.split('G')[0]
                            mult = 1024 * 1024 * 1024
                        size = int(float(size) * mult)
                    except (ValueError, IndexError):
                        size = 0

                    try:
                        seeders = int(td[3].text)
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
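# Illustrative sketch only: KAT's size column arrives as text like "1.2 GB",
# which the parser above converts to bytes inline. The same conversion in
# isolation (_parse_size_sketch is a hypothetical helper, not provider code):
def _parse_size_sketch(text):
    # normalise "1.2 GB" -> "1.2GB", then peel off the unit and scale to bytes
    size = str(text).replace(' ', '').upper()
    mult = 1
    if 'K' in size:
        size, mult = size.split('K')[0], 1024
    elif 'M' in size:
        size, mult = size.split('M')[0], 1024 * 1024
    elif 'G' in size:
        size, mult = size.split('G')[0], 1024 * 1024 * 1024
    try:
        return int(float(size) * mult)
    except ValueError:
        return 0

# _parse_size_sketch("1.2 GB") -> 1288490188, _parse_size_sketch("750 KB") -> 768000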
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return the number of new authors added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}

    if members:
        myDB = database.DBConnection()
        for member in members:
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            # pubyear = member[5]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                        if not authorid:  # try again with title only
                            searchname = cleanName(unaccented(bookname))
                            if PY2:
                                searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                            searchterm = quote_plus(searchname)
                            set_url = base_url + searchterm + '&' + urlencode(params)
                            rootxml, in_cache = gr_xml_request(set_url)
                            if not in_cache:
                                api_hits += 1
                            if rootxml is None:
                                logger.warn('Error getting XML for %s' % searchname)
                            else:
                                resultxml = rootxml.getiterator('work')
                                for item in resultxml:
                                    booktitle = item.find('./best_book/title').text
                                    booktitle = replace_all(booktitle, dic)
                                    book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                    if book_fuzz >= 98:
                                        try:
                                            author = item.find('./best_book/author/name').text
                                        except (KeyError, AttributeError):
                                            author = ""
                                        # try:
                                        #     workid = item.find('./work/id').text
                                        # except (KeyError, AttributeError):
                                        #     workid = ""
                                        try:
                                            authorid = item.find('./best_book/author/id').text
                                        except (KeyError, AttributeError):
                                            authorid = ""
                                        logger.debug("Title Search found %s %s, authorid %s" %
                                                     (author, booktitle, authorid))
                                        break
                        if not authorid:
                            logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'])
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
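# Illustrative sketch only: the RSS providers above pick the torrent enclosure
# out of a feedparser entry by MIME type. feedparser normalises each link to a
# dict-like object with 'type' and 'href' keys ('url' is an alias for 'href'),
# which is why both spellings appear in these parsers. With a made-up feed:
import feedparser

SAMPLE_RSS = """<rss version="2.0"><channel><item>
<title>Sample Book</title>
<enclosure url="http://example.org/sample.torrent"
           type="application/x-bittorrent" length="12345"/>
</item></channel></rss>"""

d = feedparser.parse(SAMPLE_RSS)
for entry in d.entries:
    url = None
    for link in entry['links']:
        if 'x-bittorrent' in link['type']:
            url = link['href']
    print(url)  # http://example.org/sample.torrent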
def getWorkSeries(bookID=None):
    """ Return the series names and numbers in series for the given id as a list of tuples
        For goodreads the id is a WorkID, for librarything it's a BookID """
    myDB = database.DBConnection()
    serieslist = []
    if not bookID:
        logger.error("getWorkSeries - No bookID")
        return serieslist

    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        URL = "https://www.goodreads.com/work/"
        seriesurl = URL + bookID + "/series?format=xml&key=" + lazylibrarian.CONFIG['GR_API']

        rootxml, in_cache = gr_xml_request(seriesurl)
        if rootxml is None:
            logger.warn('Error getting XML for %s' % seriesurl)
        else:
            resultxml = rootxml.getiterator('series_work')
            for item in resultxml:
                try:
                    seriesname = item.find('./series/title').text
                    seriesname = seriesname.strip('\n').strip()
                    seriesid = item.find('./series/id').text
                    seriesnum = item.find('./user_position').text
                except (KeyError, AttributeError):
                    continue
                if seriesname and seriesid:
                    seriesname = cleanName(unaccented(seriesname), '&/')
                    if seriesname:
                        seriesnum = cleanName(unaccented(seriesnum))
                        serieslist.append((seriesid, seriesnum, seriesname))
                        match = myDB.match('SELECT SeriesID from series WHERE SeriesName=?', (seriesname,))
                        if not match:
                            match = myDB.match('SELECT SeriesName from series WHERE SeriesID=?', (seriesid,))
                            if not match:
                                myDB.action('INSERT INTO series VALUES (?, ?, ?, ?, ?)',
                                            (seriesid, seriesname, "Active", 0, 0))
                            else:
                                logger.warn("Name mismatch for series %s, [%s][%s]" %
                                            (seriesid, seriesname, match['SeriesName']))
                        elif match['SeriesID'] != seriesid:
                            myDB.action('UPDATE series SET SeriesID=? WHERE SeriesName=?', (seriesid, seriesname))
    else:
        work = getBookWork(bookID, "Series")
        if work:
            try:
                slist = work.split('<h3><b>Series:')[1].split('</h3>')[0].split('<a href="/series/')
                for item in slist[1:]:
                    try:
                        series = item.split('">')[1].split('</a>')[0]
                        if series and '(' in series:
                            seriesnum = series.split('(')[1].split(')')[0].strip()
                            series = series.split(' (')[0].strip()
                        else:
                            seriesnum = ''
                            series = series.strip()
                        seriesname = cleanName(unaccented(series), '&/')
                        seriesnum = cleanName(unaccented(seriesnum))
                        if seriesname:
                            serieslist.append(('', seriesnum, seriesname))
                    except IndexError:
                        pass
            except IndexError:
                pass

    return serieslist
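# Illustrative sketch only: the goodreads branch above walks <series_work>
# elements from the work/series endpoint. A trimmed, made-up sample of the
# XML shape it expects, parsed the same way with the standard library:
import xml.etree.ElementTree as ElementTree

SAMPLE_XML = """<GoodreadsResponse><series_works><series_work>
<user_position>2</user_position>
<series><id>40321</id><title> Discworld </title></series>
</series_work></series_works></GoodreadsResponse>"""

root = ElementTree.fromstring(SAMPLE_XML)
for item in root.iter('series_work'):
    print("%s %s %s" % (item.find('./series/id').text,          # 40321
                        item.find('./user_position').text,      # 2
                        item.find('./series/title').text.strip()))  # Discworld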
def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(',')[0].strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < int(seeders):
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
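# Illustrative sketch only: Limetorrent reports seeders inside the item
# description text rather than in a dedicated field, so the parser above
# slices the string. The same extraction in isolation, with a hypothetical
# helper name and sample text:
def _seeds_from_description_sketch(description):
    # expects text like "Seeds: 12 , Leechers: 3" and falls back to 0
    try:
        return int(description.split('Seeds:')[1].split(',')[0].strip())
    except (IndexError, ValueError):
        return 0

# _seeds_from_description_sketch("Seeds: 12 , Leechers: 3") -> 12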
def syncCalibreList(col_read=None, col_toread=None, userid=None):
    """ Get the lazylibrarian bookid for each read/toread calibre book so we can map our id to theirs,
        then sync the current (or supplied) user's read/toread lists with the calibre database
        using the user's (or supplied) calibre custom columns.
        Return a message giving totals """
    myDB = database.DBConnection()
    username = ''
    readlist = []
    toreadlist = []
    if not userid:
        cookie = cherrypy.request.cookie
        if cookie and 'll_uid' in cookie.keys():
            userid = cookie['ll_uid'].value
    if userid:
        res = myDB.match('SELECT UserName,ToRead,HaveRead,CalibreRead,CalibreToRead,Perms from users where UserID=?',
                         (userid,))
        if res:
            username = res['UserName']
            if not col_read:
                col_read = res['CalibreRead']
            if not col_toread:
                col_toread = res['CalibreToRead']
            toreadlist = getList(res['ToRead'])
            readlist = getList(res['HaveRead'])
            # suppress duplicates (just in case)
            toreadlist = list(set(toreadlist))
            readlist = list(set(readlist))
        else:
            return "Error: Unable to get user column settings for %s" % userid

    if not userid:
        return "Error: Unable to find current userid"

    if not col_read and not col_toread:
        return "User %s has no calibre columns set" % username

    # check user columns exist in calibre and create if not
    res = calibredb('custom_columns')
    columns = res[0].split('\n')
    custom_columns = []
    for column in columns:
        if column:
            custom_columns.append(column.split(' (')[0])

    if col_read not in custom_columns:
        added = calibredb('add_custom_column', [col_read, col_read, 'bool'])
        if "column created" not in added[0]:
            return added
    if col_toread not in custom_columns:
        added = calibredb('add_custom_column', [col_toread, col_toread, 'bool'])
        if "column created" not in added[0]:
            return added

    nomatch = 0
    readcol = ''
    toreadcol = ''
    map_ctol = {}
    map_ltoc = {}
    if col_read:
        readcol = '*' + col_read
    if col_toread:
        toreadcol = '*' + col_toread

    calibre_list = calibreList(col_read, col_toread)
    if not isinstance(calibre_list, list):
        # got an error message from calibredb
        return '"%s"' % calibre_list

    for item in calibre_list:
        if (toreadcol and toreadcol in item) or (readcol and readcol in item):
            authorname, authorid, added = addAuthorNameToDB(item['authors'], refresh=False, addbooks=False)
            if authorname:
                if authorname != item['authors']:
                    logger.debug("Changed authorname for [%s] from [%s] to [%s]" %
                                 (item['title'], item['authors'], authorname))
                    item['authors'] = authorname
                bookid = find_book_in_db(authorname, item['title'])
                if not bookid:
                    searchterm = "%s <ll> %s" % (item['title'], authorname)
                    results = search_for(unaccented(searchterm))
                    if results:
                        result = results[0]
                        if result['author_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO'] \
                                and result['book_fuzz'] > lazylibrarian.CONFIG['MATCH_RATIO']:
                            logger.debug("Found (%s%% %s%%) %s: %s" %
                                         (result['author_fuzz'], result['book_fuzz'],
                                          result['authorname'], result['bookname']))
                            bookid = result['bookid']
                            import_book(bookid)
                if bookid:
                    # NOTE: calibre bookid is always an integer, lazylibrarian bookid is a string
                    # (goodreads could be used as an int, but googlebooks can't as it's alphanumeric)
                    # so convert all dict items to strings for ease of matching.
                    map_ctol[str(item['id'])] = str(bookid)
                    map_ltoc[str(bookid)] = str(item['id'])
                else:
                    logger.warn('Calibre Book [%s] by [%s] is not in lazylibrarian database' %
                                (item['title'], authorname))
                    nomatch += 1
            else:
                logger.warn('Calibre Author [%s] not matched in lazylibrarian database' % (item['authors']))
                nomatch += 1

    # Now check current users lazylibrarian read/toread against the calibre library, warn about missing ones
    # which might be books calibre doesn't have, or might be minor differences in author or title
    for idlist in [("Read", readlist), ("To_Read", toreadlist)]:
        booklist = idlist[1]
        for bookid in booklist:
            cmd = "SELECT AuthorID,BookName from books where BookID=?"
            book = myDB.match(cmd, (bookid,))
            if not book:
                logger.error('Error finding bookid %s' % bookid)
            else:
                cmd = "SELECT AuthorName from authors where AuthorID=?"
                author = myDB.match(cmd, (book['AuthorID'],))
                if not author:
                    logger.error('Error finding authorid %s' % book['AuthorID'])
                else:
                    match = False
                    for item in calibre_list:
                        if item['authors'] == author['AuthorName'] and item['title'] == book['BookName']:
                            logger.debug("Exact match for %s [%s]" % (idlist[0], book['BookName']))
                            map_ctol[str(item['id'])] = str(bookid)
                            map_ltoc[str(bookid)] = str(item['id'])
                            match = True
                            break
                    if not match:
                        high = 0
                        highname = ''
                        highid = ''
                        for item in calibre_list:
                            if item['authors'] == author['AuthorName']:
                                n = fuzz.token_sort_ratio(item['title'], book['BookName'])
                                if n > high:
                                    high = n
                                    highname = item['title']
                                    highid = item['id']

                        if high > 95:
                            logger.debug("Found ratio match %s%% [%s] for %s [%s]" %
                                         (high, highname, idlist[0], book['BookName']))
                            map_ctol[str(highid)] = str(bookid)
                            map_ltoc[str(bookid)] = str(highid)
                            match = True

                        if not match:
                            logger.warn("No match for %s %s by %s in calibre database, closest match %s%% [%s]" %
                                        (idlist[0], book['BookName'], author['AuthorName'], high, highname))
                            nomatch += 1

    logger.debug("BookID mapping complete, %s match %s, nomatch %s" % (username, len(map_ctol), nomatch))

    # now sync the lists (userid is guaranteed valid here, we returned earlier if not)
    last_read = []
    last_toread = []
    calibre_read = []
    calibre_toread = []

    cmd = 'select SyncList from sync where UserID=? and Label=?'
    res = myDB.match(cmd, (userid, col_read))
    if res:
        last_read = getList(res['SyncList'])
    res = myDB.match(cmd, (userid, col_toread))
    if res:
        last_toread = getList(res['SyncList'])

    for item in calibre_list:
        if toreadcol and toreadcol in item and item[toreadcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_toread.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre to_read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))
        if readcol and readcol in item and item[readcol]:  # only if True
            if str(item['id']) in map_ctol:
                calibre_read.append(map_ctol[str(item['id'])])
            else:
                logger.warn("Calibre read book %s:%s has no lazylibrarian bookid" %
                            (item['authors'], item['title']))

    logger.debug("Found %s calibre read, %s calibre toread" % (len(calibre_read), len(calibre_toread)))
    logger.debug("Found %s lazylib read, %s lazylib toread" % (len(readlist), len(toreadlist)))

    added_to_ll_toread = list(set(toreadlist) - set(last_toread))
    removed_from_ll_toread = list(set(last_toread) - set(toreadlist))
    added_to_ll_read = list(set(readlist) - set(last_read))
    removed_from_ll_read = list(set(last_read) - set(readlist))
    logger.debug("lazylibrarian changes to copy to calibre: %s %s %s %s" %
                 (len(added_to_ll_toread), len(removed_from_ll_toread),
                  len(added_to_ll_read), len(removed_from_ll_read)))

    added_to_calibre_toread = list(set(calibre_toread) - set(last_toread))
    removed_from_calibre_toread = list(set(last_toread) - set(calibre_toread))
    added_to_calibre_read = list(set(calibre_read) - set(last_read))
    removed_from_calibre_read = list(set(last_read) - set(calibre_read))
    logger.debug("calibre changes to copy to lazylibrarian: %s %s %s %s" %
                 (len(added_to_calibre_toread), len(removed_from_calibre_toread),
                  len(added_to_calibre_read), len(removed_from_calibre_read)))

    calibre_changes = 0
    for item in added_to_calibre_read:
        if item not in readlist:
            readlist.append(item)
            logger.debug("Lazylibrarian marked %s as read" % item)
            calibre_changes += 1
    for item in added_to_calibre_toread:
        if item not in toreadlist:
            toreadlist.append(item)
            logger.debug("Lazylibrarian marked %s as to_read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_read:
        if item in readlist:
            readlist.remove(item)
            logger.debug("Lazylibrarian removed %s from read" % item)
            calibre_changes += 1
    for item in removed_from_calibre_toread:
        if item in toreadlist:
            toreadlist.remove(item)
            logger.debug("Lazylibrarian removed %s from to_read" % item)
            calibre_changes += 1
    if calibre_changes:
        myDB.action('UPDATE users SET ToRead=?,HaveRead=? WHERE UserID=?',
                    (', '.join(toreadlist), ', '.join(readlist), userid))

    ll_changes = 0
    for item in added_to_ll_toread:
        if item in map_ltoc:
            res, err, rc = calibredb('set_custom', [col_toread, map_ltoc[item], 'true'], [])
            if rc:
                msg = "calibredb set_custom error: "
                if err:
                    logger.error(msg + err)
                elif res:
                    logger.error(msg + res)
                else:
                    logger.error(msg + str(rc))
            else:
                ll_changes += 1
        else:
            logger.warn("Unable to set calibre %s true for %s" % (col_toread, item))
    for item in removed_from_ll_toread:
        if item in map_ltoc:
            res, err, rc = calibredb('set_custom', [col_toread, map_ltoc[item], ''], [])
            if rc:
                msg = "calibredb set_custom error: "
                if err:
                    logger.error(msg + err)
                elif res:
                    logger.error(msg + res)
                else:
                    logger.error(msg + str(rc))
            else:
                ll_changes += 1
        else:
            logger.warn("Unable to clear calibre %s for %s" % (col_toread, item))
    for item in added_to_ll_read:
        if item in map_ltoc:
            res, err, rc = calibredb('set_custom', [col_read, map_ltoc[item], 'true'], [])
            if rc:
                msg = "calibredb set_custom error: "
                if err:
                    logger.error(msg + err)
                elif res:
                    logger.error(msg + res)
                else:
                    logger.error(msg + str(rc))
            else:
                ll_changes += 1
        else:
            logger.warn("Unable to set calibre %s true for %s" % (col_read, item))
    for item in removed_from_ll_read:
        if item in map_ltoc:
            res, err, rc = calibredb('set_custom', [col_read, map_ltoc[item], ''], [])
            if rc:
                msg = "calibredb set_custom error: "
                if err:
                    logger.error(msg + err)
                elif res:
                    logger.error(msg + res)
                else:
                    logger.error(msg + str(rc))
            else:
                ll_changes += 1
        else:
            logger.warn("Unable to clear calibre %s for %s" % (col_read, item))

    # store current sync list as comparison for next sync
    controlValueDict = {"UserID": userid, "Label": col_read}
    newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(readlist)}
    myDB.upsert("sync", newValueDict, controlValueDict)
    controlValueDict = {"UserID": userid, "Label": col_toread}
    newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(toreadlist)}
    myDB.upsert("sync", newValueDict, controlValueDict)

    msg = "%s sync updated: %s calibre, %s lazylibrarian" % (username, ll_changes, calibre_changes)
    return msg
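# Illustrative sketch only: the two-way sync above reduces to set differences
# against the list stored at the previous sync; anything added on one side
# since then is copied to the other. A minimal model of that delta logic,
# with a hypothetical helper name and made-up data:
def _sync_deltas_sketch(current, last_synced):
    # items that appeared since the last sync, and items that were removed
    added = list(set(current) - set(last_synced))
    removed = list(set(last_synced) - set(current))
    return added, removed

# e.g. a reading list that went from [1, 2] at last sync to [2, 3] now:
# _sync_deltas_sketch([2, 3], [1, 2]) -> ([3], [1])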