def addKeyword(self, type=None, title=None, frequency=None, **args):
    """Add a magazine to the ``magazines`` table and kick off searches for it.

    Only ``type == 'magazine'`` is handled; any other type does nothing.

    Args:
        type: Kind of keyword being added.
        title: Magazine title, used as the ``Title`` key of the row.
        frequency: Value stored in the ``Frequency`` column.
        **args: Extra request parameters; ignored.

    Raises:
        cherrypy.HTTPRedirect: To ``config`` when no title was supplied,
            otherwise to ``magazines`` once the row has been written.
    """
    myDB = database.DBConnection()
    if type != 'magazine':
        return

    # title defaults to None; the previous len(title) == 0 test raised
    # TypeError in that case instead of redirecting back to the config page.
    if not title:
        raise cherrypy.HTTPRedirect("config")

    controlValueDict = {"Title": title}
    newValueDict = {
        "Frequency": frequency,
        "Regex": None,
        "Status": "Active",
        "MagazineAdded": formatter.today(),
        "IssueStatus": "Wanted"
    }
    myDB.upsert("magazines", newValueDict, controlValueDict)

    # One search thread per enabled provider type; books=False means
    # only the magazine list is passed to the search functions.
    mags = [{"bookid": title}]
    books = False
    if lazylibrarian.USE_NZB:
        threading.Thread(target=search_nzb_book, args=[books, mags]).start()
    if lazylibrarian.USE_TOR:
        threading.Thread(target=search_tor_book, args=[books, mags]).start()
    logger.debug("Searching for magazine with title: " + str(title))
    # NOTE(review): source indentation was lost; the final redirect is
    # assumed to belong to the magazine branch - confirm.
    raise cherrypy.HTTPRedirect("magazines")
def addKeyword(self, type=None, title=None, frequency=None, **args):
    """Store a new magazine row and start provider searches for its title.

    Nothing happens unless ``type`` is ``'magazine'``.

    Args:
        type: Kind of keyword being added.
        title: Magazine title, used as the ``Title`` key of the row.
        frequency: Value stored in the ``Frequency`` column.
        **args: Extra request parameters; ignored.

    Raises:
        cherrypy.HTTPRedirect: To ``config`` when the title is missing or
            empty, otherwise to ``magazines`` after the upsert.
    """
    myDB = database.DBConnection()
    if type != 'magazine':
        return

    # Guard against the None default as well as '': len(None) used to
    # raise TypeError here.
    if not title:
        raise cherrypy.HTTPRedirect("config")

    controlValueDict = {"Title": title}
    newValueDict = {
        "Frequency": frequency,
        "Regex": None,
        "Status": "Active",
        "MagazineAdded": formatter.today(),
        "IssueStatus": "Wanted"
    }
    myDB.upsert("magazines", newValueDict, controlValueDict)

    mags = [{"bookid": title}]
    books = False
    # Searches run in their own threads so the request can redirect at once.
    if lazylibrarian.USE_NZB:
        threading.Thread(target=search_nzb_book, args=[books, mags]).start()
    if lazylibrarian.USE_TOR:
        threading.Thread(target=search_tor_book, args=[books, mags]).start()
    logger.debug("Searching for magazine with title: " + str(title))
    # NOTE(review): source indentation was lost; the final redirect is
    # assumed to belong to the magazine branch - confirm.
    raise cherrypy.HTTPRedirect("magazines")
def addAuthorToDB(authorname=None, refresh=False):
    """Add an author and their books to the database, or refresh an existing one.

    The author row is marked "Loading", the author is looked up through
    GoodReads, the row is filled in, and the books are then fetched through
    the API selected by ``lazylibrarian.BOOK_API``.

    Args:
        authorname: Name of the author to add or update.
        refresh: Passed through to the author lookup and book fetch calls.

    Returns:
        None. When the lookup finds nothing the author row is deleted and
        the function returns early.
    """
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.match(query)
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Bind the name as a parameter: interpolating it between double
        # quotes broke the statement for names containing a '"'.
        myDB.action('DELETE from authors WHERE AuthorName=?', [authorname])
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # prefer the cached copy when caching succeeds
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None):
    """Add an author and their GoodReads books to the database.

    Marks the author row "Loading", looks the author up through GoodReads,
    fetches their books, then writes book totals and the most recent book
    back to the author row with status "Active".

    Args:
        authorname: Name of the author to add or update.

    Returns:
        None. Returns early, after logging an error, when the author lookup
        finds nothing.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid; carrying on used to
        # raise NameError on the first use below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)

    lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" % authorid).fetchone()
    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)
    for count in bookCount:
        controlValueDict = {"AuthorID": authorid}
        # fetchone() yields None when the author has no books at all
        newValueDict = {
            "Status": "Active",
            "TotalBooks": count['counter'],
            "LastBook": lastbook['BookName'] if lastbook else None,
            "LastLink": lastbook['BookLink'] if lastbook else None,
            "LastDate": lastbook['BookDate'] if lastbook else None
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
        logger.info("Processing complete: Added %s books to the database" % str(count['counter']))
def addAuthorToDB(authorname=None, refresh=False):
    """Create or refresh an author row and import the author's books.

    The row is flagged "Loading", the author is resolved through GoodReads
    (with the image cached locally when possible), and books are fetched
    via the API named in ``lazylibrarian.BOOK_API``.

    Args:
        authorname: Name of the author to add or update.
        refresh: Passed through to the author lookup and book fetch calls.

    Returns:
        None. When the lookup finds nothing the author row is deleted and
        the function returns early.
    """
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Parameter binding instead of "%s" interpolation: a name with a
        # double quote in it used to produce a malformed DELETE.
        myDB.action('DELETE from authors WHERE AuthorName=?', [authorname])
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # prefer the cached copy when caching succeeds
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False):
    """Create or refresh an author row and import the author's books.

    Runs under the thread name "DBIMPORT". The row is flagged "Loading",
    the author is resolved through GoodReads, and books are fetched via the
    API named in ``lazylibrarian.BOOK_API``.

    Args:
        authorname: Name of the author to add or update.
        refresh: Passed through to the author lookup and book fetch calls.

    Returns:
        None. When the lookup finds nothing the author row is deleted and
        the function returns early.
    """
    threading.currentThread().name = "DBIMPORT"
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # Bound parameter rather than "%s" between double quotes, which
        # failed for names that themselves contain a double quote.
        myDB.action('DELETE from authors WHERE AuthorName=?', [authorname])
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False):
    """Create or refresh an author row and import the author's books.

    Runs under the thread name "DBIMPORT". The row is flagged "Loading",
    the author is resolved through GoodReads, and books are fetched via the
    API named in ``lazylibrarian.BOOK_API``.

    Args:
        authorname: Name of the author to add or update.
        refresh: Passed through to the book fetch call.

    Returns:
        None. Returns early, after logging an error, when the author lookup
        finds nothing.
    """
    threading.currentThread().name = "DBIMPORT"
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.info("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.info("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # No author means no authorid; falling through used to raise
        # NameError in the book-processing step.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.info("[%s] Author update complete" % authorname)
def addKeyword(self, type=None, title=None, frequency=None, **args):
    """Add a magazine title to the ``magazines`` table.

    Only ``type == 'magazine'`` is handled; any other type does nothing.

    Args:
        type: Kind of keyword being added.
        title: Magazine title, used as the ``Title`` key of the row.
        frequency: Value stored in the ``Frequency`` column.
        **args: Extra request parameters; ignored.

    Raises:
        cherrypy.HTTPRedirect: To ``config`` when no title was supplied,
            otherwise to ``magazines`` after the upsert.
    """
    myDB = database.DBConnection()
    if type != 'magazine':
        return

    # title may be None (the default); len(None) used to raise TypeError.
    if not title:
        raise cherrypy.HTTPRedirect("config")

    controlValueDict = {"Title": title}
    newValueDict = {
        "Frequency": frequency,
        "Regex": None,
        "Status": "Active",
        "MagazineAdded": formatter.today(),
    }
    myDB.upsert("magazines", newValueDict, controlValueDict)
    # NOTE(review): source indentation was lost; the final redirect is
    # assumed to belong to the magazine branch - confirm.
    raise cherrypy.HTTPRedirect("magazines")
def _addMagazine(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] controlValueDict = {"Title": self.id} newValueDict = { "Frequency": None, "Regex": None, "Status": "Active", "MagazineAdded": formatter.today(), "IssueStatus": "Wanted" } myDB.upsert("magazines", newValueDict, controlValueDict)
def _addMagazine(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] controlValueDict = {"Title": self.id} newValueDict = { "Regex": None, "Status": "Active", "MagazineAdded": today(), "IssueStatus": "Wanted", "Reject": None } myDB.upsert("magazines", newValueDict, controlValueDict)
def _addMagazine(self, **kwargs): if 'name' not in kwargs: self.data = 'Missing parameter: name' return else: self.id = kwargs['name'] myDB = database.DBConnection() controlValueDict = {"Title": self.id} newValueDict = { "Regex": None, "Status": "Active", "MagazineAdded": today(), "IssueStatus": "Wanted", "Reject": None } myDB.upsert("magazines", newValueDict, controlValueDict)
def import_book(pp_path=None, bookID=None):
    """Move a downloaded book into the library folder structure.

    Separated into a function so books can be imported from an alternate
    directory and moved into the LL folder structure given just the bookID,
    e.g.::

        if import_book(source_directory, bookID):
            ppcount = ppcount + 1

    Args:
        pp_path: Directory holding the files to be processed.
        bookID: ``BookID`` of the book row the files belong to.

    Returns:
        bool: True when ``processDestination`` succeeded, False when it
        failed or when ``bookID`` is not in the ``books`` table.
    """
    myDB = database.DBConnection()
    data = myDB.select('SELECT * from books WHERE BookID="%s"' % bookID)
    if not data:
        # Unknown bookID: report failure explicitly. This path used to fall
        # off the end and return None despite the True/False contract.
        return False

    authorname = data[0]['AuthorName']
    bookname = data[0]['BookName']

    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
        # note: this rewrites the module-level setting, not a local copy
        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)

    # Remove characters we don't want in the filename BEFORE adding to
    # DESTINATION_DIR, as windows drive identifiers have a colon (eg c:)
    # but colons are not wanted elsewhere in the path.
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)

    processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, bookID)
    if not processBook:
        logger.error('Postprocessing for %s has failed.' % global_name)
        logger.error('Warning - Residual files remain in %s' % pp_path)
        return False

    # update nzbs
    controlValueDict = {"BookID": bookID}
    newValueDict = {"Status": "Processed", "NZBDate": formatter.today()}  # say when we processed it
    myDB.upsert("wanted", newValueDict, controlValueDict)
    processExtras(myDB, dest_path, global_name, data)
    return True
def addAuthorToDB(authorname=None):
    """Add an author to the database and fetch their books from GoodReads.

    Runs under the thread name "DBIMPORT". The author row is flagged
    "Loading", filled in from the GoodReads lookup, and the author's books
    are then fetched.

    Args:
        authorname: Name of the author to add or update.

    Returns:
        None. Returns early, after logging an error, when the author lookup
        finds nothing.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # authorid is only known when the lookup succeeds; continuing used
        # to raise NameError in the book fetch below.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
def addAuthorToDB(authorname=None):
    """Add an author to the database and fetch their books from GoodReads.

    Runs under the thread name "DBIMPORT". The author row is flagged
    "Loading", filled in from the GoodReads lookup, and the author's books
    are then fetched.

    Args:
        authorname: Name of the author to add or update.

    Returns:
        None. Returns early, after logging an error, when the author lookup
        finds nothing.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until the real one is known
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Stop here: with no author there is no authorid, and the book
        # fetch below used to fail with NameError.
        return

    authorid = author['authorid']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": author['authorlink'],
        "AuthorImg": author['authorimg'],
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    GR.get_author_books(authorid)
def get_capabilities(provider):
    """Query a provider for its capabilities if stored ones are missing or stale.

    Stored values are reused when ``provider['UPDATED']`` holds a
    10-character date that is not older than ``lazylibrarian.CACHE_AGE``,
    or when the entry is flagged ``MANUAL``. Otherwise the provider's
    ``t=caps`` endpoint is requested and the search types and book/magazine
    categories found in the reply are written into ``provider``.

    Args:
        provider: Dict with at least ``UPDATED``, ``MANUAL``, ``HOST`` and
            ``API`` entries; updated in place.

    Returns:
        dict: The same ``provider`` dict, updated when fresh capabilities
        were obtained and left unchanged on any failure.
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (formatter.age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)
    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    request.add_header('User-Agent', common.USER_AGENT)

    resp = ""
    try:
        resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
    except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
        logger.debug("Error getting capabilities: %s" % e)
        resp = ""
    if not resp:
        return provider

    if not str(resp.getcode()).startswith("2"):  # anything but 200 OK etc
        logger.warn(u"Unable to get capabilities for %s: Got %s" % (URL, resp.getcode()))
        return provider

    logger.debug(u"Got capabilities for %s" % request.get_full_url())
    try:
        source_xml = resp.read()
        data = ElementTree.fromstring(source_xml)
    except Exception:
        logger.debug(u"Error getting xml from %s" % URL)
        data = None

    # data is None after a failed read/parse; len(None) used to raise
    # TypeError here instead of reaching the "No data returned" warning.
    if data is None or not len(data):
        logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
        return provider

    logger.debug(u"Parsing xml for capabilities of %s" % URL)

    # Book search isn't mentioned in the caps xml returned by nzbplanet,
    # jackett, oznzb or usenet-crawler, so it can't be used as a test.
    # The newznab+ ones usually support t=book and categories in the 7000
    # range, whereas nZEDb ones don't support t=book and use categories in
    # the 8000 range. Some providers give searchtype but no supportedparams,
    # so we still can't tell what queries will be accepted.
    # Category names can be lowercase or Mixed, the magazine subcat name
    # isn't consistent, and a subcat can be just subcat, category/subcat or
    # subcat > lang, eg "Magazines" "Mags" or "Books/Magazines" "Mags > French".
    # Load all languages for now as we don't know which the user might want.

    # set some defaults
    provider['GENERALSEARCH'] = ''
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''

    search = data.find('searching/search')
    if search is not None and search.attrib.get('available') == 'yes':
        provider['GENERALSEARCH'] = 'search'

    for cat in data.getiterator('category'):
        if 'name' not in cat.attrib or cat.attrib['name'].lower() != 'books':
            continue

        bookcat = cat.attrib['id']  # keep main bookcat for later
        provider['BOOKCAT'] = bookcat
        provider['MAGCAT'] = ''

        # 7000 looks like newznab+ and should support book-search; anything
        # else looks like nZEDb, probably without it.
        provider['BOOKSEARCH'] = 'books' if bookcat == '7000' else ''
        # ...but let an explicit declaration in the caps override the guess
        search = data.find('searching/book-search')
        if search is not None and 'available' in search.attrib:
            if search.attrib['available'] == 'yes':
                provider['BOOKSEARCH'] = 'books'
            else:
                provider['BOOKSEARCH'] = ''

        for subcat in cat.getiterator('subcat'):
            subname = subcat.attrib['name'].lower()
            if 'ebook' in subname:
                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
            if 'magazines' in subname or 'mags' in subname:
                if provider['MAGCAT']:
                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                else:
                    provider['MAGCAT'] = subcat.attrib['id']

        # if no specific magazine subcategory, use books
        if not provider['MAGCAT']:
            provider['MAGCAT'] = bookcat

    logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
    provider['UPDATED'] = formatter.today()
    return provider
def find_book(self, bookid=None, bookstatus="None"):
    """Look up one Google Books volume and add it, and its author, to the database.

    The volume is fetched by id, its author is resolved through GoodReads
    (and added to the ``authors`` table when not already known), the book
    row is upserted, and cover, series and work-page details are filled in
    afterwards.

    Args:
        bookid: Google Books volume id.
        bookstatus: Status to store for the book. An empty value or the
            string "None" (the default) selects
            ``lazylibrarian.CONFIG['NEWBOOK_STATUS']``.

    Returns:
        None. Returns early when the request yields nothing, when the
        volume has no author, or when GoodReads cannot resolve the author.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)

    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return

    # The signature default is the *string* "None", which is truthy, so the
    # configured fallback never applied and "None" was stored as the status.
    if not bookstatus or bookstatus == "None":
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)
    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))

    # the date preferences only produce warnings; the book is still added
    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))

    if lazylibrarian.CONFIG['NO_FUTURE']:
        if book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))

    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return

    AuthorID = author['authorid']
    match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
    if not match:
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
        if match:
            logger.debug('%s: Changing authorid from %s to %s' %
                         (author['authorname'], AuthorID, match['AuthorID']))
            AuthorID = match['AuthorID']  # we have a different authorid for that authorname
        else:
            # no author but request to add book, add author with newauthor status
            # User hit "add book" button from a search or a wishlist import
            newauthor_status = 'Active'
            if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                newauthor_status = 'Paused'
            controlValueDict = {"AuthorID": AuthorID}
            newValueDict = {
                "AuthorName": author['authorname'],
                "AuthorImg": author['authorimg'],
                "AuthorLink": author['authorlink'],
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": newauthor_status
            }
            authorname = author['authorname']
            myDB.upsert("authors", newValueDict, controlValueDict)
            if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                self.get_author_books(AuthorID, entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))

    if 'nocover' in book['img'] or 'nophoto' in book['img']:
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif book['img'] and book['img'].startswith('http'):
        link, success, _ = cache_img("book", bookid, book['img'])
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % book['img'])

    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def addBookToDB(bookid, authorname):
    """Add a single book and its author to the database using GoodReads data.

    The book row is flagged "Loading", filled in from ``GR.find_book()``,
    and the author row is then written from ``GR.find_author_id()``.

    Args:
        bookid: ID of the book to add.
        authorname: Name of the book's author.

    Returns:
        None. Lookup failures are logged as warnings and that part of the
        update is skipped.
    """
    search_type = 'book'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    # NOTE(review): GB is never used below; kept in case constructing it
    # matters - confirm and remove.
    GB = GoogleBooks(bookid, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()
    if not book:
        logger.warn("Error fetching bookinfo for BookID: " + bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must not be quoted: '?' inside SQL quotes is a string
    # literal, so the bound parameter had no placeholder to attach to.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # NOTE(review): this placeholder dict is built but never written before
    # the lookup below replaces it - confirm whether an upsert was intended.
    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()
    if not author:
        logger.warn("Error fetching authorinfo with name: " + authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Fetch an author's books from the Google Books API into the database.

    Results are requested 40 at a time. Volumes without an author or a
    language, or in a language outside ``lazylibrarian.IMP_PREFLANG``, are
    skipped; books already marked "Ignored" are left alone. Afterwards the
    author row is set to "Active" with book totals and the latest book.

    Args:
        authorid: ID of the author whose rows are updated.
        authorname: Author name used in the ``inauthor:`` query and in logs.
        refresh: Only selects which summary message is logged at the end.

    Returns:
        list: ``books_dict``, which this method never populates, so it is
        always empty.
    """
    books_dict = []
    set_url = self.url + urllib.quote('inauthor:' + '"' + authorname + '"')
    api_hits = 0
    logger.info('[%s] Now processing books with Google Books API' % authorname)

    # Author is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    # The language preference is the same for every item, so build it once.
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    # German umlauts are transliterated and ":" is removed from titles.
    title_replacements = (
        (u'\xf6', u'oe'), (u'\xe4', u'ae'), (u'\xdf', u'ss'), (u'\xc4', u'Ae'),
        (u'\xdc', u'Ue'), (u'\xd6', u'Oe'), (':', ''), (u'\xfc', u'ue'),
    )

    startindex = 0
    resultcount = 0
    removedResults = 0
    ignored = 0
    added_count = 0
    updated_count = 0
    book_ignore_count = 0
    total_count = 0

    try:
        while True:
            self.params['startIndex'] = startindex
            URL = set_url + '&' + urllib.urlencode(self.params)

            try:
                jsonresults = json.JSONDecoder().decode(urllib2.urlopen(URL, timeout=30).read())
            except HTTPError as err:
                logger.error('Google API returned HTTP Error - probably time/rate limiting - [%s]' % err.msg)
                # No usable page: stop instead of re-reading a stale or
                # undefined jsonresults.
                break

            api_hits = api_hits + 1
            number_results = jsonresults['totalItems']
            logger.debug('[%s] Searching url: %s' % (authorname, URL))
            if number_results == 0:
                logger.info('Found no results for %s' % authorname)
                break

            startindex = startindex + 40

            for item in jsonresults['items']:
                total_count = total_count + 1
                volume = item.get('volumeInfo', {})

                # skip if no author, no author is no book.
                if not volume.get('authors'):
                    logger.debug('Skipped a result without authorfield.')
                    continue

                booklang = volume.get('language')
                if booklang is None:
                    ignored = ignored + 1
                    logger.debug('Skipped a result where no language is found')
                    continue
                # skip if language is not in the preferred list
                if booklang not in valid_langs:
                    logger.debug('Skipped a book with language %s' % booklang)
                    ignored = ignored + 1
                    continue

                bookpub = volume.get('publisher')
                booksub = volume.get('subtitle')
                bookdate = volume.get('publishedDate', '0000-00-00')
                bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
                bookrate = float(volume.get('averageRating', 0))
                bookpages = volume.get('pageCount', 0)
                categories = volume.get('categories')
                bookgenre = categories[0] if categories else None
                bookdesc = volume.get('description')

                # only the first identifier is considered, and only if ISBN-10
                identifiers = volume.get('industryIdentifiers')
                if identifiers and identifiers[0].get('type') == 'ISBN_10':
                    bookisbn = identifiers[0].get('identifier')
                else:
                    bookisbn = None

                bookid = item['id']
                bookname = volume['title']
                for old, new in title_replacements:
                    bookname = bookname.replace(old, new)
                booklink = volume['canonicalVolumeLink']

                find_book_status = myDB.select("SELECT * FROM books WHERE BookID = '%s'" % bookid)
                book_status = "Skipped"
                for resulted in find_book_status:
                    book_status = resulted['Status']

                # remove books whose title starts with a bad character
                if re.match(r'[^\w-]', bookname):
                    removedResults = removedResults + 1
                    continue

                if book_status == "Ignored":
                    book_ignore_count = book_ignore_count + 1
                    continue

                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "AuthorName": authorname,
                    "AuthorID": authorid,
                    "AuthorLink": "",
                    "BookName": bookname,
                    "BookSub": booksub,
                    "BookDesc": bookdesc,
                    "BookIsbn": bookisbn,
                    "BookPub": bookpub,
                    "BookGenre": bookgenre,
                    "BookImg": bookimg,
                    "BookLink": booklink,
                    "BookRate": bookrate,
                    "BookPages": bookpages,
                    "BookDate": bookdate,
                    "BookLang": booklang,
                    "Status": book_status,
                    "BookAdded": formatter.today(),
                    "Series": None,
                    "SeriesOrder": None
                }
                resultcount = resultcount + 1

                myDB.upsert("books", newValueDict, controlValueDict)
                logger.debug(u"book found " + bookname + " " + bookdate)
                if not find_book_status:
                    logger.info("[%s] Added book: %s" % (authorname, bookname))
                    added_count = added_count + 1
                else:
                    updated_count = updated_count + 1
                    logger.info("[%s] Updated book: %s" % (authorname, bookname))

            if startindex >= number_results:
                break
    except KeyError:
        # a reply missing an expected key ends the import; totals below
        # are still written
        pass

    logger.info('[%s] The Google Books API was hit %s times to populate book list' % (authorname, str(api_hits)))

    lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' AND Status != 'Ignored' order by BookDate DESC" % authorid).fetchone()
    unignoredbooks = myDB.select("SELECT COUNT(BookName) as unignored FROM books WHERE AuthorID='%s' AND Status != 'Ignored'" % authorid)
    bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)

    controlValueDict = {"AuthorID": authorid}
    # fetchone() yields None when the author has no unignored books
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookCount[0]['counter'],
        "UnignoredBooks": unignoredbooks[0]['unignored'],
        "LastBook": lastbook['BookName'] if lastbook else None,
        "LastLink": lastbook['BookLink'] if lastbook else None,
        "LastDate": lastbook['BookDate'] if lastbook else None
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.debug("Found %s total books for author" % total_count)
    logger.debug("Removed %s bad language results for author" % ignored)
    logger.debug("Removed %s bad character results for author" % removedResults)
    logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count)
    logger.debug("Imported/Updated %s books for author" % resultcount)
    if refresh:
        logger.info("[%s] Book processing complete: Added %s books / Updated %s books" % (authorname, str(added_count), str(updated_count)))
    else:
        logger.info("[%s] Book processing complete: Added %s books to the database" % (authorname, str(added_count)))
    return books_dict
"AuthorID": authorid, "AuthorLink": "", "BookName": book.find('title').text, "BookSub": "", "BookDesc": book.find('description').text, "BookIsbn": book.find('isbn').text, "BookPub": book.find('publisher').text, "BookGenre": "", "BookImg": bookimg, "BookLink": book.find('link').text, "BookRate": float(book.find('average_rating').text), "BookPages": book.find('num_pages').text, "BookDate": pubyear, "BookLang": bookLanguage, "Status": "Skipped", "BookAdded": formatter.today() } myDB.upsert("books", newValueDict, controlValueDict) logger.debug(u"book found " + book.find('title').text + " " + pubyear) resultsCount = resultsCount + 1 lastbook = myDB.action("SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" % authorid).fetchone() bookCount = myDB.select("SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid) for count in bookCount: controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": "Active", "TotalBooks": count['counter'], "LastBook": lastbook['BookName'], "LastLink": lastbook['BookLink'],
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Import an author's books from the Google Books API into the database.

    Searches Google Books for ``inauthor:"<name>"`` and walks the result
    set, advancing ``startIndex`` by 40 per request.  Unless "All" is one
    of the preferred languages, each volume's language is checked against
    the ``languages`` cache table (keyed on the first three ISBN-10
    characters) and, failing that, LibraryThing.  Accepted volumes are
    upserted into ``books``; the author's row is then marked "Active" with
    its most recent book, and a row of counters is written to ``stats``.

    :param authorid: AuthorID of the ``authors`` row to update.
    :param authorname: name used for the search, the book rows and logging.
    :param refresh: only selects the wording of the final log message.
    :return: ``books_dict``; nothing in this method appends to it, so the
        result is always an empty list.
    """
    logger.debug('[%s] Now processing books with Google Books API' % authorname)
    # google doesnt like accents in author names
    aname = unidecode(u'%s' % authorname)
    set_url = self.url + urllib.quote('inauthor:' + '"' + aname + '"')
    URL = set_url + '&' + urllib.urlencode(self.params)

    books_dict = []
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0

    # Artist is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    try:
        startindex = 0
        resultcount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1

        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

        while startindex < number_results:
            self.params['startIndex'] = startindex
            URL = set_url + '&' + urllib.urlencode(self.params)

            try:
                jsonresults, in_cache = self.get_request(URL)
                if jsonresults is None:
                    number_results = 0
                else:
                    if not in_cache:
                        api_hits = api_hits + 1
                    number_results = jsonresults['totalItems']
            except HTTPError as err:
                logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % err.reason)
                break

            if number_results == 0:
                logger.warn('Found no results for %s' % (authorname))
                break
            else:
                logger.debug('Found %s results for %s' % (number_results, authorname))

            startindex = startindex + 40

            for item in jsonresults['items']:
                total_count = total_count + 1

                # skip if no author, no author is no book.
                try:
                    volume = item['volumeInfo']
                    volume['authors'][0]
                except (KeyError, IndexError):
                    logger.debug('Skipped a result without authorfield.')
                    continue

                # only an ISBN-10 in the first identifier slot is used
                try:
                    identifier = volume['industryIdentifiers'][0]
                    if identifier['type'] == 'ISBN_10':
                        bookisbn = identifier['identifier']
                    else:
                        bookisbn = ""
                except (KeyError, IndexError):
                    bookisbn = ""

                isbnhead = ""
                if len(bookisbn) == 10:
                    isbnhead = bookisbn[0:3]

                booklang = volume.get('language', "Unknown")

                # do we care about language?
                if "All" not in valid_langs:
                    if bookisbn != "":
                        # seems google lies to us, sometimes tells us books
                        # are in english when they are not
                        if booklang == "Unknown" or booklang == "en":
                            googlelang = booklang
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                                (isbnhead)).fetchone()
                            if (match):
                                booklang = match['lang']
                                cache_hits = cache_hits + 1
                                logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # librarything returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + \
                                    bookisbn
                                try:
                                    time.sleep(1)  # sleep 1 second to respect librarything api terms
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits = lt_lang_hits + 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    if (resp != 'invalid' and resp != 'unknown'):
                                        booklang = resp  # found a language code
                                        myDB.action('insert into languages values ("%s", "%s")' %
                                                    (isbnhead, booklang))
                                        logger.debug(u"LT language: " + booklang)
                                except Exception as e:
                                    booklang = ""
                                    logger.error("Error finding language: %s" % e)

                            if googlelang == "en" and booklang not in "en-US, en-GB, eng":
                                # these are all english, may need to expand
                                # this list
                                booknamealt = volume['title']
                                logger.debug("%s Google thinks [%s], we think [%s]" %
                                             (booknamealt, googlelang, booklang))
                                gb_lang_change = gb_lang_change + 1
                        else:
                            match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                                (isbnhead)).fetchone()
                            if (not match):
                                myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, booklang))
                                logger.debug(u"GB language: " + booklang)

                    # skip if language is in ignore list
                    if booklang not in valid_langs:
                        booknamealt = volume['title']
                        logger.debug('Skipped [%s] with language %s' % (booknamealt, booklang))
                        ignored = ignored + 1
                        continue

                bookpub = volume.get('publisher')

                # Reset for every item: a volume without a subtitle must not
                # inherit the series of the previous one (or hit an unbound name).
                series = None
                seriesNum = None
                booksub = volume.get('subtitle')
                if booksub is not None:
                    try:
                        series = booksub.split('(')[1].split(' Series ')[0]
                    except IndexError:
                        series = None
                    try:
                        seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                        if seriesNum[0] == '#':
                            seriesNum = seriesNum[1:]
                    except IndexError:
                        seriesNum = None

                bookdate = volume.get('publishedDate', '0000-00-00')
                bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
                bookrate = volume.get('averageRating', 0)
                bookpages = volume.get('pageCount', 0)
                try:
                    bookgenre = volume['categories'][0]
                except (KeyError, IndexError):
                    bookgenre = None
                bookdesc = volume.get('description')

                bookname = volume['title']
                bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
                bookname = unidecode(u'%s' % bookname)
                bookname = bookname.strip()  # strip whitespace
                booklink = volume['canonicalVolumeLink']
                bookrate = float(bookrate)
                # The volume id is the primary key for everything below.
                bookid = item['id']

                find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                else:
                    book_status = lazylibrarian.NEWBOOK_STATUS

                if not (re.match(r'[^\w-]', bookname)):  # remove books with bad characters in title
                    if book_status != "Ignored":
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "AuthorName": authorname,
                            "AuthorID": authorid,
                            "AuthorLink": "",
                            "BookName": bookname,
                            "BookSub": booksub,
                            "BookDesc": bookdesc,
                            "BookIsbn": bookisbn,
                            "BookPub": bookpub,
                            "BookGenre": bookgenre,
                            "BookImg": bookimg,
                            "BookLink": booklink,
                            "BookRate": bookrate,
                            "BookPages": bookpages,
                            "BookDate": bookdate,
                            "BookLang": booklang,
                            "Status": book_status,
                            "BookAdded": formatter.today(),
                            "Series": series,
                            "SeriesNum": seriesNum
                        }
                        resultcount = resultcount + 1

                        myDB.upsert("books", newValueDict, controlValueDict)
                        logger.debug(u"Book found: " + bookname + " " + bookdate)

                        if 'nocover' in bookimg or 'nophoto' in bookimg:
                            # try to get a cover from librarything
                            workcover = bookwork.getBookCover(bookid)
                            if workcover:
                                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": workcover}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        elif bookimg.startswith('http'):
                            link = bookwork.cache_cover(bookid, bookimg)
                            if link is not None:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": link}
                                myDB.upsert("books", newValueDict, controlValueDict)

                        if seriesNum is None:
                            # try to get series info from librarything
                            series, seriesNum = bookwork.getWorkSeries(bookid)
                            if seriesNum:
                                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }
                                myDB.upsert("books", newValueDict, controlValueDict)

                        worklink = bookwork.getWorkPage(bookid)
                        if worklink:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {"WorkPage": worklink}
                            myDB.upsert("books", newValueDict, controlValueDict)

                        if not find_book_status:
                            logger.debug("[%s] Added book: %s [%s]" % (authorname, bookname, booklang))
                            added_count = added_count + 1
                        else:
                            updated_count = updated_count + 1
                            logger.debug("[%s] Updated book: %s" % (authorname, bookname))
                    else:
                        book_ignore_count = book_ignore_count + 1
                else:
                    logger.debug("[%s] removed book for bad characters" % (bookname))
                    removedResults = removedResults + 1

    except KeyError:
        # A response without the expected keys (e.g. no 'items') ends the
        # import; whatever was stored so far is kept.
        pass

    logger.debug('[%s] The Google Books API was hit %s times to populate book list' %
                 (authorname, str(api_hits)))

    lastbook = myDB.action('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \
                           AND Status != "Ignored" order by BookDate DESC' % authorid).fetchone()

    if lastbook:  # maybe there are no books [remaining] for this author
        lastbookname = lastbook['BookName']
        lastbooklink = lastbook['BookLink']
        lastbookdate = lastbook['BookDate']
    else:
        lastbookname = None
        lastbooklink = None
        lastbookdate = None

    controlValueDict = {"AuthorID": authorid}
    newValueDict = {
        "Status": "Active",
        "LastBook": lastbookname,
        "LastLink": lastbooklink,
        "LastDate": lastbookdate
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.debug("Found %s total books for author" % total_count)
    logger.debug("Removed %s bad language results for author" % ignored)
    logger.debug("Removed %s bad character results for author" % removedResults)
    logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count)
    logger.debug("Imported/Updated %s books for author" % resultcount)

    myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i)' %
                (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                 cache_hits, ignored, removedResults, not_cached))

    if refresh:
        logger.info("[%s] Book processing complete: Added %s books / Updated %s books" %
                    (authorname, str(added_count), str(updated_count)))
    else:
        logger.info("[%s] Book processing complete: Added %s books to the database" %
                    (authorname, str(added_count)))
    return books_dict
def find_book(self, bookid=None, queue=None):
    """Fetch one Google Books volume and store it as a "Wanted" book.

    Downloads the volume record for ``bookid``, resolves the first listed
    author to a GoodReads author id, and upserts a row into ``books``.
    Nothing is written when the volume lists no author or the author
    cannot be resolved.

    :param bookid: Google Books volume id; also used as the BookID key.
    :param queue: accepted for interface compatibility; not used here.
    :return: None.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults = json.JSONDecoder().decode(urllib2.urlopen(URL, timeout=30).read())

    volume = jsonresults['volumeInfo']
    bookname = volume['title']

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):
        # Without an author there is no AuthorID to file the book under.
        logger.debug('Book %s does not contain author field' % bookname)
        return

    # The user asked for this book explicitly, so a language mismatch is
    # only logged; a missing language is stored as "Unknown".
    booklang = volume.get('language')
    if booklang is None:
        booklang = "Unknown"
        logger.debug('Book does not have language field')
    else:
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)

    bookpub = volume.get('publisher')
    booksub = volume.get('subtitle')
    bookdate = volume.get('publishedDate', '0000-00-00')
    bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
    bookrate = float(volume.get('averageRating', 0))
    bookpages = volume.get('pageCount', 0)
    bookdesc = volume.get('description')

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = None

    # only an ISBN-10 in the first identifier slot is used
    try:
        identifier = volume['industryIdentifiers'][0]
        if identifier['type'] == 'ISBN_10':
            bookisbn = identifier['identifier']
        else:
            bookisbn = None
    except (KeyError, IndexError):
        bookisbn = None

    booklink = volume['canonicalVolumeLink']

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        logger.warn('No author id found for %s, book %s not added' % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)
def processDir(force=False, reset=False):
    """Post-process downloaded books and magazines found in the download directory.

    For every row of ``wanted`` with Status "Snatched", looks for a
    download whose name fuzzy-matches the snatched title (token-set ratio
    of 95 or more), works out the destination from the book or magazine
    settings, and hands the pair to ``processDestination``.  Successful
    items are marked "Processed"; failed ones are renamed with a ``.fail``
    suffix so they are not retried.  Afterwards any remaining download whose
    name contains ``LL.(<id>)`` is passed to ``import_book``.

    :param force: when False and nothing is snatched, the scheduled job is
        stopped instead of scanning.
    :param reset: when True, the scheduled job is restarted at the end.
    :return: False if the download directory cannot be listed, otherwise
        None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    # fall back to the current working directory when no usable DOWNLOAD_DIR is set
    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round unicode differences in torrent filenames.
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode('utf-8')
                    else:
                        matchname = fname
                    # compare only the part of the name before any "LL.(" marker
                    if 'LL.(' in matchname:
                        matchname = matchname.split('LL.(')[0]
                    match = fuzz.token_set_ratio(matchname, book['NZBtitle'])
                    if match >= 95:
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                        found = True
                        break
            if found:
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    # dest_path = authorname+'/'+bookname
                    # global_name = bookname + ' - ' + authorname
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a book: for magazines the wanted row's BookID holds the magazine title
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                        if lazylibrarian.MAG_RELATIVE:
                            # relative magazine folders are prefixed with '_' unless they already start with '.' or '_'
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        # authorname/bookname of None mark this item as a magazine further down
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        # global_name = book['AuxInfo']+' - '+title
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            # try:
            #     os.chmod(dest_path, 0777)
            # except Exception, e:
            #     logger.debug("Could not chmod post-process directory: " + str(dest_path))

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID" : magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except:
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                # the id is the text between "LL.(" and the next ")"
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                # a bare file with the LL marker: import from the download directory itself
                if os.path.isfile(pp_path):
                    pp_path = os.path.join(processpath)

                # NOTE(review): nesting of import_book under this isdir check is
                # reconstructed from a whitespace-collapsed source -- confirm.
                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')
    if reset == True:
        common.schedule_job(action='Restart', target='processDir')
def addAuthorToDB(authorname=None):
    """Add (or refresh) an author and all of their GoodReads books in the database.

    Marks the author row as "Loading", looks the author up on GoodReads,
    stores the author details, upserts every book returned by
    ``GR.get_author_books`` with Status "Skipped", and finally marks the
    author "Active" with the book total and most recent book.

    When GoodReads finds no matching author the function logs an error and
    returns, leaving the placeholder row in place.

    :param authorname: name of the author to look up.
    :return: None.
    """
    threading.currentThread().name = "DBIMPORT"
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)

    # single quotes are doubled so names like O'Brien do not break the query
    query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until GoodReads supplies the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        # Nothing below can run without an author id.
        logger.error("Nothing found")
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    books = GR.get_author_books(authorid)
    for book in books:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # NOTE(review): AuthorLink is filled with the author *image* URL;
            # kept as found -- confirm whether authorlink was intended.
            "AuthorLink": authorimg,
            "BookName": book['bookname'],
            "BookSub": book['booksub'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookPub": book['bookpub'],
            "BookGenre": book['bookgenre'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)
        bookscount = bookscount + 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID='%s' order by BookDate DESC" %
        authorid).fetchone()
    if lastbook:  # an author may have no books stored at all
        lastbookname = lastbook['BookName']
        lastbooklink = lastbook['BookLink']
        lastbookdate = lastbook['BookDate']
    else:
        lastbookname = None
        lastbooklink = None
        lastbookdate = None

    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID='%s'" % authorid)
    total_books = 0
    for count in bookCount:
        total_books = count['counter']
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "TotalBooks": total_books,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

    logger.info("Processing complete: Added %s books to the database" % str(total_books))
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Import an author's books from the GoodReads author list into the database.

    Fetches ``/author/list/<authorid>.xml`` page by page until a page comes
    back empty or fails.  Unless "All" is one of the preferred languages,
    each book's language is resolved in this order: the ``languages`` cache
    table (keyed on a three-character ISBN prefix), LibraryThing, then the
    GoodReads book page.  Books in an unwanted language are skipped;
    the rest are upserted into ``books`` and enriched with cover and series
    data from ``bookwork``.

    :param authorid: GoodReads author id (string, concatenated into the URL).
    :param authorname: author name, used for log messages.
    :param refresh: not used by the code in this method.
    :return: an empty list when the first request fails; otherwise the
        method falls off the end and returns None.
    """
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0
    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

    # Artist is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)
    books_dict = []
    try:
        rootxml, in_cache = self.get_request(URL)
    except Exception as e:
        logger.error("Error fetching author books: %s" % e)
        return books_dict
    if rootxml is None:
        logger.debug("Error requesting author books")
        return books_dict
    if not in_cache:
        api_hits = api_hits + 1
    resultxml = rootxml.getiterator('book')

    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

    if not len(resultxml):
        logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
    else:
        logger.debug("[%s] Now processing books with GoodReads API" % authorname)

        resultsCount = 0
        removedResults = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        logger.debug(u"url " + URL)

        authorNameResult = rootxml.find('./author/name').text
        logger.debug(u"author name " + authorNameResult)
        loopCount = 1

        while resultxml is not None:
            for book in resultxml:
                total_count = total_count + 1

                if (book.find('publication_year').text is None):
                    pubyear = "0000"
                else:
                    pubyear = book.find('publication_year').text

                try:
                    bookimg = book.find('image_url').text
                    if ('nocover' in bookimg):
                        bookimg = 'images/nocover.png'
                except (KeyError, AttributeError, TypeError):
                    # element missing (find() returned None) or empty text
                    bookimg = 'images/nocover.png'

                # PAB this next section tries to get the book language using the isbn to look it up.
                # Books whose language cannot be determined are kept with language "Unknown"
                # (there is a setting in config.ini to allow or skip those).
                # Not all GR books have an isbn filled in, but all have a GR bookid, so that is the fallback.
                # GR and LibraryThing both restrict us to one API call per second (LT also to 1000 hits
                # per day), which is slow when all we want is the language.  To speed this up we first see
                # if we already know a language for a similar ISBN: digits 3-5 of a 13 char ISBN or
                # digits 0-2 of a 10 digit ISBN indicate the region/language, so two books sharing that
                # 3 digit code _should_ be the same language.
                # The lookups are cached in a separate "languages" table holding ALL results, including
                # the ISBNs for languages we don't want and books we reject, to maximise cache hits.
                # The table is created (if not exists) in init.py so it exists by the time we get here.
                # If there is no cached match, ask libraryThing:
                # "http://www.librarything.com/api/thingLang.php?isbn=1234567890"
                # If LT says "unknown" or "invalid", try GR.  If both are unknown, cache the isbn as
                # "Unknown" - no point in repeatedly asking LT for a code it's told you it doesn't know.
                # If language includes "All" in config.ini we skip this whole section, which is _much_
                # faster but includes a lot of unwanted foreign translations in the database.
                bookLanguage = "Unknown"
                find_field = "id"
                isbn = ""
                isbnhead = ""
                if "All" not in valid_langs:  # do we care about language
                    if (book.find('isbn').text is not None):
                        find_field = "isbn"
                        isbn = book.find('isbn').text
                        isbnhead = isbn[0:3]
                    else:
                        if (book.find('isbn13').text is not None):
                            find_field = "isbn13"
                            isbn = book.find('isbn13').text
                            isbnhead = isbn[3:6]
                    if (find_field != 'id'):  # isbn or isbn13 found
                        match = myDB.action('SELECT lang FROM languages where isbn = "%s"' %
                                            (isbnhead)).fetchone()
                        if (match):
                            bookLanguage = match['lang']
                            cache_hits = cache_hits + 1
                            logger.debug("Found cached language [%s] for %s [%s]" %
                                         (bookLanguage, find_field, isbnhead))
                        else:
                            # no match in cache, try searching librarything for a language code using the isbn
                            # if no language found, librarything return value is "invalid" or "unknown"
                            # returns plain text, not xml
                            BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                            try:
                                time_now = int(time.time())
                                if time_now <= lazylibrarian.LAST_LIBRARYTHING:  # called within the last second?
                                    time.sleep(1)  # sleep 1 second to respect librarything api terms
                                resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                lazylibrarian.LAST_LIBRARYTHING = time_now
                                lt_lang_hits = lt_lang_hits + 1
                                logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                if (resp == 'invalid' or resp == 'unknown'):
                                    find_field = "id"  # reset the field to force search on goodreads
                                else:
                                    bookLanguage = resp  # found a language code
                                    myDB.action('insert into languages values ("%s", "%s")' %
                                                (isbnhead, bookLanguage))
                                    logger.debug(u"LT language: " + bookLanguage)
                            except Exception as e:
                                find_field = "id"  # reset the field to search on goodreads
                                logger.error("Error finding LT language result: %s" % e)

                    if (find_field == 'id'):
                        # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                        try:
                            if (book.find(find_field).text is not None):
                                BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                    book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                logger.debug(u"Book URL: " + BOOK_URL)

                                try:
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)

                                    BOOK_rootxml, in_cache = self.get_request(BOOK_URL)
                                    if BOOK_rootxml is None:
                                        logger.debug('Error requesting book language code')
                                        bookLanguage = ""
                                    else:
                                        if not in_cache:
                                            # only update last_goodreads if the result wasn't found in the cache
                                            lazylibrarian.LAST_GOODREADS = time_now
                                        bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                except Exception as e:
                                    logger.error("Error finding book results: %s" % e)
                                if not in_cache:
                                    gr_lang_hits = gr_lang_hits + 1
                                if not bookLanguage:
                                    bookLanguage = "Unknown"

                                if (isbnhead != ""):
                                    myDB.action('insert into languages values ("%s", "%s")' %
                                                (isbnhead, bookLanguage))
                                    logger.debug("GoodReads reports language [%s] for %s" %
                                                 (bookLanguage, isbnhead))
                                else:
                                    # GR didn't give an isbn so we can't cache it, just use language for this book
                                    not_cached = not_cached + 1

                                logger.debug(u"GR language: " + bookLanguage)
                            else:
                                logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                # continue
                        except Exception as e:
                            logger.debug(u"An error has occured: %s" % e)

                    if bookLanguage not in valid_langs:
                        logger.debug('Skipped a book with language %s' % bookLanguage)
                        ignored = ignored + 1
                        continue

                bookname = book.find('title').text
                bookid = book.find('id').text
                bookdesc = book.find('description').text
                bookisbn = book.find('isbn').text
                bookpub = book.find('publisher').text
                booklink = book.find('link').text
                bookrate = float(book.find('average_rating').text)
                bookpages = book.find('num_pages').text

                # Series info is embedded in the title, e.g. "Title (Series Name, #3)":
                # \(              must have (
                # ([\S\s]+)       followed by a group of one or more characters
                # ,? #            followed by optional comma, then space hash
                # (\d+\.?-?\d{0,}) digits, optional decimal point or dash (range), more digits
                result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
                if result:
                    series = result.group(1)
                    if series[-1] == ',':
                        series = series[:-1]
                    seriesNum = result.group(2)
                else:
                    series = None
                    seriesNum = None

                find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid)
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                else:
                    book_status = lazylibrarian.NEWBOOK_STATUS

                bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
                bookname = unidecode(u'%s' % bookname)
                bookname = bookname.strip()  # strip whitespace

                if not (re.match(r'[^\w-]', bookname)):  # remove books with bad characters in title
                    if book_status != "Ignored":
                        controlValueDict = {"BookID": bookid}
                        newValueDict = {
                            "AuthorName": authorNameResult,
                            "AuthorID": authorid,
                            "AuthorLink": None,
                            "BookName": bookname,
                            "BookSub": None,
                            "BookDesc": bookdesc,
                            "BookIsbn": bookisbn,
                            "BookPub": bookpub,
                            "BookGenre": None,
                            "BookImg": bookimg,
                            "BookLink": booklink,
                            "BookRate": bookrate,
                            "BookPages": bookpages,
                            "BookDate": pubyear,
                            "BookLang": bookLanguage,
                            "Status": book_status,
                            "BookAdded": formatter.today(),
                            "Series": series,
                            "SeriesNum": seriesNum
                        }

                        resultsCount = resultsCount + 1

                        myDB.upsert("books", newValueDict, controlValueDict)
                        logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                        if 'nocover' in bookimg or 'nophoto' in bookimg:
                            # try to get a cover from librarything
                            workcover = bookwork.getWorkCover(bookid)
                            if workcover:
                                logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": workcover}
                                myDB.upsert("books", newValueDict, controlValueDict)
                        elif bookimg.startswith('http'):
                            link = bookwork.cache_cover(bookid, bookimg)
                            if link != bookimg:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"BookImg": link}
                                myDB.upsert("books", newValueDict, controlValueDict)

                        if seriesNum is None:
                            # try to get series info from librarything
                            series, seriesNum = bookwork.getWorkSeries(bookid)
                            if seriesNum:
                                logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {
                                    "Series": series,
                                    "SeriesNum": seriesNum
                                }
                                myDB.upsert("books", newValueDict, controlValueDict)

                        if not find_book_status:
                            logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                            added_count = added_count + 1
                        else:
                            logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                            updated_count = updated_count + 1
                    else:
                        book_ignore_count = book_ignore_count + 1
                else:
                    logger.debug(u"removed result [" + bookname + "] for bad characters")
                    removedResults = removedResults + 1

            # fetch the next page; resultxml stays None (ending the loop) on any failure
            loopCount = loopCount + 1
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                urllib.urlencode(self.params) + '&page=' + str(loopCount)
            resultxml = None
            try:
                rootxml, in_cache = self.get_request(URL)
                if rootxml is None:
                    logger.debug('Error requesting next page of results')
                else:
                    resultxml = rootxml.getiterator('book')
                    if not in_cache:
                        api_hits = api_hits + 1
            except Exception as e:
                resultxml = None
                logger.error("Error finding next page of results: %s" % e)

            if resultxml is not None:
                if all(False for book in resultxml):  # returns True if iterator is empty
                    resultxml = None
def find_book(self, bookid=None, queue=None):
    """Look up a single book on GoodReads and add it to the books table.

    The book is requested by ``bookid`` through ``self.get_request``, its
    metadata is read from the returned XML, the author is resolved with
    ``GoodReads(authorname).find_author_id()`` and the row is upserted with
    status "Wanted".  Afterwards the cover is fetched or cached through
    ``bookwork`` and, when the title carried no series number, series
    information is requested from ``bookwork.getWorkSeries``.

    :param bookid: GoodReads book id; concatenated into the request URL,
        so it must be a string.
    :param queue: not used by this method.
    :return: None.  Returns early (after logging) when the request fails
        or the author cannot be resolved.
    """
    threading.currentThread().name = "GR-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = self.get_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % e)
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    bookdate = rootxml.find('./book/publication_year').text
    if bookdate is None:
        bookdate = "0000"

    try:
        bookimg = rootxml.find('./book/img_url').text
        if bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError):
        # no img_url element in the response
        bookimg = 'images/nocover.png'
    if not bookimg:
        # element present but empty: .text is None, which would break the
        # substring tests on bookimg further down
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # Without an author id the row below cannot be built; previously this
        # fell through and raised NameError on AuthorID.
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    # Titles of the form "Name (Series, #3)" carry the series in brackets
    result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
    if result:
        series = result.group(1)
        if series[-1] == ',':
            series = series[:-1]
        seriesNum = result.group(2)
    else:
        series = None
        seriesNum = None

    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()  # strip whitespace

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": None,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
        "Series": series,
        "SeriesNum": seriesNum
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = bookwork.getWorkCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg.startswith('http'):
        link = bookwork.cache_cover(bookid, bookimg)
        if link != bookimg:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = bookwork.getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {
                "Series": series,
                "SeriesNum": seriesNum
            }
            myDB.upsert("books", newValueDict, controlValueDict)
def processDir(reset=False):
    """Post-process snatched downloads found in the download directory.

    For every row of the ``wanted`` table with Status "Snatched" the
    download directory listing is fuzzy-matched against the row's NZBtitle.
    The best match at or above ``lazylibrarian.DLOAD_RATIO`` is handed to
    ``processDestination``; on success the wanted row is marked "Processed"
    (and the magazine/issue tables updated for magazines), on failure it is
    marked "Failed" and the folder is renamed with a ``.fail`` suffix so it
    is skipped next time.  Finally any remaining entry whose name contains
    ``LL.(<bookid>)`` is passed to ``import_book``.

    :param reset: when true, the 'processDir' job is rescheduled with
        action 'Restart' once processing has finished.
    :return: None.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if not snatched:
        logger.info('Nothing marked as snatched.')
        scheduleJob(action='Stop', target='processDir')
        return

    if not downloads:
        logger.info('No downloads are found. Nothing to process.')
        return

    logger.info("Checking %s download%s for %s snatched file%s" %
                (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))

    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
    # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere?
    # (same table is used for books and magazines, so build it once)
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    ppcount = 0
    for book in snatched:
        matches = []
        for fname in downloads:
            if not fname.endswith('.fail'):  # has this failed before?
                # this is to get round differences in torrent filenames.
                # Torrents aren't always returned with the name we searched for
                # there might be a better way...
                if isinstance(fname, str):
                    matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                else:
                    matchname = fname
                if ' LL.(' in matchname:
                    matchname = matchname.split(' LL.(')[0]
                matchtitle = book['NZBtitle']
                match = 0
                if matchtitle:
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                if match >= lazylibrarian.DLOAD_RATIO:
                    fname = matchname
                    if os.path.isfile(os.path.join(processpath, fname)):
                        # handle single file downloads here...
                        # move the file into a folder of the same name so the
                        # directory handling below can deal with it
                        if is_valid_booktype(fname, booktype="book") \
                                or is_valid_booktype(fname, booktype="mag"):
                            dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                            if not os.path.exists(dirname):
                                try:
                                    os.makedirs(dirname)
                                except OSError as why:
                                    logger.debug('Failed to create directory %s, %s' %
                                                 (dirname, why.strerror))
                            if os.path.exists(dirname):
                                try:
                                    shutil.move(os.path.join(processpath, fname),
                                                os.path.join(dirname, fname))
                                    fname = os.path.splitext(fname)[0]
                                except Exception as why:
                                    logger.debug("Failed to move file %s to %s, %s" %
                                                 (fname, dirname, str(why)))
                    if os.path.isdir(os.path.join(processpath, fname)):
                        pp_path = os.path.join(processpath, fname)
                        logger.debug('Found folder (%s%%) %s for %s' %
                                     (match, pp_path, book['NZBtitle']))
                        matches.append([match, pp_path, book])
                else:
                    logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
            else:
                logger.debug('Skipping %s' % fname)

        if matches:
            highest = max(matches, key=lambda x: x[0])
            match = highest[0]
            pp_path = highest[1]
            book = highest[2]
            logger.info(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))

            # NOTE(review): the two queries below are built by string
            # interpolation; a BookID/Title containing '"' will break them.
            # Switch to placeholders if DBConnection.select accepts arguments.
            data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID'])
            if data:
                authorname = data[0]['AuthorName']
                bookname = data[0]['BookName']
                if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                    logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                    lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                # Default destination path, should be allowed change per config file.
                dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                    '$Title', bookname)
                global_name = unaccented(global_name)
                # dest_path = authorname+'/'+bookname
                # global_name = bookname + ' - ' + authorname
                dest_path = unaccented_str(replace_all(dest_path, dic))
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                    lazylibrarian.SYS_ENCODING)
            else:
                data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID'])
                if data:
                    # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                    # files are downloading, there will be an error in post-processing, trying to go to the
                    # same directory.
                    mostrecentissue = data[0]['IssueDate']  # keep for processing issues arriving out of order
                    mag_name = unaccented_str(replace_all(book['BookID'], dic))
                    # book auxinfo is a cleaned date, eg 2015-01-01
                    dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                        '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                    # dest_path = '_Magazines/'+title+'/'+book['AuxInfo']
                    if lazylibrarian.MAG_RELATIVE:
                        # startswith() rather than dest_path[0] so an empty
                        # folder template cannot raise IndexError
                        if not dest_path.startswith(('.', '_')):
                            dest_path = '_' + dest_path
                        dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                            lazylibrarian.SYS_ENCODING)
                    else:
                        dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                    authorname = None
                    bookname = None
                    global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                        '$Title', mag_name)
                    global_name = unaccented(global_name)
                    # global_name = book['AuxInfo']+' - '+title
                else:
                    logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                    continue
        else:
            logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
            continue

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

        if processBook:
            logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
            # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
            myDB.upsert("wanted", newValueDict, controlValueDict)

            if bookname is not None:  # it's a book, if None it's a magazine
                if len(lazylibrarian.IMP_CALIBREDB):
                    logger.debug('Calibre should have created the extras for us')
                else:
                    processExtras(myDB, dest_path, global_name, data)
            else:
                # update mags
                controlValueDict = {"Title": book['BookID']}
                if mostrecentissue:
                    if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                        older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                    else:
                        older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                else:
                    older = False
                if older:  # check this in case processing issues arriving out of order
                    newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                else:
                    newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                    "IssueStatus": "Open"}
                myDB.upsert("magazines", newValueDict, controlValueDict)
                # dest_path is where we put the magazine after processing, but we don't have the full filename
                # so look for any "book" in that directory
                dest_file = book_file(dest_path, booktype='mag')
                controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                newValueDict = {"IssueAcquired": today(),
                                "IssueFile": dest_file,
                                "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                }
                myDB.upsert("issues", newValueDict, controlValueDict)

                # create a thumbnail cover for the new issue
                create_cover(dest_file)

            logger.info('Successfully processed: %s' % global_name)
            ppcount = ppcount + 1
            notify_download("%s at %s" % (global_name, now()))
        else:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s.fail' % pp_path)
            controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
            newValueDict = {"Status": "Failed", "NZBDate": now()}
            myDB.upsert("wanted", newValueDict, controlValueDict)
            # if it's a book, reset status so we try for a different version
            # if it's a magazine, user can select a different one from pastissues table
            if bookname is not None:
                myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID'])

            # at this point, as it failed we should move it or it will get postprocessed
            # again (and fail again)
            try:
                os.rename(pp_path, pp_path + '.fail')
            except Exception as why:
                # best effort only, but don't swallow KeyboardInterrupt/SystemExit
                # and do say why the rename failed
                logger.debug("Unable to rename %s, %s" % (pp_path, str(why)))

    downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    for directory in downloads:
        if "LL.(" in directory and not directory.endswith('.fail'):
            bookID = str(directory).split("LL.(")[1].split(")")[0]
            logger.debug("Book with id: " + str(bookID) + " is in downloads")
            pp_path = os.path.join(processpath, directory)

            if os.path.isfile(pp_path):
                # a lone file: import from the download directory itself
                pp_path = os.path.join(processpath)

            if os.path.isdir(pp_path):
                logger.debug('Found LL folder %s.' % pp_path)
                if import_book(pp_path, bookID):
                    ppcount = ppcount + 1

    if ppcount == 0:
        logger.info('No snatched books/mags have been found')
    else:
        logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

    if reset:
        scheduleJob(action='Restart', target='processDir')
def find_book(self, bookid=None, bookstatus="None"):
    """Look up a single volume on Google Books and add it to the books table.

    The volume is requested through ``gb_json_request`` and converted with
    ``bookdict``.  The author is resolved with ``GoodReads.find_author_id``;
    if the author is not yet in the ``authors`` table (by id or by name) a
    new author row is created and, when ``NEWAUTHOR_BOOKS`` is set, the
    author's books are loaded as well.  The book row is then upserted and
    cover, series and work page are filled in.

    :param bookid: Google Books volume id.
    :param bookstatus: status to store for the book.  A false value, or the
        legacy default string "None", selects
        ``lazylibrarian.CONFIG['NEWBOOK_STATUS']``.
    :return: None.  Returns early (after logging) when nothing is found,
        the result has no author, or no author id can be resolved.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
          str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = gb_json_request(URL)

    if jsonresults is None:
        logger.debug('No results found for %s' % bookid)
        return

    # The signature default is the *string* "None", which is truthy, so a
    # plain "not bookstatus" test never selected the configured status and
    # "None" ended up stored as the book status.
    if not bookstatus or bookstatus == "None":
        bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

    book = bookdict(jsonresults)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(book['name'], dic)

    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    if not book['author']:
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return
    # warn if language is in ignore list, but user said they wanted this book
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if book['lang'] not in valid_langs and 'All' not in valid_langs:
        logger.debug('Book %s googlebooks language does not match preference, %s' %
                     (bookname, book['lang']))

    if lazylibrarian.CONFIG['NO_PUBDATE']:
        if not book['date'] or book['date'] == '0000':
            logger.warn('Book %s Publication date does not match preference, %s' %
                        (bookname, book['date']))

    if lazylibrarian.CONFIG['NO_FUTURE']:
        if book['date'] > today()[:4]:
            logger.warn('Book %s Future publication date does not match preference, %s' %
                        (bookname, book['date']))

    authorname = book['author']
    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?',
                               (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author with newauthor status
                # User hit "add book" button from a search or a wishlist import
                newauthor_status = 'Active'
                if lazylibrarian.CONFIG['NEWAUTHOR_STATUS'] in ['Skipped', 'Ignored']:
                    newauthor_status = 'Paused'
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": newauthor_status
                }
                authorname = author['authorname']
                myDB.upsert("authors", newValueDict, controlValueDict)
                if lazylibrarian.CONFIG['NEWAUTHOR_BOOKS']:
                    # get_author_books builds its search query from the author
                    # name, so it has to be passed along with the id
                    self.get_author_books(AuthorID, authorname,
                                          entrystatus=lazylibrarian.CONFIG['NEWAUTHOR_STATUS'])
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (book['author'], bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": book['sub'],
        "BookDesc": book['desc'],
        "BookIsbn": book['isbn'],
        "BookPub": book['pub'],
        "BookGenre": book['genre'],
        "BookImg": book['img'],
        "BookLink": book['link'],
        "BookRate": float(book['rate']),
        "BookPages": book['pages'],
        "BookDate": book['date'],
        "BookLang": book['lang'],
        "Status": bookstatus,
        "AudioStatus": lazylibrarian.CONFIG['NEWAUDIO_STATUS'],
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s by %s added to the books database" % (bookname, authorname))

    if 'nocover' in book['img'] or 'nophoto' in book['img']:
        # try to get a cover from another source
        workcover, source = getBookCover(bookid)
        if workcover:
            logger.debug('Updated cover for %s using %s' % (bookname, source))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif book['img'] and book['img'].startswith('http'):
        link, success, _ = cache_img("book", bookid, book['img'])
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % book['img'])

    serieslist = []
    if book['series']:
        serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
    if lazylibrarian.CONFIG['ADD_SERIES']:
        newserieslist = getWorkSeries(bookid)
        if newserieslist:
            serieslist = newserieslist
            logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
    setSeries(serieslist, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     entrystatus='Active', refresh=False):
    """Load all of an author's books from the Google Books API into the database.

    Pages through the ``inauthor:"<authorname>"`` search 40 results at a
    time.  Each result is filtered by language (unless "All" is a preferred
    language), missing name, bad leading character, future/missing
    publication date, missing ISBN and duplicates, then upserted into
    ``books`` together with cover, series and work page.  When paging is
    finished the author row is updated with ``entrystatus`` and the latest
    book, and a row of counters is written to ``stats``.

    :param authorid: AuthorID of the author row to update.
    :param authorname: author name used for the search query and for
        duplicate detection.
    :param bookstatus: status given to books that are not yet in the database.
    :param entrystatus: status written to the author row once loading is done.
    :param refresh: when true the request cache is bypassed
        (``useCache=not refresh``) and cached cover images are refreshed.
    :return: None.  Any unexpected exception is logged with its traceback
        and not re-raised.
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1  # forces the first pass; replaced by totalItems below

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)

                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' %
                                 (number_results, plural(number_results), authorname))

                startindex += 40

                for item in jsonresults['items']:

                    total_count += 1

                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # three digits of the isbn used as key for the language lookups
                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]

                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:  # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    # this is the 978 table; the message used to say ISBN979
                                                    logger.debug("ISBN978 returned %s for %s" %
                                                                 (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)',
                                                        (isbnhead, booklang))

                                if not match:
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, booklang))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                            ignored += 1
                            continue

                    # rejected: 0 = keep, 1 = no name, 2 = bad characters, 3 = future date,
                    # 4 = no date, 5 = no isbn, 6 = duplicate title, 7 = duplicate bookid
                    rejected = 0
                    check_status = False
                    book_status = bookstatus  # new_book status, or new_author status
                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                    added = today()
                    locked = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        removedResults += 1
                        rejected = 1
                    else:
                        bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                        # editions) and sometimes uses the same bookid if the book is the same but the title is
                        # slightly different. Not sure if googlebooks does too, but we only want one...
                        cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                        existing_book = myDB.match(cmd, (bookid,))
                        if existing_book:
                            book_status = existing_book['Status']
                            audio_status = existing_book['AudioStatus']
                            locked = existing_book['Manual']
                            added = existing_book['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            # NOTE(review): rejected is always 0 at this point, so the
                            # 'Ignored' branch cannot be taken here - confirm intent
                            if rejected in [3, 4, 5]:
                                book_status = 'Ignored'
                                audio_status = 'Ignored'
                            else:
                                book_status = bookstatus  # new_book status, or new_author status
                                audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                            added = today()
                            locked = False

                    # re.match only tests the first character of the title
                    if not rejected and re.match(r'[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = 2

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                            removedResults += 1
                            rejected = 3

                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            removedResults += 1
                            rejected = 4

                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            removedResults += 1
                            rejected = 5

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        # NOTE(review): the values are bound as parameters, so doubling the
                        # quotes here changes the value being compared - confirm
                        match = myDB.match(cmd, (bookname.replace('"', '""'), authorname.replace('"', '""')))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                # (the two ids used to be swapped in this message)
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (bookid, authorname, bookname, match['BookID']))
                                rejected = 6
                                duplicates += 1

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname,
                                              match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                check_status = True  # same book: still refresh its details below
                            duplicates += 1
                            rejected = 7

                    if check_status or not rejected or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn

                        if not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added
                            }
                            resultcount += 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])
                            updated = False
                            if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                # try to get a cover from another source
                                workcover, source = getBookCover(bookid)
                                if workcover:
                                    logger.debug('Updated cover for %s using %s' % (bookname, source))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif book['img'] and book['img'].startswith('http'):
                                link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % book['img'])

                            serieslist = []
                            if book['series']:
                                serieslist = [('', book['seriesNum'],
                                               cleanName(unaccented(book['series']), '&/'))]
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                newserieslist = getWorkSeries(bookid)
                                if newserieslist:
                                    serieslist = newserieslist
                                    logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                    updated = True
                            setSeries(serieslist, bookid)

                            new_status = setStatus(bookid, serieslist, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
                        else:
                            # NOTE(review): paired with "if not locked"; confirm against
                            # the original layout
                            book_ignore_count += 1
        except KeyError:
            # presumably a result page without an 'items' key ends the paging;
            # any other KeyError raised inside the loop is dropped here as well
            pass

        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" %
                     (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count),
                         updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def processDir(force=False, reset=False):
    """
    Post-process snatched downloads found in the download directory.

    Every row of the ``wanted`` table with Status "Snatched" is fuzzy-matched
    (token_set_ratio >= 95) against the entries of the download directory.
    A matching folder (single files are first moved into a folder of their
    own) is handed to processDestination() and, on success, the wanted /
    magazines / issues tables are updated and a notification is sent.
    Afterwards any remaining "LL.(bookid)" entries are passed to import_book().

    :param force: when False and nothing is marked as snatched, the scheduled
                  processDir job is stopped instead of scanning
    :param reset: when True, the scheduled processDir job is restarted at the end
    :return: False if the download directory cannot be listed, otherwise None
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR):
        processpath = os.getcwd()
    else:
        processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError as why:
        logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    # Characters we don't want in a filename. Applied BEFORE joining with
    # DESTINATION_DIR, as windows drive identifiers have a colon (eg c:)
    # but no colons are allowed elsewhere.
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    if force is False and len(snatched) == 0:
        logger.info('Nothing marked as snatched. Stopping postprocessor job.')
        common.schedule_job(action='Stop', target='processDir')
    elif len(downloads) == 0:
        logger.info('No downloads are found. Nothing to process.')
    else:
        logger.debug("Checking %s downloads for %s snatched files" % (len(downloads), len(snatched)))
        ppcount = 0
        for book in snatched:
            found = False
            for fname in downloads:
                if not fname.endswith('.fail'):  # has this failed before?
                    # this is to get round differences in torrent filenames.
                    # Torrents aren't always returned with the name we searched for
                    # there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    matchtitle = book['NZBtitle']
                    if ' LL.(' in matchtitle:
                        matchtitle = matchtitle.split(' LL.(')[0]
                    match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match >= 95:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here...
                            # move the file into a folder named after it
                            if formatter.is_valid_booktype(fname, booktype="book") \
                                    or formatter.is_valid_booktype(fname, booktype="mag"):
                                dirname = os.path.join(processpath, os.path.splitext(fname)[0])
                                if not os.path.exists(dirname):
                                    try:
                                        os.makedirs(dirname)
                                    except OSError as why:
                                        logger.debug('Failed to create directory %s, %s' %
                                                     (dirname, why.strerror))
                                if os.path.exists(dirname):
                                    try:
                                        shutil.move(os.path.join(processpath, fname),
                                                    os.path.join(dirname, fname))
                                        fname = os.path.splitext(fname)[0]
                                    except Exception as why:
                                        logger.debug("Failed to move file %s to %s, %s" %
                                                     (fname, dirname, str(why)))
                        if os.path.isdir(os.path.join(processpath, fname)):
                            pp_path = os.path.join(processpath, fname)
                            logger.debug('Found folder %s for %s' % (pp_path, book['NZBtitle']))
                            found = True
                            break
                    else:
                        logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle))
                else:
                    logger.debug('Skipping %s' % fname)

            if found:
                # The id/title is embedded in a double-quoted SQL literal, so any
                # embedded double quote has to be doubled or the statement breaks.
                quoted_id = ('%s' % book['BookID']).replace('"', '""')
                data = myDB.select('SELECT * from books WHERE BookID="%s"' % quoted_id)
                if data:
                    authorname = data[0]['AuthorName']
                    bookname = data[0]['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    # eg dest_path = authorname/bookname, global_name = bookname - authorname
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = common.remove_accents(global_name)
                    dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
                    dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                        lazylibrarian.SYS_ENCODING)
                else:
                    # not a book, for magazines the BookID column holds the title
                    data = myDB.select('SELECT * from magazines WHERE Title="%s"' % quoted_id)
                    if data:
                        # AuxInfo was added for magazine release date, normally housed in 'magazines'
                        # but if multiple files are downloading, there will be an error in
                        # post-processing, trying to go to the same directory.
                        # keep for processing issues arriving out of order
                        mostrecentissue = data[0]['IssueDate']
                        mag_name = formatter.latinToAscii(formatter.replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        # eg dest_path = _Magazines/title/auxinfo
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)
                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        # eg global_name = auxinfo - title
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = common.remove_accents(global_name)
                    else:
                        logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches
                # for same book / magazine issue
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": formatter.now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname is not None:  # it's a book, if None it's a magazine
                    processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have
                    # the full filename so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": formatter.today(),
                                    "IssueFile": dest_file,
                                    "IssueID": magazinescan.create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)
                    # create a thumbnail cover for the new issue
                    magazinescan.create_cover(dest_file)

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception:
                    # best effort only, but don't swallow SystemExit/KeyboardInterrupt
                    logger.debug("Unable to rename %s" % pp_path)

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            if "LL.(" in directory and not directory.endswith('.fail'):
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " is in downloads")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # a single file, so import from the download directory itself
                    pp_path = os.path.join(processpath)

                if (os.path.isdir(pp_path)):
                    logger.debug('Found LL folder %s.' % pp_path)
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount:
            logger.info('%s books/mags have been processed.' % ppcount)
        else:
            logger.info('No snatched books/mags have been found')

    if reset:
        common.schedule_job(action='Restart', target='processDir')
def get_capabilities(provider):
    """
    Query provider for caps if none loaded yet, or if config entry is too old
    and not set manually.

    :param provider: provider settings dict. HOST, API, UPDATED and MANUAL are
                     read; GENERALSEARCH, EXTENDED, BOOKCAT, MAGCAT, BOOKSEARCH,
                     MAGSEARCH and UPDATED are rewritten when fresh capabilities
                     are obtained
    :return: the same provider dict (updated in place)
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)

    source_xml, success = fetchURL(URL)
    if success:
        try:
            data = ElementTree.fromstring(source_xml)
        except ElementTree.ParseError:
            # a malformed reply is treated the same as no reply at all
            logger.debug(u"Error parsing xml from %s, %s" % (URL, source_xml))
            data = ''
    else:
        logger.debug(u"Error getting xml from %s, %s" % (URL, source_xml))
        data = ''

    if len(data):
        logger.debug(u"Parsing xml for capabilities of %s" % URL)
        #
        # book search isn't mentioned in the caps xml returned by
        # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
        # but the newznab+ ones usually support t=book and categories in 7000 range
        # whereas nZEDb ones don't support t=book and use categories in 8000 range
        # also some providers give searchtype but no supportedparams, so we still
        # can't tell what queries will be accepted
        # also category names can be lowercase or Mixed, magazine subcat name isn't
        # consistent, and subcat can be just subcat or category/subcat subcat > lang
        # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
        # Load all languages for now as we don't know which the user might want
        #
        # set some defaults
        #
        provider['GENERALSEARCH'] = 'search'
        provider['EXTENDED'] = '1'
        provider['BOOKCAT'] = ''
        provider['MAGCAT'] = ''
        provider['BOOKSEARCH'] = ''
        provider['MAGSEARCH'] = ''
        #
        search = data.find('searching/search')
        if search is not None:
            if 'available' in search.attrib:
                if search.attrib['available'] == 'yes':
                    provider['GENERALSEARCH'] = 'search'

        for cat in data.iter('category'):
            if 'name' in cat.attrib:
                if cat.attrib['name'].lower() == 'books':
                    bookcat = cat.attrib['id']  # keep main bookcat for later
                    provider['BOOKCAT'] = bookcat
                    provider['MAGCAT'] = ''
                    if provider['BOOKCAT'] == '7000':
                        # looks like newznab+, should support book-search
                        provider['BOOKSEARCH'] = 'book'
                    else:
                        # looks like nZEDb, probably no book-search
                        provider['BOOKSEARCH'] = ''
                    # but check in case the provider tells us explicitly
                    search = data.find('searching/book-search')
                    if search is not None:
                        if 'available' in search.attrib:
                            if search.attrib['available'] == 'yes':
                                provider['BOOKSEARCH'] = 'book'
                            else:
                                provider['BOOKSEARCH'] = ''

                    for subcat in cat.iter('subcat'):
                        subname = subcat.attrib['name'].lower()
                        if 'ebook' in subname:
                            provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
                        if 'magazines' in subname or 'mags' in subname:
                            if provider['MAGCAT']:
                                provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                            else:
                                provider['MAGCAT'] = subcat.attrib['id']

                    # if no specific magazine subcategory, use books
                    if not provider['MAGCAT']:
                        provider['MAGCAT'] = bookcat

        logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
        provider['UPDATED'] = today()
        lazylibrarian.config_write()
    else:
        logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
    return provider
def get_capabilities(provider, force=False):
    """
    Query provider for caps if none loaded yet, or if config entry is too old
    and not set manually.

    :param provider: provider settings dict. HOST, API, UPDATED and MANUAL are
                     read; GENERALSEARCH, EXTENDED, BOOKCAT, MAGCAT, AUDIOCAT,
                     BOOKSEARCH, MAGSEARCH, AUDIOSEARCH and UPDATED are rewritten
                     when fresh capabilities are obtained
    :param force: when True, ignore any stored capabilities and ask the provider
    :return: the same provider dict (updated in place)
    """
    if not force and len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (age(provider['UPDATED']) > lazylibrarian.CONFIG['CACHE_AGE']) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False
    else:
        match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host)[:4] == "http":
        host = 'http://' + host
    if host[-1:] == '/':
        host = host[:-1]
    URL = host + '/api?t=caps'

    # most providers will give you caps without an api key
    logger.debug('Requesting capabilities for %s' % URL)
    source_xml, success = fetchURL(URL)
    # If it failed, retry with api key
    if not success:
        if provider['API']:
            URL = URL + '&apikey=' + provider['API']
            logger.debug('Requesting capabilities for %s' % URL)
            source_xml, success = fetchURL(URL)

    if success:
        try:
            data = ElementTree.fromstring(source_xml)
        except ElementTree.ParseError:
            data = ''
            logger.debug("Error parsing xml from %s, %s" % (URL, source_xml))
    else:
        logger.debug("Error getting xml from %s, %s" % (URL, source_xml))
        data = ''

    if len(data):
        logger.debug("Parsing xml for capabilities of %s" % URL)
        #
        # book search isn't mentioned in the caps xml returned by
        # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
        # but the newznab+ ones usually support t=book and categories in 7000 range
        # whereas nZEDb ones don't support t=book and use categories in 8000 range
        # also some providers give searchtype but no supportedparams, so we still
        # can't tell what queries will be accepted
        # also category names can be lowercase or Mixed, magazine subcat name isn't
        # consistent, and subcat can be just subcat or category/subcat subcat > lang
        # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
        # Load all languages for now as we don't know which the user might want
        #
        # set some defaults
        #
        provider['GENERALSEARCH'] = 'search'
        provider['EXTENDED'] = '1'
        provider['BOOKCAT'] = ''
        provider['MAGCAT'] = ''
        provider['AUDIOCAT'] = ''
        provider['BOOKSEARCH'] = ''
        provider['MAGSEARCH'] = ''
        provider['AUDIOSEARCH'] = ''
        #
        search = data.find('searching/search')
        if search is not None:
            # noinspection PyUnresolvedReferences
            if 'available' in search.attrib:
                # noinspection PyUnresolvedReferences
                if search.attrib['available'] == 'yes':
                    provider['GENERALSEARCH'] = 'search'

        for cat in data.iter('category'):
            if 'name' in cat.attrib:
                if cat.attrib['name'].lower() == 'audio':
                    provider['AUDIOCAT'] = cat.attrib['id']
                    for subcat in cat.iter('subcat'):
                        if 'audiobook' in subcat.attrib['name'].lower():
                            provider['AUDIOCAT'] = "%s,%s" % (provider['AUDIOCAT'], subcat.attrib['id'])

                elif cat.attrib['name'].lower() == 'books':
                    bookcat = cat.attrib['id']  # keep main bookcat for starting magazines later
                    provider['BOOKCAT'] = bookcat
                    provider['MAGCAT'] = ''
                    # set default booksearch
                    if provider['BOOKCAT'] == '7000':
                        # looks like newznab+, should support book-search
                        provider['BOOKSEARCH'] = 'book'
                    else:
                        # looks like nZEDb, probably no book-search
                        provider['BOOKSEARCH'] = ''
                    # but check in case we got some settings back
                    search = data.find('searching/book-search')
                    # Must compare against None: an Element with no children is
                    # falsy, so a plain truth test would skip a childless
                    # <book-search available="..."/> element.
                    if search is not None:
                        # noinspection PyUnresolvedReferences
                        if 'available' in search.attrib:
                            # noinspection PyUnresolvedReferences
                            if search.attrib['available'] == 'yes':
                                provider['BOOKSEARCH'] = 'book'
                            else:
                                provider['BOOKSEARCH'] = ''

                    for subcat in cat.iter('subcat'):
                        subname = subcat.attrib['name'].lower()
                        if 'ebook' in subname:
                            provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
                        if 'magazines' in subname or 'mags' in subname:
                            if provider['MAGCAT']:
                                provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                            else:
                                provider['MAGCAT'] = subcat.attrib['id']

                    # if no specific magazine subcategory, use books
                    if not provider['MAGCAT']:
                        provider['MAGCAT'] = bookcat

        logger.debug("Categories: Books %s : Mags %s : Audio %s" %
                     (provider['BOOKCAT'], provider['MAGCAT'], provider['AUDIOCAT']))
        provider['UPDATED'] = today()
        # remember the thread name and put it back after writing the config
        threadname = threading.currentThread().name
        lazylibrarian.config_write()
        threading.currentThread().name = threadname
    else:
        logger.warn("Unable to get capabilities for %s: No data returned" % URL)
    return provider
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database, and get list of all their books
    If author already exists in database, refresh their details and booklist

    :param authorname: name of the author to add or refresh
    :param refresh: passed through to the GoodReads / GoogleBooks lookups
    :return: None. Any exception is logged with its traceback, not raised.
    """
    try:
        myDB = database.DBConnection()
        GR = GoodReads(authorname)

        query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
        dbauthor = myDB.match(query)
        controlValueDict = {"AuthorName": authorname}

        # mark the author as "Loading" while we fetch the details
        if not dbauthor:
            newValueDict = {
                "AuthorID": "0: %s" % (authorname),
                "Status": "Loading"
            }
            logger.debug("Now adding new author: %s to database" % authorname)
        else:
            newValueDict = {"Status": "Loading"}
            logger.debug("Now updating author: %s" % authorname)
        myDB.upsert("authors", newValueDict, controlValueDict)

        author = GR.find_author_id(refresh=refresh)
        if author:
            authorid = author['authorid']
            authorlink = author['authorlink']
            authorimg = author['authorimg']
            controlValueDict = {"AuthorName": authorname}
            newValueDict = {
                "AuthorID": authorid,
                "AuthorLink": authorlink,
                "AuthorImg": authorimg,
                "AuthorBorn": author['authorborn'],
                "AuthorDeath": author['authordeath'],
                "DateAdded": today(),
                "Status": "Loading"
            }
            myDB.upsert("authors", newValueDict, controlValueDict)
        else:
            logger.warn(u"Nothing found for %s" % authorname)
            # bound parameter, so names containing quotes can't break the statement
            myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
            return

        new_img = False
        if authorimg and 'nophoto' in authorimg:
            authorimg = getAuthorImage(authorid)
            new_img = True
        if authorimg and authorimg.startswith('http'):
            newimg = cache_cover(authorid, authorimg)
            if newimg:
                authorimg = newimg
                new_img = True

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        # process books
        if lazylibrarian.BOOK_API == "GoogleBooks":
            book_api = GoogleBooks()
            book_api.get_author_books(authorid, authorname, refresh=refresh)
        elif lazylibrarian.BOOK_API == "GoodReads":
            GR.get_author_books(authorid, authorname, refresh=refresh)

        # update totals works for existing authors only.
        # New authors need their totals updating after libraryscan or import of books.
        if dbauthor:
            update_totals(authorid)
        logger.debug("[%s] Author update complete" % authorname)
    except Exception:
        logger.error('Unhandled exception in addAuthorToDB: %s' % traceback.format_exc())
def find_book(self, bookid=None, queue=None):
    """
    Fetch a single book from goodreads by id and add it to the books table
    with status "Wanted", then try to improve its cover, series and work page.

    :param bookid: goodreads book id, appended to the request URL
    :param queue: not used by this method
    :return: None
    """
    threading.currentThread().name = "GR-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = self.get_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % e)
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        if bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError):
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if not author:
        # without an author id the upsert below has nothing valid to store
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    # series name and number taken from a "(... #N" part of the title
    result = re.search(r"\(([\S\s]+),? #(\d+\.?-?\d{0,})", bookname)
    if result:
        series = result.group(1)
        if series[-1] == ',':
            series = series[:-1]
        seriesNum = result.group(2)
    else:
        series = None
        seriesNum = None

    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()  # strip whitespace

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": None,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
        "Series": series,
        "SeriesNum": seriesNum
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = bookwork.getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg.startswith('http'):
        link = bookwork.cache_cover(bookid, bookimg)
        if link is not None:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = bookwork.getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"Series": series, "SeriesNum": seriesNum}
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = bookwork.getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def addBookToDB(bookid, authorname):
    """
    Add or refresh one book, and then its author, using goodreads data.

    :param bookid: id of the book; used as the key of the books table
    :param authorname: name of the book's author; used as the key of the authors table
    :return: None
    """
    search_type = 'book'
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID": "BookID: %s" % (bookid),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()
    if not book:
        logger.warn("Error fetching bookinfo for BookID: " + bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must not be quoted: inside quotes '?' is a literal string,
    # leaving the statement with no parameter to bind authorname to.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    # NOTE(review): these placeholder values are replaced below before any
    # upsert uses them; confirm whether a "Loading" upsert was intended here.
    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()
    if not author:
        logger.warn("Error fetching authorinfo with name: " + authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
def find_book(self, bookid=None, queue=None):
    """
    Fetch a single book from goodreads by id and add it to the books table
    with status "Wanted", then try to improve its cover, series and work page.

    :param bookid: goodreads book id, appended to the request URL
    :param queue: not used by this method
    :return: None
    """
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % e)
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        if 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError, TypeError):
        # TypeError: the img_url element exists but is empty (text is None)
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if not author:
        # without an author id the upsert below has nothing valid to store
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    # split "title: subtitle" at the first ': '
    booksub = ''
    bookname = unaccented(bookname)
    if ': ' in bookname:
        parts = bookname.split(': ', 1)
        bookname = parts[0]
        booksub = parts[1]

    dic = {':': '', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = bookname.strip()  # strip whitespace
    booksub = replace_all(booksub, dic)
    booksub = booksub.strip()  # strip whitespace

    # series details are looked for in the subtitle if there is one
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today(),
        "Series": series,
        "SeriesNum": seriesNum
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        link = cache_cover(bookid, bookimg)
        if link is not None:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"Series": series, "SeriesNum": seriesNum}
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def find_book(self, bookid=None, queue=None):
    """
    Fetch a single book from goodreads by id and add it to the books table
    with status "Wanted". If the author is not in the database yet, the
    author is added with status "Ignored". Afterwards the cover, series and
    work page are filled in where possible.

    :param bookid: goodreads book id, appended to the request URL
    :param queue: not used by this method
    :return: None
    """
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % str(e))
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for unwanted language, just warn
    #
    valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
    if bookLanguage not in valid_langs:
        logger.debug('Book %s goodreads language does not match preference, %s' % (bookname, bookLanguage))

    if rootxml.find('./book/publication_year').text is None:
        bookdate = "0000"
    else:
        bookdate = rootxml.find('./book/publication_year').text

    try:
        bookimg = rootxml.find('./book/img_url').text
        if 'assets/nocover' in bookimg:
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError, TypeError):
        # TypeError: the img_url element exists but is empty (text is None)
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    name = authorname
    GR = GoodReads(name)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        # bound parameters, so ids/names containing quotes can't break the query
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:
                # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    bookname = unaccented(bookname)
    bookname, booksub = split_title(authorname, bookname)
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic).strip()
    booksub = replace_all(booksub, dic).strip()

    # series details are looked for in the subtitle if there is one
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": "",
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today()
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)
    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            # fall back to the series found in the title, if any
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    :param authorname: author name to look up; used when no authorid is given,
                       or when the lookup by authorid finds nothing
    :param refresh: passed through to the GoodReads lookup, image cache and book list
                    calls; also sets the initial "new author" guess (new unless refreshing)
    :param authorid: author id to look up; tried before authorname
    :param addbooks: when True load the author's book list via the configured BOOK_API,
                     when False mark the author as 'Ignored' instead
    :return: a status message string on completion or on an unhandled exception,
             None if no author could be matched
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        # only rename anonymous worker threads, leave named threads alone
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # do not overwrite details of an author flagged as manually edited
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the placeholder row we just created
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)
            # the name is passed as a bound parameter, so it must NOT be
            # manually quote-escaped or names with apostrophes never match
            query = "SELECT * from authors WHERE AuthorName=?"
            dbauthor = myDB.match(query, (authorname,))
            if author and not dbauthor:
                # may have different name for same authorid (spelling?)
                query = "SELECT * from authors WHERE AuthorID=?"
                dbauthor = myDB.match(query, (author['authorid'],))
                if dbauthor:
                    # only adopt the stored name if the id really is in the database,
                    # otherwise this is a genuinely new author
                    authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}
            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                book_api = GoodReads(authorname)
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    # response carried no followid, store an empty one
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?', (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname, entry_status)
        logger.info(msg)
        return msg
    except Exception:
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False,
                     entrystatus="Active"):
    """
    Fetch an author's book list from the GoodReads API, page by page, and store it.

    For each book a language is worked out (isbn prefix tables, cached results in the
    languages table, LibraryThing, then the GoodReads book page) unless "All" is in the
    preferred languages. Books with unwanted language, bad titles, future dates (when
    NO_FUTURE is set) or duplicates are rejected; the rest are upserted into the books
    table, with cover, series and work page added where available. Finally the author's
    last-book details and status are updated and a row is added to the stats table.

    :param authorid: GoodReads author id (string, it is concatenated into the request url)
    :param authorname: author name, used for log messages and the stats row
    :param bookstatus: status given to books not already in the database
    :param refresh: when True bypass the xml cache for the book list and refresh cached covers
    :param entrystatus: author status written once the book list has been processed
    :return: None. Errors are logged, not raised
    """
    try:
        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

        # Author is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
        except Exception as e:
            logger.error("Error fetching author books: %s" % str(e))
            return
        if rootxml is None:
            logger.debug("Error requesting author books")
            return
        if not in_cache:
            api_hits += 1
        resultxml = rootxml.getiterator('book')

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])

        resultsCount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0

        if not len(resultxml):
            logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
        else:
            logger.debug("[%s] Now processing books with GoodReads API" % authorname)
            logger.debug(u"url " + URL)

            authorNameResult = rootxml.find('./author/name').text
            # Goodreads sometimes puts extra whitespace in the author names!
            authorNameResult = ' '.join(authorNameResult.split())
            logger.debug(u"GoodReads author name [%s]" % authorNameResult)
            loopCount = 1

            while resultxml:
                for book in resultxml:
                    total_count += 1

                    pubyear = book.find('publication_year').text
                    if pubyear is None:
                        pubyear = "0000"

                    try:
                        bookimg = book.find('image_url').text
                        if 'nocover' in bookimg:
                            bookimg = 'images/nocover.png'
                    except (KeyError, AttributeError):
                        bookimg = 'images/nocover.png'

                    bookLanguage = "Unknown"
                    find_field = "id"
                    isbn = ""
                    isbnhead = ""
                    if "All" not in valid_langs:  # do we care about language
                        if book.find('isbn').text:
                            find_field = "isbn"
                            isbn = book.find('isbn').text
                            isbnhead = isbn[0:3]
                        elif book.find('isbn13').text:
                            find_field = "isbn13"
                            isbn = book.find('isbn13').text
                            isbnhead = isbn[3:6]

                        # Try to use shortcut of ISBN identifier codes described here...
                        # https://en.wikipedia.org/wiki/List_of_ISBN_identifier_groups
                        if isbnhead:
                            if find_field == "isbn13" and isbn.startswith('979'):
                                for item in lazylibrarian.isbn_979_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_979_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN979 returned %s for %s" % (bookLanguage, isbnhead))
                            elif (find_field == "isbn") or (find_field == "isbn13" and isbn.startswith('978')):
                                for item in lazylibrarian.isbn_978_dict:
                                    if isbnhead.startswith(item):
                                        bookLanguage = lazylibrarian.isbn_978_dict[item]
                                        break
                                if bookLanguage != "Unknown":
                                    logger.debug("ISBN978 returned %s for %s" % (bookLanguage, isbnhead))

                        if bookLanguage == "Unknown" and isbnhead:
                            # Nothing in the isbn dictionary, try any cached results
                            match = myDB.match('SELECT lang FROM languages where isbn = ?', (isbnhead,))
                            if match:
                                bookLanguage = match['lang']
                                cache_hits += 1
                                logger.debug("Found cached language [%s] for %s [%s]" %
                                             (bookLanguage, find_field, isbnhead))
                            else:
                                # no match in cache, try searching librarything for a language code using the isbn
                                # if no language found, librarything return value is "invalid" or "unknown"
                                # returns plain text, not xml
                                BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                                try:
                                    librarything_wait()
                                    resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                    lt_lang_hits += 1
                                    logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                    if 'invalid' in resp or 'Unknown' in resp:
                                        bookLanguage = "Unknown"
                                    else:
                                        bookLanguage = resp  # found a language code
                                        # resp comes straight from a remote server, so it is
                                        # bound as a parameter rather than formatted into the sql
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, bookLanguage))
                                        logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                                except Exception as e:
                                    logger.error("Error finding LT language result for [%s], %s" % (isbn, str(e)))

                        if bookLanguage == "Unknown":
                            # still no earlier match, we'll have to search the goodreads api
                            try:
                                if book.find(find_field).text:
                                    BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                               book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                    logger.debug(u"Book URL: " + BOOK_URL)

                                    # crude rate limit: wait if we already hit goodreads this second
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)

                                    bookLanguage = ""
                                    try:
                                        BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                        if BOOK_rootxml is None:
                                            logger.debug('Error requesting book language code')
                                        else:
                                            if not in_cache:
                                                # only update last_goodreads if the result wasn't found in the cache
                                                lazylibrarian.LAST_GOODREADS = time_now
                                            try:
                                                bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                            except Exception as e:
                                                logger.debug("Error finding language_code in book xml: %s" % str(e))
                                    except Exception as e:
                                        logger.debug("Error getting book xml: %s" % str(e))

                                    if not in_cache:
                                        gr_lang_hits += 1
                                    if not bookLanguage:
                                        bookLanguage = "Unknown"
                                    # At this point, give up?
                                    # WhatWork on author/title doesn't give us a language.
                                    # It might give us the "original language" of the book (but not always)
                                    # and our copy might not be in the original language anyway
                                    # eg "The Girl With the Dragon Tattoo" original language Swedish
                                    # If we have an isbn, try WhatISBN to get alternatives
                                    # in case any of them give us a language, but it seems if thinglang doesn't
                                    # have a language for the first isbn code, it doesn't for any of the
                                    # alternatives either
                                    # Goodreads search results don't include the language. Although sometimes
                                    # it's in the html page, it's not in the xml results

                                    if isbnhead != "":
                                        # if GR didn't give an isbn we can't cache it, just use language for this book
                                        myDB.action('insert into languages values (?, ?)',
                                                    (isbnhead, bookLanguage))
                                        logger.debug("GoodReads reports language [%s] for %s" %
                                                     (bookLanguage, isbnhead))
                                    else:
                                        not_cached += 1

                                    logger.debug(u"GR language: " + bookLanguage)
                                else:
                                    logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                    # continue

                            except Exception as e:
                                logger.debug(u"Goodreads language search failed: %s" % str(e))

                        if bookLanguage not in valid_langs:
                            logger.debug('Skipped %s with language %s' % (book.find('title').text, bookLanguage))
                            ignored += 1
                            continue

                    bookname = book.find('title').text
                    bookid = book.find('id').text
                    bookdesc = book.find('description').text
                    bookisbn = book.find('isbn').text
                    bookpub = book.find('publisher').text
                    booklink = book.find('link').text
                    bookrate = float(book.find('average_rating').text)
                    bookpages = book.find('num_pages').text
                    bookname = unaccented(bookname)

                    bookname, booksub = split_title(authorNameResult, bookname)
                    dic = {':': '.', '"': ''}  # do we need to strip apostrophes , '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace
                    booksub = replace_all(booksub, dic)
                    booksub = booksub.strip()  # strip whitespace
                    if booksub:
                        series, seriesNum = bookSeries(booksub)
                    else:
                        series, seriesNum = bookSeries(bookname)

                    rejected = False
                    check_status = False

                    if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                        logger.debug(u"removed result [" + bookname + "] for bad characters")
                        removedResults += 1
                        rejected = True

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # both sides are 4 character year strings, so string comparison works
                        if pubyear > today()[:4]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, pubyear))
                            removedResults += 1
                            rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorNameResult))
                        removedResults += 1
                        rejected = True

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName = ? COLLATE NOCASE and AuthorName = ? COLLATE NOCASE'
                        match = myDB.match(cmd, (bookname, authorNameResult))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (bookid, authorNameResult, bookname, match['BookID']))
                                duplicates += 1
                                rejected = True

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:
                            # we have a book with this bookid already
                            if bookname != match['BookName'] or authorNameResult != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorNameResult, bookname,
                                              match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorNameResult, bookname))
                                # same book as we already hold, so it may still need updating
                                check_status = True
                            duplicates += 1
                            rejected = True

                    if check_status or not rejected:
                        existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = ?', (bookid,))
                        if existing_book:
                            book_status = existing_book['Status']
                            locked = existing_book['Manual']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))
                        else:
                            book_status = bookstatus  # new_book status, or new_author status
                            locked = False

                        # Is the book already in the database?
                        # Leave alone if locked or status "ignore"
                        if not locked and book_status != "Ignored":
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": "",
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": today()
                            }

                            resultsCount += 1
                            updated = False

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif bookimg and bookimg.startswith('http'):
                                link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % bookimg)

                            seriesdict = {}
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                # prefer series info from librarything
                                seriesdict = getWorkSeries(bookid)
                                if seriesdict:
                                    logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                    updated = True
                                else:
                                    if series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                setSeries(seriesdict, bookid)

                            new_status = setStatus(bookid, seriesdict, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug(u"[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug(u"[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, bookLanguage, book_status))
                                updated_count += 1
                        else:
                            book_ignore_count += 1

                # fetch the next page of results, an empty or failed page ends the loop
                loopCount += 1
                URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                      urllib.urlencode(self.params) + '&page=' + str(loopCount)
                resultxml = None
                try:
                    rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                    if rootxml is None:
                        logger.debug('Error requesting next page of results')
                    else:
                        resultxml = rootxml.getiterator('book')
                        if not in_cache:
                            api_hits += 1
                except Exception as e:
                    resultxml = None
                    logger.error("Error finding next page of results: %s" % str(e))

                if resultxml:
                    if all(False for _ in resultxml):  # returns True if iterator is empty
                        resultxml = None

        deleteEmptySeries()
        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))
        if lastbook:
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }
        myDB.upsert("authors", newValueDict, controlValueDict)

        # This is here because GoodReads sometimes has several entries with the same BookID!
        modified_count = added_count + updated_count

        logger.debug("Found %s result%s" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s" % (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s" % (modified_count, plural(modified_count)))

        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GR.get_author_books: %s' % traceback.format_exc())
def find_book(self, bookid=None, queue=None):
    """
    Look up a single book on GoodReads by its id and add it to the books table as "Wanted".

    The book is added even if its language is not in the preferred list (the user asked
    for this book explicitly), a debug message is logged instead.

    :param bookid: GoodReads book id (string, it is concatenated into the request url)
    :param queue: not used by this method
    :return: None. If the request fails or the author cannot be resolved, the problem
             is logged and nothing is added
    """
    threading.currentThread().name = "GR-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, _ = self.get_request(URL)
    except Exception as e:
        logger.error("Error finding book: " + str(e))
        return
    if rootxml is None:
        # NOTE(review): assumes get_request can hand back None on a failed request,
        # as get_xml_request does elsewhere in this file - confirm
        logger.debug("Error requesting book")
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    bookdate = rootxml.find('./book/publication_year').text
    if bookdate is None:
        bookdate = "0000"

    try:
        bookimg = rootxml.find('./book/img_url').text
        if bookimg == 'http://www.goodreads.com/assets/nocover/111x148.png':
            bookimg = 'images/nocover.png'
    except (KeyError, AttributeError):
        # element missing from the xml, fall back to the placeholder cover
        bookimg = 'images/nocover.png'

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    # path must start with './' like the others, '.book/...' never matches
    bookpages = rootxml.find('./book/num_pages').text

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # without an author id we cannot build the book row
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return
    AuthorID = author['authorid']

    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()  # strip whitespace

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": None,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)
def get_author_books(self, authorid=None, authorname=None, refresh=False): api_hits = 0 gr_lang_hits = 0 lt_lang_hits = 0 gb_lang_change = 0 cache_hits = 0 not_cached = 0 URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params) # Artist is loading myDB = database.DBConnection() controlValueDict = {"AuthorID": authorid} newValueDict = {"Status": "Loading"} myDB.upsert("authors", newValueDict, controlValueDict) books_dict = [] try: rootxml, in_cache = get_xml_request(URL, useCache=not refresh) except Exception as e: logger.error("Error fetching author books: %s" % e) return books_dict if rootxml is None: logger.debug("Error requesting author books") return books_dict if not in_cache: api_hits = api_hits + 1 resultxml = rootxml.getiterator('book') valid_langs = ([valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]) if not len(resultxml): logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid)) else: logger.debug("[%s] Now processing books with GoodReads API" % authorname) resultsCount = 0 removedResults = 0 duplicates = 0 ignored = 0 added_count = 0 updated_count = 0 book_ignore_count = 0 total_count = 0 logger.debug(u"url " + URL) authorNameResult = rootxml.find('./author/name').text logger.debug(u"author name " + authorNameResult) loopCount = 1 while resultxml is not None: for book in resultxml: total_count = total_count + 1 if (book.find('publication_year').text is None): pubyear = "0000" else: pubyear = book.find('publication_year').text try: bookimg = book.find('image_url').text if ('nocover' in bookimg): bookimg = 'images/nocover.png' except (KeyError,AttributeError): bookimg = 'images/nocover.png' # PAB this next section tries to get the book language using the isbn13 to look it up. If no isbn13 we skip the # book entirely, rather than including it with an "Unknown" language. Changed this so we can still include the book # with language set to "Unknown". 
There is a setting in config.ini to allow or skip books with "Unknown" language # if you really don't want to include them. # Not all GR books have isbn13 filled in, but all have a GR bookid, which we've already got, so use that. # Also, with GR API rules we can only call the API once per second, which slows us down a lot when all we want # is to get the language. We sleep for one second per book that GR knows about for each author you have in your # library. The libraryThing API has the same 1 second restriction, and is limited to 1000 hits per day, but has # fewer books with unknown language. To get around this and speed up the process, see if we already have a book # in the database with a similar start to the ISBN. The way ISBNs work, digits 3-5 of a 13 char ISBN or digits 0-2 # of a 10 digit ISBN indicate the region/language so if two books have the same 3 digit isbn code, they _should_ # be the same language. # I ran a simple python script on my library of 1500 books, and these codes were 100% correct on matching book # languages, no mis-matches. It did result in a small number of books with "unknown" language being wrongly matched # but most "unknown" were matched to the correct language. # We could look up ISBNs we already know about in the database, but this only holds books in the languages we want # to keep, which reduces the number of cache hits, so we create a new database table, holding ALL results including # the ISBNs for languages we don't want and books we reject. # The new table is created (if not exists) in init.py so by the time we get here there is an existing table. # If we haven't an already matching partial ISBN, look up language code from libraryThing # "http://www.librarything.com/api/thingLang.php?isbn=1234567890" # If you find a matching language, add it to the database. If "unknown" or "invalid", try GR as maybe GR can # provide a match. # If both LT and GR return unknown, add isbn to db as "unknown". 
No point in repeatedly asking LT for a code # it's told you it doesn't know. # As an extra option, if language includes "All" in config.ini, we can skip this whole section and process # everything much faster by not querying for language at all. # It does mean we include a lot of unwanted foreign translations in the database, but it's _much_ faster. bookLanguage = "Unknown" find_field = "id" isbn = "" isbnhead = "" if "All" not in valid_langs: # do we care about language if (book.find('isbn').text is not None): find_field = "isbn" isbn = book.find('isbn').text isbnhead = isbn[0:3] else: if (book.find('isbn13').text is not None): find_field = "isbn13" isbn = book.find('isbn13').text isbnhead = isbn[3:6] if (find_field != 'id'): # isbn or isbn13 found match = myDB.action('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)).fetchone() if (match): bookLanguage = match['lang'] cache_hits = cache_hits + 1 logger.debug("Found cached language [%s] for %s [%s]" % (bookLanguage, find_field, isbnhead)) else: # no match in cache, try searching librarything for a language code using the isbn # if no language found, librarything return value is "invalid" or "unknown" # returns plain text, not xml BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn try: librarything_wait() resp = urllib2.urlopen(BOOK_URL, timeout=30).read() lt_lang_hits = lt_lang_hits + 1 logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead)) if ('invalid' in resp or 'Unknown' in resp): find_field = "id" # reset the field to force search on goodreads else: bookLanguage = resp # found a language code myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, bookLanguage)) logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage)) except Exception as e: logger.error("Error finding LT language result for [%s], %s" % (isbn, e)) find_field = "id" # reset the field to search on goodreads if (find_field == 'id'): # [or bookLanguage == "Unknown"] no earlier 
match, we'll have to search the goodreads api try: if (book.find(find_field).text is not None): BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \ book.find(find_field).text + '&' + urllib.urlencode(self.params) logger.debug(u"Book URL: " + BOOK_URL) try: time_now = int(time.time()) if time_now <= lazylibrarian.LAST_GOODREADS: time.sleep(1) BOOK_rootxml, in_cache = get_xml_request(BOOK_URL) if BOOK_rootxml is None: logger.debug('Error requesting book language code') bookLanguage = "" else: if not in_cache: # only update last_goodreads if the result wasn't found in the cache lazylibrarian.LAST_GOODREADS = time_now bookLanguage = BOOK_rootxml.find('./book/language_code').text except Exception as e: logger.error("Error finding book results: %s" % e) if not in_cache: gr_lang_hits = gr_lang_hits + 1 if not bookLanguage: bookLanguage = "Unknown" if (isbnhead != ""): # GR didn't give an isbn so we can't cache it, just use language for this book myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, bookLanguage)) logger.debug("GoodReads reports language [%s] for %s" % (bookLanguage, isbnhead)) else: not_cached = not_cached + 1 logger.debug(u"GR language: " + bookLanguage) else: logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text)) # continue except Exception as e: logger.debug(u"An error has occured: %s" % e) if bookLanguage not in valid_langs: logger.debug('Skipped a book with language %s' % bookLanguage) ignored = ignored + 1 continue bookname = book.find('title').text bookid = book.find('id').text bookdesc = book.find('description').text bookisbn = book.find('isbn').text bookpub = book.find('publisher').text booklink = book.find('link').text bookrate = float(book.find('average_rating').text) bookpages = book.find('num_pages').text bookname = unaccented(bookname) if ': ' in bookname: parts = bookname.split(': ', 1) bookname = parts[0] booksub = parts[1] else: booksub = '' dic = {':': '', '"': '', '\'': ''} bookname = 
replace_all(bookname, dic) bookname = bookname.strip() # strip whitespace booksub = replace_all(booksub, dic) booksub = booksub.strip() # strip whitespace if booksub: series,seriesNum = bookSeries(booksub) else: series,seriesNum = bookSeries(bookname) # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions) # and sometimes uses the same bookid if the book is the same but the title is slightly different # We use bookid, then reject if another author/title has a different bookid so we just keep one... find_book_status = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid) if find_book_status: for resulted in find_book_status: book_status = resulted['Status'] locked = resulted ['Manual'] else: book_status = lazylibrarian.NEWBOOK_STATUS locked = False rejected = False if re.match('[^\w-]', bookname): # reject books with bad characters in title logger.debug(u"removed result [" + bookname + "] for bad characters") removedResults = removedResults + 1 rejected = True if not rejected and not bookname: logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorNameResult)) removedResults = removedResults + 1 rejected = True if not rejected: find_books = myDB.select('SELECT * FROM books WHERE BookName = "%s" and AuthorName = "%s"' % (bookname, authorNameResult)) if find_books: for find_book in find_books: if find_book['BookID'] != bookid: # we have a book with this author/title already logger.debug('Rejecting bookid %s for [%s][%s] already got %s' % (find_book['BookID'], authorNameResult, bookname, bookid)) duplicates = duplicates + 1 rejected = True break if not rejected: find_books = myDB.select('SELECT * FROM books WHERE BookID = "%s"' % bookid) if find_books: # we have a book with this bookid already logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' % (bookid, authorNameResult, bookname)) duplicates = duplicates + 1 rejected = True break if not rejected: if book_status != 
"Ignored": if not locked: controlValueDict = {"BookID": bookid} newValueDict = { "AuthorName": authorNameResult, "AuthorID": authorid, "AuthorLink": None, "BookName": bookname, "BookSub": booksub, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": None, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": pubyear, "BookLang": bookLanguage, "Status": book_status, "BookAdded": today(), "Series": series, "SeriesNum": seriesNum } resultsCount = resultsCount + 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug(u"Book found: " + book.find('title').text + " " + pubyear) if 'nocover' in bookimg or 'nophoto' in bookimg: # try to get a cover from librarything workcover = getBookCover(bookid) if workcover: logger.debug(u'Updated cover for %s to %s' % (bookname, workcover)) controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": workcover} myDB.upsert("books", newValueDict, controlValueDict) elif bookimg and bookimg.startswith('http'): link = cache_cover(bookid, bookimg) if link is not None: controlValueDict = {"BookID": bookid} newValueDict = {"BookImg": link} myDB.upsert("books", newValueDict, controlValueDict) if seriesNum == None: # try to get series info from librarything series, seriesNum = getWorkSeries(bookid) if seriesNum: logger.debug(u'Updated series: %s [%s]' % (series, seriesNum)) controlValueDict = {"BookID": bookid} newValueDict = { "Series": series, "SeriesNum": seriesNum } myDB.upsert("books", newValueDict, controlValueDict) worklink = getWorkPage(bookid) if worklink: controlValueDict = {"BookID": bookid} newValueDict = {"WorkPage": worklink} myDB.upsert("books", newValueDict, controlValueDict) if not find_book_status: logger.debug(u"[%s] Added book: %s" % (authorname, bookname)) added_count = added_count + 1 else: logger.debug(u"[%s] Updated book: %s" % (authorname, bookname)) updated_count = updated_count + 1 else: book_ignore_count = book_ignore_count + 1 
loopCount = loopCount + 1 URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \ urllib.urlencode(self.params) + '&page=' + str(loopCount) resultxml = None try: rootxml, in_cache = get_xml_request(URL, useCache=not refresh) if rootxml is None: logger.debug('Error requesting next page of results') else: resultxml = rootxml.getiterator('book') if not in_cache: api_hits = api_hits + 1 except Exception as e: resultxml = None logger.error("Error finding next page of results: %s" % e) if resultxml is not None: if all(False for book in resultxml): # returns True if iterator is empty resultxml = None lastbook = myDB.action('SELECT BookName, BookLink, BookDate from books WHERE AuthorID="%s" \ AND Status != "Ignored" order by BookDate DESC' % authorid).fetchone() if lastbook: lastbookname = lastbook['BookName'] lastbooklink = lastbook['BookLink'] lastbookdate = lastbook['BookDate'] else: lastbookname = None lastbooklink = None lastbookdate = None controlValueDict = {"AuthorID": authorid} newValueDict = { "Status": "Active", "LastBook": lastbookname, "LastLink": lastbooklink, "LastDate": lastbookdate } myDB.upsert("authors", newValueDict, controlValueDict) # This is here because GoodReads sometimes has several entries with the same BookID! 
modified_count = added_count + updated_count logger.debug("Found %s total book%s for author" % (total_count, plural(total_count))) logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored))) logger.debug("Removed %s bad character or no-name result%s for author" % (removedResults, plural(removedResults))) logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates))) logger.debug("Ignored %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count))) logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count))) myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' % (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change, cache_hits, ignored, removedResults, not_cached, duplicates)) if refresh: logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" % (authorname, added_count, plural(added_count), updated_count, plural(updated_count))) else: logger.info("[%s] Book processing complete: Added %s book%s to the database" % (authorname, added_count, plural(added_count))) return books_dict
def addAuthorToDB(authorname=None):
    """
    Add (or refresh) an author and store their Google Books results.

    The author row is first upserted with Status "Loading", then GoodReads
    is asked for the author's id/link/image/dates.  Every book returned by
    GoogleBooks.find_results() whose 'authorname' equals `authorname` is
    upserted into the books table with Status "Skipped".  Finally the author
    row is set to "Active" together with the book count and the most recent
    book (by BookDate) found in the books table.

    authorname: name to look up; also the key of the authors row.

    Returns None.  If GoodReads finds nothing the error is logged and the
    function returns without touching the books table; a placeholder row
    created by this call is removed again.
    """
    # previously a local called `type`, which shadowed the builtin
    search_type = 'author'
    myDB = database.DBConnection()

    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # The statements below are built by string interpolation, so double any
    # single quote in the name (e.g. O'Brien) to keep the SQL literal intact.
    # "%s" % ... keeps the original formatting for non-string input.
    sql_name = ("%s" % authorname).replace("'", "''")

    query = "SELECT * from authors WHERE AuthorName='%s'" % sql_name
    dbauthor = myDB.action(query).fetchone()

    controlValueDict = {"AuthorName": authorname}
    if dbauthor is None:
        # placeholder id until GoodReads gives us the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without an author there is no authorid/authorimg for the book rows,
        # so stop here instead of failing later on unbound names.
        if dbauthor is None:
            # drop the placeholder row this call created
            myDB.action("DELETE from authors WHERE AuthorName='%s'" % sql_name)
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    for book in GB.find_results():
        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] != authorname:
            continue
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # NOTE(review): the image URL is stored under AuthorLink here,
            # kept as-is because readers of the column are not visible
            # from this block - confirm whether authorlink was intended.
            "AuthorLink": authorimg,
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)
        bookscount += 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorName='%s' order by BookDate DESC"
        % sql_name).fetchone()
    if lastbook:
        lastbookname = lastbook['BookName']
        lastbooklink = lastbook['BookLink']
        lastbookdate = lastbook['BookDate']
    else:
        # the author may have no books in the table at all
        lastbookname = None
        lastbooklink = None
        lastbookdate = None

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount,
        "LastBook": lastbookname,
        "LastLink": lastbooklink,
        "LastDate": lastbookdate
    }
    myDB.upsert("authors", newValueDict, controlValueDict)
    logger.info("Processing complete: Added %s books to the database" % bookscount)
def addAuthorToDB(authorname=None, refresh=False):
    """
    Add an author to the database by name, or refresh an existing one.

    The author row is upserted with Status "Loading", the author is looked
    up on GoodReads, the configured book API (lazylibrarian.BOOK_API) loads
    the author's books, and finally the HaveBooks / TotalBooks /
    UnignoredBooks counters on the author row are recalculated.

    authorname: author name; used as the key of the authors row
    refresh:    passed through to the GoodReads lookup and to get_author_books

    Returns None.  If GoodReads finds nothing, a warning is logged, the
    author row is deleted and the function returns early.
    """
    threading.currentThread().name = "DBIMPORT"
    myDB = database.DBConnection()

    GR = GoodReads(authorname)

    # The SQL below is built by string interpolation.  Double the quote
    # character that delimits each literal so that names containing ' or "
    # do not break (or alter) the statement.
    sq_name = authorname.replace("'", "''")
    dq_name = authorname.replace('"', '""')

    query = "SELECT * from authors WHERE AuthorName='%s'" % sq_name
    dbauthor = myDB.action(query).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        # placeholder id until GoodReads gives us the real one
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        # NOTE(review): this also removes an author that existed before the
        # call - confirm that is wanted when a refresh lookup fails.
        myDB.action('DELETE from authors WHERE AuthorName="%s"' % dq_name)
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    # Recalculate the per-author counters: (authors column, extra filter on books)
    counters = (
        ("HaveBooks", ' AND (Status="Have" OR Status="Open")'),
        ("TotalBooks", ''),
        ("UnignoredBooks", ' AND Status!="Ignored"'),
    )
    for column, status_filter in counters:
        row = myDB.action(
            'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s"%s'
            % (dq_name, status_filter)).fetchone()
        myDB.action('UPDATE authors set %s="%s" where AuthorName="%s"'
                    % (column, row['counter'], dq_name))

    logger.debug("[%s] Author update complete" % authorname)
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    authorname: author name; used for the lookup when no authorid is given
                or when the lookup by id finds nothing
    refresh:    passed through to the lookups, image cache and get_author_books
    authorid:   author id; tried first when given.  In that case the
                authorname argument is replaced by the name stored in the
                database, or by 'unknown author' for a new id
    addbooks:   when False no books are loaded and the author is marked "Ignored"

    Returns a status message when the update completes or when an unhandled
    exception occurs (the traceback is logged, not raised).  Returns None
    when no author could be matched.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        # only rename threads that still carry a default "Thread-N" name
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh

        if authorid:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID='%s'" % authorid)
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Now adding new author id: %s to database" % authorid)
                new_author = True
            else:
                authorname = dbauthor['authorname']
                logger.debug("Now updating author %s " % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # leave name/image/dates alone on rows flagged as manual
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" row this call just created
                    myDB.action('DELETE from authors WHERE AuthorID="%s"' % authorid)

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)

            query = "SELECT * from authors WHERE AuthorName='%s'" % authorname.replace("'", "''")
            dbauthor = myDB.match(query)
            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                # placeholder id until the lookup gives us the real one
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            author = GR.find_author_id(refresh=refresh)
            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                # leave image/dates alone on rows flagged as manual
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    # The literal is delimited by double quotes, so double any
                    # embedded one; otherwise a name containing " raises an SQL
                    # error that the catch-all below swallows, leaving the
                    # placeholder row behind.
                    myDB.action('DELETE from authors WHERE AuthorName="%s"'
                                % authorname.replace('"', '""'))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        manual_row = myDB.match("SELECT Manual from authors WHERE AuthorID='%s'" % authorid)
        if not manual_row or not manual_row['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            # books of a brand new author get a different initial status
            # than new books of an author we already know
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                GR = GoodReads(authorname)
                GR.get_author_books(authorid, authorname, bookstatus, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)
        else:
            # if we're not loading any books, mark author as ignored
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Ignored"}
            myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete" % authorname
        logger.debug(msg)
        return msg
    except Exception:
        # top-level boundary: report the traceback instead of raising
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped", refresh=False):
    """
    Load an author's books from the Google Books API into the database.

    Marks the author row as "Loading", pages through the API results for
    inauthor:"<authorname>", filters each result (language, bad or empty
    title, optional future publication date, duplicates), upserts accepted
    books and then sets the author row to "Active" with its latest
    non-ignored book.  One row of counters is written to the stats table.

    authorid:   AuthorID of the authors row to update and to store on books
    authorname: author name used for the search and for duplicate checks
    bookstatus: Status given to books that are not yet in the database
    refresh:    when True, cached API responses are bypassed
                (useCache=not refresh) and passed on to cache_img

    Returns None.  Exceptions are not raised to the caller: anything
    unexpected is logged with its traceback by the outer handler.
    """
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + urllib.quote('inauthor:"%s"' % unaccented_str(authorname))

        # counters for the log summary and the stats table row
        api_hits = 0
        gr_lang_hits = 0  # never incremented in this method, written to stats as 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0  # never incremented in this method, written to stats as 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        # start at 1 so the paging loop runs at least once; replaced by
        # 'totalItems' from the first response
        number_results = 1

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urllib.urlencode(self.params)

                try:
                    jsonresults, in_cache = get_json_request(URL, useCache=not refresh)
                    if not jsonresults:
                        number_results = 0
                    else:
                        if not in_cache:
                            # only responses that were not served from cache count as API hits
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except HTTPError as err:
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % err.reason)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                # presumably the page size configured in self.params - TODO confirm
                startindex += 40

                for item in jsonresults['items']:

                    total_count += 1

                    # skip if no author, no author is no book.
                    # NOTE(review): only KeyError is handled; an empty 'authors'
                    # list would raise IndexError, which is only caught by the
                    # outermost handler and ends the whole import.
                    try:
                        _ = item['volumeInfo']['authors'][0]
                    except KeyError:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    # only the first identifier is looked at, and only if it is an ISBN-10
                    try:
                        if item['volumeInfo']['industryIdentifiers'][0]['type'] == 'ISBN_10':
                            bookisbn = item['volumeInfo']['industryIdentifiers'][0]['identifier']
                        else:
                            bookisbn = ""
                    except KeyError:
                        bookisbn = ""

                    # isbnhead is the 3-character slice used as key of the languages table
                    # NOTE(review): bookisbn is only taken from an ISBN_10 entry, so the
                    # 13-character branches here and below look unreachable unless the
                    # data is mislabelled - confirm.
                    isbnhead = ""
                    if len(bookisbn) == 10:
                        isbnhead = bookisbn[0:3]
                    elif len(bookisbn) == 13:
                        isbnhead = bookisbn[3:6]

                    try:
                        booklang = item['volumeInfo']['language']
                    except KeyError:
                        booklang = "Unknown"

                    # do we care about language?
                    if "All" not in valid_langs:
                        if bookisbn != "":
                            # seems google lies to us, sometimes tells us books
                            # are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                # 1) language previously cached for this isbn head
                                lang = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:
                                    # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(bookisbn) == 13 and bookisbn.startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(bookisbn) == 10) or \
                                                (len(bookisbn) == 13 and bookisbn.startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    # NOTE(review): this branch uses isbn_978_dict but the
                                                    # message says "ISBN979" - looks like a copy/paste slip.
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            # remember the result so the next book with this head hits the cache
                                            myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, booklang))
                                            logger.debug(u"GB language: " + booklang)

                                if not match:
                                    # try searching librarything for a language code using the isbn
                                    # if no language found, librarything return value is "invalid" or "unknown"
                                    # librarything returns plain text, not xml
                                    BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + bookisbn
                                    try:
                                        librarything_wait()
                                        resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                        lt_lang_hits += 1
                                        logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))
                                        if resp != 'invalid' and resp != 'unknown':
                                            booklang = resp  # found a language code
                                            match = True
                                            myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, booklang))
                                            logger.debug(u"LT language: " + booklang)
                                    except Exception as e:
                                        # best effort: match stays False here, so the
                                        # google language is restored just below
                                        booklang = ""
                                        logger.error("Error finding language: %s" % str(e))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        booknamealt = item['volumeInfo']['title']
                                        logger.debug("%s Google thinks [%s], we think [%s]" % (booknamealt, googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            booknamealt = item['volumeInfo']['title']
                            logger.debug('Skipped [%s] with language %s' % (booknamealt, booklang))
                            ignored += 1
                            continue

                    try:
                        bookpub = item['volumeInfo']['publisher']
                    except KeyError:
                        bookpub = ""

                    try:
                        booksub = item['volumeInfo']['subtitle']
                    except KeyError:
                        booksub = ""

                    # try to pull "series name" and number out of a subtitle that
                    # contains "(<name> Series <num>)", "(<name> #<num>)" or "(<name> <num>)"
                    if not booksub:
                        series = ""
                        seriesNum = ""
                    else:
                        try:
                            series = booksub.split('(')[1].split(' Series ')[0]
                        except IndexError:
                            series = ""
                        if series.endswith(')'):
                            series = series[:-1]
                        try:
                            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
                            if seriesNum[0] == '#':
                                seriesNum = seriesNum[1:]
                        except IndexError:
                            seriesNum = ""

                        if not seriesNum and '#' in series:
                            words = series.rsplit('#', 1)
                            series = words[0].strip()
                            seriesNum = words[1].strip()
                        if not seriesNum and ' ' in series:
                            words = series.rsplit(' ', 1)
                            # has to be unicode for isnumeric()
                            if (u"%s" % words[1]).isnumeric():
                                series = words[0]
                                seriesNum = words[1]

                    # optional volume fields, each with a fallback value
                    try:
                        bookdate = item['volumeInfo']['publishedDate']
                    except KeyError:
                        bookdate = '0000-00-00'

                    try:
                        bookimg = item['volumeInfo']['imageLinks']['thumbnail']
                    except KeyError:
                        bookimg = 'images/nocover.png'

                    try:
                        bookrate = item['volumeInfo']['averageRating']
                    except KeyError:
                        bookrate = 0

                    try:
                        bookpages = item['volumeInfo']['pageCount']
                    except KeyError:
                        bookpages = 0

                    try:
                        bookgenre = item['volumeInfo']['categories'][0]
                    except KeyError:
                        bookgenre = ""

                    try:
                        bookdesc = item['volumeInfo']['description']
                    except KeyError:
                        bookdesc = ""

                    # a missing 'title' or 'canonicalVolumeLink' raises KeyError, which
                    # is caught by the handler around the paging loop and ends paging
                    bookname = item['volumeInfo']['title']
                    bookname = unaccented(bookname)
                    dic = {':': '.', '"': '', '\'': ''}
                    bookname = replace_all(bookname, dic)
                    bookname = bookname.strip()  # strip whitespace

                    booklink = item['volumeInfo']['canonicalVolumeLink']
                    bookrate = float(bookrate)
                    bookid = item['id']

                    # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                    # and sometimes uses the same bookid if the book is the same but the title is slightly different
                    #
                    # Not sure if googlebooks does too, but we only want one...
                    existing_book = myDB.match('SELECT Status,Manual FROM books WHERE BookID = "%s"' % bookid)
                    if existing_book:
                        book_status = existing_book['Status']
                        locked = existing_book['Manual']
                        if locked is None:
                            locked = False
                        elif locked.isdigit():
                            # presumably stored as a text digit such as "0"/"1" - TODO confirm
                            locked = bool(int(locked))
                    else:
                        book_status = bookstatus  # new_book status, or new_author status
                        locked = False

                    rejected = False
                    check_status = False

                    # NOTE(review): re.match anchors at the start of the string, so this
                    # only tests the FIRST character of the title.
                    if re.match('[^\w-]', bookname):  # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = True

                    if not rejected and not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        removedResults += 1
                        rejected = True

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        # so compare against today's date cut to the same length
                        if bookdate > today()[:len(bookdate)]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, bookdate))
                            removedResults += 1
                            rejected = True

                    if not rejected:
                        # same author/title (case-insensitive) already stored under another id?
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName = "%s" COLLATE NOCASE and AuthorName = "%s" COLLATE NOCASE'% \
                               (bookname.replace('"', '""'), authorname.replace('"', '""'))
                        match = myDB.match(cmd)
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (match['BookID'], authorname, bookname, bookid))
                                rejected = True
                                duplicates += 1

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID="%s"' % bookid
                        match = myDB.match(cmd)
                        if match:
                            # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                # exactly the same book: counted as duplicate but
                                # still refreshed by the block below
                                check_status = True
                            duplicates += 1
                            rejected = True

                    if check_status or not rejected:
                        if book_status != "Ignored" and not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": bookgenre,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": bookdate,
                                "BookLang": booklang,
                                "Status": book_status,
                                "BookAdded": today()
                            }
                            resultcount += 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + bookname + " " + bookdate)
                            # 'updated' decides whether an existing book is counted as updated
                            updated = False
                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif bookimg and bookimg.startswith('http'):
                                link, success = cache_img("book", bookid, bookimg, refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % bookimg)

                            seriesdict = {}
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                # prefer series info from librarything
                                seriesdict = getWorkSeries(bookid)
                                if seriesdict:
                                    logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
                                    updated = True
                                else:
                                    # librarything doesn't have series info. Any in the title?
                                    if series:
                                        seriesdict = {cleanName(unaccented(series)): seriesNum}
                                setSeries(seriesdict, bookid)

                            new_status = setStatus(bookid, seriesdict, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
                        else:
                            # book is marked Ignored or locked (Manual): left untouched
                            book_ignore_count += 1
        except KeyError:
            # Any KeyError raised inside the paging loop (e.g. a response without
            # 'items' or a volume without 'title') ends paging silently; the
            # summary below is still written.
            pass

        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        # most recent non-ignored book becomes the author's "last book"
        lastbook = myDB.match('SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID="%s" \
                               AND Status != "Ignored" order by BookDate DESC' % authorid)

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": "Active",
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        # one stats row per run; the author name has its double quotes doubled
        # because the statement is built by interpolation
        myDB.action('insert into stats values ("%s", %i, %i, %i, %i, %i, %i, %i, %i, %i)' %
                    (authorname.replace('"', '""'), api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        # top-level boundary: log the traceback instead of raising to the caller
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def get_author_books(self, authorid=None, authorname=None, bookstatus="Skipped",
                     entrystatus='Active', refresh=False):
    """Import an author's books from the Google Books API into the database.

    Pages through the ``inauthor:"<name>"`` search 40 results at a time. For
    each result it optionally works out a better language guess, applies the
    rejection rules (no name, bad characters, future/no publication date, no
    isbn, duplicates), upserts accepted books, then refreshes cover, series
    and work-page information. Finally it updates the author's "last book"
    fields, logs a summary and writes one row to the ``stats`` table.

    :param authorid: AuthorID stored on every imported book and used to
        select the author's most recent book.
    :param authorname: name used for the search, duplicate checks and logs.
    :param bookstatus: status given to books not already in the database;
        also passed to ``setStatus``.
    :param entrystatus: status written to the author row when finished.
    :param refresh: when True the request cache is bypassed
        (``useCache=not refresh``) and the final log line reports updates.
    :return: None. Any unexpected exception is caught and logged.
    """
    # noinspection PyBroadException
    try:
        logger.debug('[%s] Now processing books with Google Books API' % authorname)
        # google doesnt like accents in author names
        set_url = self.url + quote('inauthor:"%s"' % unaccented_str(authorname))

        api_hits = 0
        gr_lang_hits = 0
        lt_lang_hits = 0
        gb_lang_change = 0
        cache_hits = 0
        not_cached = 0
        startindex = 0
        resultcount = 0
        removedResults = 0
        duplicates = 0
        ignored = 0
        added_count = 0
        updated_count = 0
        book_ignore_count = 0
        total_count = 0
        number_results = 1

        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        # Artist is loading
        myDB = database.DBConnection()
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": "Loading"}
        myDB.upsert("authors", newValueDict, controlValueDict)

        try:
            while startindex < number_results:

                self.params['startIndex'] = startindex
                URL = set_url + '&' + urlencode(self.params)

                try:
                    jsonresults, in_cache = gb_json_request(URL, useCache=not refresh)
                    if jsonresults is None:
                        number_results = 0
                    else:
                        if not in_cache:
                            api_hits += 1
                        number_results = jsonresults['totalItems']
                except Exception as err:
                    if hasattr(err, 'reason'):
                        errmsg = err.reason
                    else:
                        errmsg = str(err)
                    logger.warn('Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                    break

                if number_results == 0:
                    logger.warn('Found no results for %s' % authorname)
                    break
                else:
                    logger.debug('Found %s result%s for %s' % (number_results, plural(number_results), authorname))

                startindex += 40

                for item in jsonresults['items']:

                    total_count += 1

                    book = bookdict(item)
                    # skip if no author, no author is no book.
                    if not book['author']:
                        logger.debug('Skipped a result without authorfield.')
                        continue

                    isbnhead = ""
                    if len(book['isbn']) == 10:
                        isbnhead = book['isbn'][0:3]
                    elif len(book['isbn']) == 13:
                        isbnhead = book['isbn'][3:6]

                    booklang = book['lang']
                    # do we care about language?
                    if "All" not in valid_langs:
                        if book['isbn']:
                            # seems google lies to us, sometimes tells us books are in english when they are not
                            if booklang == "Unknown" or booklang == "en":
                                googlelang = booklang
                                match = False
                                lang = myDB.match('SELECT lang FROM languages where isbn=?', (isbnhead,))
                                if lang:
                                    booklang = lang['lang']
                                    cache_hits += 1
                                    logger.debug("Found cached language [%s] for [%s]" % (booklang, isbnhead))
                                    match = True
                                if not match:  # no match in cache, try lookup dict
                                    if isbnhead:
                                        if len(book['isbn']) == 13 and book['isbn'].startswith('979'):
                                            for lang in lazylibrarian.isbn_979_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_979_dict[lang]
                                                    logger.debug("ISBN979 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        elif (len(book['isbn']) == 10) or \
                                                (len(book['isbn']) == 13 and book['isbn'].startswith('978')):
                                            for lang in lazylibrarian.isbn_978_dict:
                                                if isbnhead.startswith(lang):
                                                    booklang = lazylibrarian.isbn_978_dict[lang]
                                                    # this branch consults the 978 table, so label it as such
                                                    logger.debug("ISBN978 returned %s for %s" % (booklang, isbnhead))
                                                    match = True
                                                    break
                                        if match:
                                            myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                if not match:
                                    booklang = thingLang(book['isbn'])
                                    lt_lang_hits += 1
                                    if booklang:
                                        match = True
                                        myDB.action('insert into languages values (?, ?)', (isbnhead, booklang))

                                if match:
                                    # We found a better language match
                                    if googlelang == "en" and booklang not in ["en-US", "en-GB", "eng"]:
                                        # these are all english, may need to expand this list
                                        logger.debug("%s Google thinks [%s], we think [%s]" %
                                                     (book['name'], googlelang, booklang))
                                        gb_lang_change += 1
                                else:  # No match anywhere, accept google language
                                    booklang = googlelang

                        # skip if language is in ignore list
                        if booklang not in valid_langs:
                            logger.debug('Skipped [%s] with language %s' % (book['name'], booklang))
                            ignored += 1
                            continue

                    # Defaults for a book we have not seen before; an existing
                    # database row overrides them below.
                    rejected = 0
                    check_status = False
                    book_status = bookstatus  # new_book status, or new_author status
                    audio_status = lazylibrarian.CONFIG['NEWAUDIO_STATUS']
                    added = today()
                    locked = False
                    existing_book = None
                    bookname = book['name']
                    bookid = item['id']
                    if not bookname:
                        logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorname))
                        removedResults += 1
                        rejected = 1
                    else:
                        bookname = replace_all(unaccented(bookname), {':': '.', '"': '', '\'': ''}).strip()
                        # GoodReads sometimes has multiple bookids for the same book (same author/title, different
                        # editions) and sometimes uses the same bookid if the book is the same but the title is
                        # slightly different. Not sure if googlebooks does too, but we only want one...
                        cmd = 'SELECT Status,AudioStatus,Manual,BookAdded FROM books WHERE BookID=?'
                        existing_book = myDB.match(cmd, (bookid,))
                        if existing_book:
                            book_status = existing_book['Status']
                            audio_status = existing_book['AudioStatus']
                            locked = existing_book['Manual']
                            added = existing_book['BookAdded']
                            if locked is None:
                                locked = False
                            elif locked.isdigit():
                                locked = bool(int(locked))

                    if not rejected and re.match(r'[^\w-]', bookname):
                        # remove books with bad characters in title
                        logger.debug("[%s] removed book for bad characters" % bookname)
                        removedResults += 1
                        rejected = 2

                    if not rejected and lazylibrarian.CONFIG['NO_FUTURE']:
                        # googlebooks sometimes gives yyyy, sometimes yyyy-mm, sometimes yyyy-mm-dd
                        if book['date'] > today()[:len(book['date'])]:
                            logger.debug('Rejecting %s, future publication date %s' % (bookname, book['date']))
                            removedResults += 1
                            rejected = 3

                    if not rejected and lazylibrarian.CONFIG['NO_PUBDATE']:
                        if not book['date']:
                            logger.debug('Rejecting %s, no publication date' % bookname)
                            removedResults += 1
                            rejected = 4

                    if not rejected and lazylibrarian.CONFIG['NO_ISBN']:
                        if not isbnhead:
                            logger.debug('Rejecting %s, no isbn' % bookname)
                            removedResults += 1
                            rejected = 5

                    if not rejected:
                        cmd = 'SELECT BookID FROM books,authors WHERE books.AuthorID = authors.AuthorID'
                        cmd += ' and BookName=? COLLATE NOCASE and AuthorName=? COLLATE NOCASE'
                        # Values are bound as parameters, so they must be passed
                        # unescaped; doubling quotes here would change the value
                        # being compared and hide real duplicates.
                        match = myDB.match(cmd, (bookname, authorname))
                        if match:
                            if match['BookID'] != bookid:
                                # we have a different book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (bookid, authorname, bookname, match['BookID']))
                                rejected = 6
                                duplicates += 1

                    if not rejected:
                        cmd = 'SELECT AuthorName,BookName FROM books,authors'
                        cmd += ' WHERE authors.AuthorID = books.AuthorID AND BookID=?'
                        match = myDB.match(cmd, (bookid,))
                        if match:  # we have a book with this bookid already
                            if bookname != match['BookName'] or authorname != match['AuthorName']:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got bookid for [%s][%s]' %
                                             (bookid, authorname, bookname, match['AuthorName'], match['BookName']))
                            else:
                                logger.debug('Rejecting bookid %s for [%s][%s] already got this book in database' %
                                             (bookid, authorname, bookname))
                                # identical entry: still refresh its details below
                                check_status = True
                            duplicates += 1
                            rejected = 7

                    if not existing_book and rejected in [3, 4, 5]:
                        # A new book that failed the date/isbn rules is only
                        # imported when IMP_IGNORE is set, and then as Ignored.
                        # This has to run after the rejection checks, otherwise
                        # "rejected" can never hold one of these values.
                        book_status = 'Ignored'
                        audio_status = 'Ignored'

                    # NOTE(review): nesting below was reconstructed from flattened
                    # source; the final "else" is assumed to pair with this test.
                    if check_status or not rejected or (
                            lazylibrarian.CONFIG['IMP_IGNORE'] and rejected in [3, 4, 5]):  # dates, isbn

                        if not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorID": authorid,
                                "BookName": bookname,
                                "BookSub": book['sub'],
                                "BookDesc": book['desc'],
                                "BookIsbn": book['isbn'],
                                "BookPub": book['pub'],
                                "BookGenre": book['genre'],
                                "BookImg": book['img'],
                                "BookLink": book['link'],
                                "BookRate": float(book['rate']),
                                "BookPages": book['pages'],
                                "BookDate": book['date'],
                                "BookLang": booklang,
                                "Status": book_status,
                                "AudioStatus": audio_status,
                                "BookAdded": added
                            }
                            resultcount += 1

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug("Book found: " + bookname + " " + book['date'])
                            updated = False
                            if 'nocover' in book['img'] or 'nophoto' in book['img']:
                                # try to get a cover from another source
                                workcover, source = getBookCover(bookid)
                                if workcover:
                                    logger.debug('Updated cover for %s using %s' % (bookname, source))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True

                            elif book['img'] and book['img'].startswith('http'):
                                link, success, _ = cache_img("book", bookid, book['img'], refresh=refresh)
                                if success:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)
                                    updated = True
                                else:
                                    logger.debug('Failed to cache image for %s' % book['img'])

                            serieslist = []
                            if book['series']:
                                serieslist = [('', book['seriesNum'], cleanName(unaccented(book['series']), '&/'))]
                            if lazylibrarian.CONFIG['ADD_SERIES']:
                                newserieslist = getWorkSeries(bookid)
                                if newserieslist:
                                    serieslist = newserieslist
                                    logger.debug('Updated series: %s [%s]' % (bookid, serieslist))
                                    updated = True
                            setSeries(serieslist, bookid)

                            new_status = setStatus(bookid, serieslist, bookstatus)

                            if not new_status == book_status:
                                book_status = new_status
                                updated = True

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not existing_book:
                                logger.debug("[%s] Added book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                added_count += 1
                            elif updated:
                                logger.debug("[%s] Updated book: %s [%s] status %s" %
                                             (authorname, bookname, booklang, book_status))
                                updated_count += 1
                    else:
                        book_ignore_count += 1
        except KeyError as err:
            # A missing key in a response page ends paging; the totals gathered
            # so far are still reported below.
            logger.debug('[%s] Stopped reading results, missing key %s' % (authorname, str(err)))

        deleteEmptySeries()
        logger.debug('[%s] The Google Books API was hit %s time%s to populate book list' %
                     (authorname, api_hits, plural(api_hits)))

        cmd = 'SELECT BookName, BookLink, BookDate, BookImg from books WHERE AuthorID=?'
        cmd += ' AND Status != "Ignored" order by BookDate DESC'
        lastbook = myDB.match(cmd, (authorid,))

        if lastbook:  # maybe there are no books [remaining] for this author
            lastbookname = lastbook['BookName']
            lastbooklink = lastbook['BookLink']
            lastbookdate = lastbook['BookDate']
            lastbookimg = lastbook['BookImg']
        else:
            lastbookname = ""
            lastbooklink = ""
            lastbookdate = ""
            lastbookimg = ""

        controlValueDict = {"AuthorID": authorid}
        newValueDict = {
            "Status": entrystatus,
            "LastBook": lastbookname,
            "LastLink": lastbooklink,
            "LastDate": lastbookdate,
            "LastBookImg": lastbookimg
        }

        myDB.upsert("authors", newValueDict, controlValueDict)

        logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
        logger.debug("Removed %s unwanted language result%s for author" % (ignored, plural(ignored)))
        logger.debug("Removed %s bad character or no-name result%s for author" %
                     (removedResults, plural(removedResults)))
        logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
        logger.debug("Found %s book%s by author marked as Ignored" % (book_ignore_count, plural(book_ignore_count)))
        logger.debug("Imported/Updated %s book%s for author" % (resultcount, plural(resultcount)))

        # authorname is a bound parameter, so it is stored exactly as given
        myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                    (authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                     cache_hits, ignored, removedResults, not_cached, duplicates))

        if refresh:
            logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                        (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
        else:
            logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                        (authorname, added_count, plural(added_count)))

    except Exception:
        logger.error('Unhandled exception in GB.get_author_books: %s' % traceback.format_exc())
def find_book(bookid=None, queue=None):
    """Look up one Google Books volume by id and add it to the database as Wanted.

    Fetches the volume, extracts title, author, language, publisher, subtitle
    (from which a series name and number are parsed), date, cover, rating,
    page count, genre, description and ISBN-10, resolves the author through
    GoodReads (adding the author as "Ignored" when not already known), then
    upserts the book and refreshes cover, series and work-page information.

    :param bookid: Google Books volume id.
    :param queue: accepted for interface compatibility; not used here.
    :return: None. Returns early when the request yields no results, the
        volume has no author, or no AuthorID can be found.
    """
    myDB = database.DBConnection()
    if not lazylibrarian.CONFIG['GB_API']:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.CONFIG['GB_API']
    jsonresults, in_cache = get_json_request(URL)

    if not jsonresults:
        logger.debug('No results found for %s' % bookid)
        return

    volume = jsonresults['volumeInfo']

    bookname = volume['title']
    dic = {':': '.', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)

    bookname = unaccented(bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):
        # field missing, or present but empty
        logger.debug('Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted this book
        booklang = volume['language']
        valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
        if booklang not in valid_langs and 'All' not in valid_langs:
            logger.debug('Book %s googlebooks language does not match preference, %s' % (bookname, booklang))
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    bookpub = volume.get('publisher', "")

    series = ""
    seriesNum = ""
    try:
        booksub = volume['subtitle']
        # subtitle is expected to look like "... (Name Series #N)"
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = ""
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = ""
    except KeyError:
        booksub = ""

    bookdate = volume.get('publishedDate', '0000-00-00')

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    bookrate = volume.get('averageRating', 0)
    bookpages = volume.get('pageCount', 0)

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = ""

    bookdesc = volume.get('description', "")

    try:
        # only the first identifier is considered, and only if it is an ISBN-10
        identifier = volume['industryIdentifiers'][0]
        if identifier['type'] == 'ISBN_10':
            bookisbn = identifier['identifier']
        else:
            bookisbn = ""
    except (KeyError, IndexError):
        bookisbn = ""

    booklink = volume['canonicalVolumeLink']
    bookrate = float(bookrate)

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if author:
        AuthorID = author['authorid']
        # Bound parameters keep names containing quotes from breaking the query
        match = myDB.match('SELECT AuthorID from authors WHERE AuthorID=?', (AuthorID,))
        if not match:
            match = myDB.match('SELECT AuthorID from authors WHERE AuthorName=?', (author['authorname'],))
            if match:
                logger.debug('%s: Changing authorid from %s to %s' %
                             (author['authorname'], AuthorID, match['AuthorID']))
                AuthorID = match['AuthorID']  # we have a different authorid for that authorname
            else:  # no author but request to add book, add author as "ignored"
                # User hit "add book" button from a search
                controlValueDict = {"AuthorID": AuthorID}
                newValueDict = {
                    "AuthorName": author['authorname'],
                    "AuthorImg": author['authorimg'],
                    "AuthorLink": author['authorlink'],
                    "AuthorBorn": author['authorborn'],
                    "AuthorDeath": author['authordeath'],
                    "DateAdded": today(),
                    "Status": "Ignored"
                }
                myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        logger.warn("No AuthorID for %s, unable to add book %s" % (authorname, bookname))
        return

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorID": AuthorID,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)

    # NOTE(review): the return shapes of getBookCover / cache_img / getWorkSeries
    # are taken as this function has always used them (single value, 2-tuple,
    # dict); confirm they match the helper versions actually in use.
    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link, success = cache_img("book", bookid, bookimg)
        if success:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)
        else:
            logger.debug('Failed to cache image for %s' % bookimg)

    if lazylibrarian.CONFIG['ADD_SERIES']:
        # prefer series info from librarything
        seriesdict = getWorkSeries(bookid)
        if seriesdict:
            logger.debug(u'Updated series: %s [%s]' % (bookid, seriesdict))
        else:
            if series:
                seriesdict = {cleanName(unaccented(series)): seriesNum}
        setSeries(seriesdict, bookid)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def processDir(reset=False):
    """Post-process completed downloads for every item marked as Snatched.

    For each snatched entry the download directory is searched for the best
    fuzzy match on its title (asking the torrent client for the current name
    first). A match at or above ``lazylibrarian.DLOAD_RATIO`` is handed to
    ``processDestination`` as either a book or a magazine issue, the database
    is updated, and the source is removed or renamed ``.fail`` depending on
    the outcome. Afterwards any remaining "LL.(bookid)" entries are imported
    and, if ``lazylibrarian.TASK_AGE`` is set, snatches older than that many
    hours are marked Failed and their download task is deleted.

    :param reset: when True the processDir job is rescheduled at the end.
    :return: None. Any unexpected exception is caught and logged.
    """
    try:
        threadname = threading.currentThread().name
        if "Thread-" in threadname:
            threading.currentThread().name = "POSTPROCESS"

        processpath = lazylibrarian.DIRECTORY('Download')

        logger.debug('Checking [%s] for files to post process' % processpath)

        try:
            downloads = os.listdir(processpath)
        except OSError as why:
            logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror))
            return

        myDB = database.DBConnection()
        snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

        if len(snatched) == 0:
            logger.info('Nothing marked as snatched.')
            scheduleJob(action='Stop', target='processDir')
            return

        if len(downloads) == 0:
            logger.info('No downloads are found. Nothing to process yet.')
            return

        logger.info("Checking %s download%s for %s snatched file%s" %
                    (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched))))
        ppcount = 0
        for book in snatched:
            # if torrent, see if we can get current status from the downloader as the name
            # may have been changed once magnet resolved, or download started or completed
            # depending on torrent downloader. Usenet doesn't change the name. We like usenet.
            torrentname = ''
            try:
                logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source']))
                if book['Source'] == 'TRANSMISSION':
                    torrentname = transmission.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'UTORRENT':
                    torrentname = utorrent.nameTorrent(book['DownloadID'])
                elif book['Source'] == 'RTORRENT':
                    torrentname = rtorrent.getName(book['DownloadID'])
                elif book['Source'] == 'QBITTORRENT':
                    torrentname = qbittorrent.getName(book['DownloadID'])
                elif book['Source'] == 'SYNOLOGY_TOR':
                    torrentname = synology.getName(book['DownloadID'])
                elif book['Source'] == 'DELUGEWEBUI':
                    torrentname = deluge.getTorrentFolder(book['DownloadID'])
                elif book['Source'] == 'DELUGERPC':
                    client = DelugeRPCClient(lazylibrarian.DELUGE_HOST,
                                             int(lazylibrarian.DELUGE_PORT),
                                             lazylibrarian.DELUGE_USER,
                                             lazylibrarian.DELUGE_PASS)
                    try:
                        client.connect()
                        result = client.call('core.get_torrent_status', book['DownloadID'], {})
                        if 'name' in result:
                            torrentname = unaccented_str(result['name'])
                    except Exception as e:
                        logger.debug('DelugeRPC failed %s' % str(e))
            except Exception as e:
                logger.debug("Failed to get updated torrent name from %s for %s: %s" %
                             (book['Source'], book['DownloadID'], str(e)))

            matchtitle = unaccented_str(book['NZBtitle'])
            if torrentname and torrentname != matchtitle:
                logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname))
                # the name comes from the download client, so bind it rather
                # than building the statement by string formatting
                myDB.action('UPDATE wanted SET NZBtitle = ? WHERE NZBurl = ?',
                            (torrentname, book['NZBurl']))
                matchtitle = torrentname

            # here we could also check percentage downloaded or eta or status?
            # If downloader says it hasn't completed, no need to look for it.

            matches = []

            logger.info('Looking for %s in %s' % (matchtitle, processpath))
            for fname in downloads:
                # skip if failed before or incomplete torrents, or incomplete btsync
                extn = os.path.splitext(fname)[1]
                if extn not in ['.fail', '.part', '.bts', '.!ut']:
                    # This is to get round differences in torrent filenames.
                    # Usenet is ok, but Torrents aren't always returned with the name we searched for
                    # We ask the torrent downloader for the torrent name, but don't always get an answer
                    # so we try to do a "best match" on the name, there might be a better way...
                    if isinstance(fname, str):
                        matchname = fname.decode(lazylibrarian.SYS_ENCODING)
                    else:
                        matchname = fname
                    if ' LL.(' in matchname:
                        matchname = matchname.split(' LL.(')[0]
                    match = 0
                    if matchtitle:
                        if ' LL.(' in matchtitle:
                            matchtitle = matchtitle.split(' LL.(')[0]
                        match = fuzz.token_set_ratio(matchtitle, matchname)
                    if match and match >= lazylibrarian.DLOAD_RATIO:
                        fname = matchname
                        if os.path.isfile(os.path.join(processpath, fname)):
                            # handle single file downloads here. Book/mag file in download root.
                            # move the file into its own subdirectory so we don't move/delete things that aren't ours
                            logger.debug('filename [%s] is a file' % os.path.join(processpath, fname))
                            if is_valid_booktype(fname, booktype="book") \
                                    or is_valid_booktype(fname, booktype="mag"):
                                logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname))
                                if bts_file(processpath):
                                    logger.debug("Skipping %s, found a .bts file" % processpath)
                                else:
                                    fname = os.path.splitext(fname)[0]
                                    dirname = os.path.join(processpath, fname)
                                    if not os.path.exists(dirname):
                                        try:
                                            os.makedirs(dirname)
                                            setperm(dirname)
                                        except OSError as why:
                                            logger.debug('Failed to create directory %s, %s' %
                                                         (dirname, why.strerror))
                                    if os.path.exists(dirname):
                                        # move the book and any related files too
                                        # ie other book formats, or opf, jpg with same title
                                        # can't move metadata.opf or cover.jpg or similar
                                        # as can't be sure they are ours
                                        # not sure if we need a new listdir here, or whether we can use the old one
                                        list_dir = os.listdir(processpath)
                                        for ourfile in list_dir:
                                            if ourfile.startswith(fname):
                                                if is_valid_booktype(ourfile, booktype="book") \
                                                        or is_valid_booktype(ourfile, booktype="mag") \
                                                        or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']:
                                                    try:
                                                        if lazylibrarian.DESTINATION_COPY:
                                                            shutil.copyfile(os.path.join(processpath, ourfile),
                                                                            os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                        else:
                                                            shutil.move(os.path.join(processpath, ourfile),
                                                                        os.path.join(dirname, ourfile))
                                                            setperm(os.path.join(dirname, ourfile))
                                                    except Exception as why:
                                                        logger.debug("Failed to copy/move file %s to %s, %s" %
                                                                     (ourfile, dirname, str(why)))

                        pp_path = os.path.join(processpath, fname)

                        if os.path.isdir(pp_path):
                            logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle))
                            if not os.listdir(pp_path):
                                logger.debug("Skipping %s, folder is empty" % pp_path)
                            elif bts_file(pp_path):
                                logger.debug("Skipping %s, found a .bts file" % pp_path)
                            else:
                                matches.append([match, pp_path, book])
                    else:
                        pp_path = os.path.join(processpath, fname)
                        matches.append([match, pp_path, book])  # so we can report closest match
                else:
                    logger.debug('Skipping %s' % fname)

            match = 0
            if matches:
                highest = max(matches, key=lambda x: x[0])
                match = highest[0]
                pp_path = highest[1]
                book = highest[2]
            if match and match >= lazylibrarian.DLOAD_RATIO:
                logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle']))

                data = myDB.match('SELECT * from books WHERE BookID=?', (book['BookID'],))
                if data:  # it's a book
                    logger.debug(u'Processing book %s' % book['BookID'])
                    authorname = data['AuthorName']
                    bookname = data['BookName']
                    if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER:
                        logger.warn('Please check your EBOOK_DEST_FOLDER setting')
                        lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\')

                    # Default destination path, should be allowed change per config file.
                    dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace(
                        '$Title', bookname)
                    global_name = unaccented(global_name)
                    # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR
                    # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                    dest_path = unaccented_str(replace_all(dest_path, dic))
                    dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING)
                else:
                    # wanted.BookID holds the magazine title for magazine entries
                    data = myDB.match('SELECT * from magazines WHERE Title=?', (book['BookID'],))
                    if data:  # it's a magazine
                        logger.debug(u'Processing magazine %s' % book['BookID'])
                        # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple
                        # files are downloading, there will be an error in post-processing, trying to go to the
                        # same directory.
                        mostrecentissue = data['IssueDate']  # keep for processing issues arriving out of order
                        # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR
                        # as windows drive identifiers have colon, eg c:  but no colons allowed elsewhere?
                        dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
                               ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}
                        mag_name = unaccented_str(replace_all(book['BookID'], dic))
                        # book auxinfo is a cleaned date, eg 2015-01-01
                        dest_path = lazylibrarian.MAG_DEST_FOLDER.replace(
                            '$IssueDate', book['AuxInfo']).replace('$Title', mag_name)

                        if lazylibrarian.MAG_RELATIVE:
                            if dest_path[0] not in '._':
                                dest_path = '_' + dest_path
                            dest_path = os.path.join(processpath, dest_path).encode(
                                lazylibrarian.SYS_ENCODING)
                        else:
                            dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
                        authorname = None
                        bookname = None
                        global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace(
                            '$Title', mag_name)
                        global_name = unaccented(global_name)
                    else:  # not recognised
                        logger.debug('Nothing in database matching "%s"' % book['BookID'])
                        continue
            else:
                logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle']))
                if match:
                    logger.debug(u'Closest match (%s%%): %s' % (match, pp_path))
                continue

            processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name)

            if processBook:
                logger.debug("Processing %s, %s" % (global_name, book['NZBurl']))
                # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue
                controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Processed", "NZBDate": now()}  # say when we processed it
                myDB.upsert("wanted", newValueDict, controlValueDict)

                if bookname:  # it's a book, if None it's a magazine
                    if len(lazylibrarian.IMP_CALIBREDB):
                        logger.debug('Calibre should have created the extras for us')
                    else:
                        processExtras(myDB, dest_path, global_name, data)
                else:
                    # update mags
                    controlValueDict = {"Title": book['BookID']}
                    if mostrecentissue:
                        if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit():
                            older = int(mostrecentissue) > int(book['AuxInfo'])  # issuenumber
                        else:
                            older = mostrecentissue > book['AuxInfo']  # YYYY-MM-DD
                    else:
                        older = False
                    if older:  # check this in case processing issues arriving out of order
                        newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"}
                    else:
                        newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(),
                                        "IssueStatus": "Open"}
                    myDB.upsert("magazines", newValueDict, controlValueDict)
                    # dest_path is where we put the magazine after processing, but we don't have the full filename
                    # so look for any "book" in that directory
                    dest_file = book_file(dest_path, booktype='mag')
                    controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
                    newValueDict = {"IssueAcquired": today(),
                                    "IssueFile": dest_file,
                                    "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo']))
                                    }
                    myDB.upsert("issues", newValueDict, controlValueDict)

                    # create a thumbnail cover for the new issue
                    create_cover(dest_file)

                # calibre or ll copied/moved the files we want, now delete source files

                to_delete = True
                if book['NZBmode'] in ['torrent', 'magnet']:
                    # Only delete torrents if we don't want to keep seeding
                    if lazylibrarian.KEEP_SEEDING:
                        logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle']))
                        to_delete = False
                    else:
                        # ask downloader to delete the torrent, but not the files
                        # we may delete them later, depending on other settings
                        if book['DownloadID'] != "unknown":
                            logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower()))
                            delete_task(book['Source'], book['DownloadID'], False)
                        else:
                            logger.warn("Unable to remove %s from %s, no DownloadID" %
                                        (book['NZBtitle'], book['Source'].lower()))

                if to_delete:
                    # only delete the files if not in download root dir and if DESTINATION_COPY not set
                    if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath):
                        if os.path.isdir(pp_path):
                            # calibre might have already deleted it?
                            try:
                                shutil.rmtree(pp_path)
                            except Exception as why:
                                logger.debug("Unable to remove %s, %s" % (pp_path, str(why)))

                logger.info('Successfully processed: %s' % global_name)
                ppcount = ppcount + 1
                notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now()))
            else:
                logger.error('Postprocessing for %s has failed.' % global_name)
                logger.error('Warning - Residual files remain in %s.fail' % pp_path)
                controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"}
                newValueDict = {"Status": "Failed", "NZBDate": now()}
                myDB.upsert("wanted", newValueDict, controlValueDict)
                # if it's a book, reset status so we try for a different version
                # if it's a magazine, user can select a different one from pastissues table
                if bookname:
                    myDB.action('UPDATE books SET status = "Wanted" WHERE BookID=?', (book['BookID'],))

                # at this point, as it failed we should move it or it will get postprocessed
                # again (and fail again)
                try:
                    os.rename(pp_path, pp_path + '.fail')
                except Exception as e:
                    logger.debug("Unable to rename %s, %s" % (pp_path, str(e)))

        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
        for directory in downloads:
            dname, extn = os.path.splitext(directory)
            if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']:
                bookID = str(directory).split("LL.(")[1].split(")")[0]
                logger.debug("Book with id: " + str(bookID) + " found in download directory")
                pp_path = os.path.join(processpath, directory)

                if os.path.isfile(pp_path):
                    # a lone file: import from the download root itself
                    pp_path = os.path.join(processpath)

                if os.path.isdir(pp_path):
                    if import_book(pp_path, bookID):
                        ppcount = ppcount + 1

        if ppcount == 0:
            logger.info('No snatched books/mags have been found')
        else:
            logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount)))

        # Now check for any that are still marked snatched...
        if lazylibrarian.TASK_AGE:
            snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')
            if len(snatched) > 0:
                for snatch in snatched:
                    # FUTURE: we could check percentage downloaded or eta?
                    # if percentage is increasing, it's just slow
                    try:
                        when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S')
                        when_snatched = time.mktime(when_snatched)
                        diff = time.time() - when_snatched  # time difference in seconds
                    except Exception:
                        # unreadable or missing date: treat as just snatched, but
                        # unlike a bare except let KeyboardInterrupt/SystemExit through
                        diff = 0
                    hours = int(diff / 3600)
                    if hours >= lazylibrarian.TASK_AGE:
                        logger.warn('%s was sent to %s %s hours ago, deleting failed task' %
                                    (snatch['NZBtitle'], snatch['Source'].lower(), hours))
                        # change status to "Failed", and ask downloader to delete task and files
                        if snatch['BookID'] != 'unknown':
                            myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID=?',
                                        (snatch['BookID'],))
                            myDB.action('UPDATE books SET status = "Wanted" WHERE BookID=?',
                                        (snatch['BookID'],))
                            delete_task(snatch['Source'], snatch['DownloadID'], True)
        if reset:
            scheduleJob(action='Restart', target='processDir')

    except Exception:
        logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
def searchbook(books=None):
    """Search the Newznab providers for wanted books and snatch fuzzy matches.

    books -- optional iterable of dicts with a 'bookid' key. When None a
             backlog search over every book with Status="Wanted" is run, and
             the on-disk provider cache and throttling timestamps are cleared
             first.

    For each wanted book an author-only search term is sent to the enabled
    providers. The first result whose cleaned title scores above 80 with
    fuzz.partial_ratio against the cleaned book name is recorded in the
    "wanted" table and, unless the book is already Snatched, passed to
    DownloadMethod. Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()
    searchlist = []

    if books is None:
        # We are performing a backlog search
        searchbooks = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')

        # Clear cache
        if os.path.exists(".ProviderCache"):
            for f in os.listdir(".ProviderCache"):
                os.unlink("%s/%s" % (".ProviderCache", f))

        # Clearing throttling timeouts
        t = SimpleCache.ThrottlingProcessor()
        t.lastRequestTime.clear()
    else:
        # The user has added a new book
        searchbooks = []
        for book in books:
            searchbooks.extend(
                myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"',
                            [book['bookid']]))

    # Replacement tables are loop-invariant, so build them once.
    dic = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
           ',': '', '*': '', ':': '', ';': ''}
    dicSearchFormatting = {'.': ' +', ' + ': ' '}

    for row in searchbooks:
        bookid = row[0]

        # Search term uses the author name only (the ebook type is no longer appended).
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))

        searchterm1 = re.sub(r'[\.\-\/]', ' ', author)
        searchterm1 = re.sub(r'\(.*?\)', '', searchterm1)
        searchterm1 = re.sub(r"\s\s+", " ", searchterm1)  # strip any double white space
        # Encode once, after all substitutions: re-encoding an already encoded
        # byte string fails on non-ASCII data under Python 2.
        searchterm1 = searchterm1.encode('utf-8')
        searchlist.append({"bookid": bookid, "bookName": row[2], "authorName": row[1],
                           "searchterm": searchterm1.strip()})

    if not lazylibrarian.SAB_HOST and not lazylibrarian.BLACKHOLE:
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB and not lazylibrarian.NEWZNAB2:
        logger.info('No providers are set. use NEWZNAB.')

    # Characters/words removed from both titles before fuzzy comparison.
    dictrepl = {'...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '',
                ',': '', '*': '', '(': '', ')': '', '[': '', ']': '', '#': '', '0': '',
                '1': '', '2': '', '3': '', '4': '', '5': '', '6': '', '7': '', '8': '',
                '9': '', '\'': '', ':': '', '!': '', '-': '', '\\s\\s': ' ', ' the ': ' ',
                ' a ': ' ', ' and ': ' ', ' to ': ' ', ' of ': ' ', ' for ': ' ',
                ' my ': ' ', ' in ': ' ', ' at ': ' ', ' with ': ' '}

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book, "1")

        if lazylibrarian.NEWZNAB2:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST2)
            resultlist += providers.NewzNab(book, "2")

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookName = book['bookName']
        bookID = book['bookid']
        bookName = re.sub(r'[\.\-\/]', ' ', bookName)
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName.lower(), dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)

        addedCounter = 0
        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(
                formatter.replace_all(str(nzb['nzbtitle']).lower(), dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)

            # Score once and reuse it for both the log line and the decision.
            match_ratio = fuzz.partial_ratio(bookName, nzbTitle)
            logger.debug("NZB Match %: " + str(match_ratio))
            if match_ratio > 80:
                logger.debug(u'FOUND %s' % nzbTitle.lower())
                addedCounter = addedCounter + 1
                bookid = nzb['bookid']
                # The "LL.(id)" suffix tags the download name with the book id.
                nzbTitle = (book["authorName"] + ' - ' + book['bookName'] + ' LL.(' + bookID + ')').strip()
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']

                controlValueDict = {"NZBurl": nzburl}
                newValueDict = {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbTitle,
                    "Status": "Skipped"
                }
                myDB.upsert("wanted", newValueDict, controlValueDict)

                snatchedbooks = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"',
                                            [bookid]).fetchone()
                if not snatchedbooks:
                    DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                # Only the first acceptable result is used for each search term.
                break

        if addedCounter == 0:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() +
                        ". Adding book to queue.")
def searchbook(books=None):
    """Search Newznab / NZBMatrix for wanted books and snatch matching results.

    books -- optional iterable of dicts with a 'bookid' key; when None every
             book with Status="Wanted" is searched.

    Two search terms are queued per book: "author title ebooktype" and a
    '+'-joined author/ebook-type term. A provider result is accepted when
    every word of the cleaned book name occurs (case-insensitively) in the
    cleaned result title. Accepted results are upserted into "wanted" and,
    if the book is not already Snatched, handed to DownloadMethod.
    Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()

    if books is None:
        wanted_rows = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        wanted_rows = []
        for requested in books:
            found = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [requested['bookid']])
            wanted_rows.extend(found)

    # Replacement tables used while building the search terms.
    dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', ':':''}
    dicSearchFormatting = {' ':' +', '.':' +', ' + ':' '}
    dicSearchFormatting1 = {' + ':' '}

    searchlist = []
    for row in wanted_rows:
        bookid = row[0]
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        title = formatter.latinToAscii(formatter.replace_all(row[2], dic))

        # First term: author, title and ebook type
        term = author + ' ' + title + ' ' + lazylibrarian.EBOOK_TYPE
        term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
        term = re.sub(r"\s\s+" , " ", term)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName":row[2], "authorName":row[1], "searchterm": term.strip()})

        # Second term: just the author name and ebook type
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting))
        author = formatter.latinToAscii(formatter.replace_all(author, dicSearchFormatting1))
        term = '+' + author + ' +' + lazylibrarian.EBOOK_TYPE
        term = re.sub('[\.\-\/]', ' ', term).encode('utf-8')
        term = re.sub(r'\(.*?\)', '', term).encode('utf-8')
        term = re.sub(r"\s\s+" , " ", term)  # strip any double white space
        searchlist.append({"bookid": bookid, "bookName":row[2], "authorName":row[1], "searchterm": term.strip()})

    if not (lazylibrarian.SAB_HOST or lazylibrarian.BLACKHOLE):
        logger.info('No download method is set, use SABnzbd or blackhole')

    if not (lazylibrarian.NEWZNAB or lazylibrarian.NZBMATRIX):
        logger.info('No providers are set. use NEWZNAB or NZBMATRIX')

    # Characters stripped from titles before comparing them.
    dictrepl = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':'', '(':'', ')':'', '[':'', ']':'', '#':'', '0':'', '1':'', '2':'', '3':'', '4':'', '5':'', '6':'', '7':'', '8':'' , '9':'', '\'':'', ':':'', '\s\s':' ' }

    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.debug('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book)

        # NZBMatrix is only consulted when Newznab produced nothing.
        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.debug('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(book)

        if not resultlist:
            logger.debug("Adding book %s to queue." % book['searchterm'])
            continue

        bookName = book['bookName']
        bookName = re.sub('[\.\-\/]', ' ', bookName)
        bookName = re.sub(r'\(.*?\)', '', bookName)
        bookName = formatter.latinToAscii(formatter.replace_all(bookName, dictrepl)).strip()
        logger.debug(u'bookName %s' % bookName)

        wanted_words = [word.lower() for word in bookName.split()]
        addedCounter = 0

        for nzb in resultlist:
            nzbTitle = formatter.latinToAscii(formatter.replace_all(nzb['nzbtitle'], dictrepl)).strip()
            logger.debug(u'nzbName %s' % nzbTitle)

            lowered_title = nzbTitle.lower()
            # Accept only when every word of the book name appears in the title.
            if all(word in lowered_title for word in wanted_words):
                logger.debug(u'FOUND %s' % lowered_title)
                addedCounter += 1
                bookid = nzb['bookid']
                nzbTitle = (book["authorName"] + ' ' + bookName).strip()
                nzburl = nzb['nzburl']
                nzbprov = nzb['nzbprov']

                myDB.upsert(
                    "wanted",
                    {
                        "NZBprov": nzbprov,
                        "BookID": bookid,
                        "NZBdate": formatter.today(),
                        "NZBtitle": nzbTitle,
                        "Status": "Skipped"
                    },
                    {"NZBurl": nzburl})

                already_snatched = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
                if not already_snatched:
                    DownloadMethod(bookid, nzbprov, nzbTitle, nzburl)
                time.sleep(1)

        if not addedCounter:
            logger.info("No nzb's found for " + (book["authorName"] + ' ' + bookName).strip() + ". Adding book to queue.")
def processDir():
    """Post-process snatched downloads found in the download directory.

    For every row in "wanted" with Status="Snatched" whose NZBtitle is an
    entry of lazylibrarian.DOWNLOAD_DIR, work out the destination folder and
    file name (book or magazine), call processDestination, and on success
    update the wanted/magazines/issues tables and send a notification.
    Afterwards any remaining entry whose name contains "LL.(<bookid>)" is
    passed to import_book.

    Returns False when the download directory cannot be listed, otherwise
    None.
    """
    # rename this thread
    threading.currentThread().name = "POSTPROCESS"

    processpath = lazylibrarian.DOWNLOAD_DIR

    logger.debug(' Checking [%s] for files to post process' % processpath)

    try:
        downloads = os.listdir(processpath)
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        return False

    myDB = database.DBConnection()
    snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"')

    if snatched is None:
        logger.info('No books are snatched. Nothing to process.')
        return
    if downloads is None:
        logger.info('No downloads are found. Nothing to process.')
        return

    # Characters that are stripped/replaced in destination paths.
    dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's',
           ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''}

    ppcount = 0
    for book in snatched:
        if book['NZBtitle'] not in downloads:
            logger.debug("Snatched NZB %s is not in download directory" % (book['NZBtitle']))
            continue

        pp_path = os.path.join(processpath, book['NZBtitle'])
        logger.debug('Found book/mag folder %s.' % pp_path)

        # Parameterised queries: ids/titles may contain quote characters.
        data = myDB.select('SELECT * from books WHERE BookID=?', [book['BookID']])
        mostrecentissue = None
        if data:
            authorname = data[0]['AuthorName']
            bookname = data[0]['BookName']

            # Destination folder/file patterns come from the configuration.
            dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace('$Title', bookname)
            global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace('$Title', bookname)
            dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
        else:
            # Not a book: for magazines the BookID column holds the magazine title.
            data = myDB.select('SELECT * from magazines WHERE Title=?', [book['BookID']])
            if not data:
                logger.debug("Snatched magazine %s is not in download directory" % (book['BookID']))
                continue

            # AuxInfo holds this download's issue date. It is kept per download
            # because several issues of one magazine may be processed at once
            # and must not end up in the same directory.
            mostrecentissue = data[0]['IssueDate']  # keep this for processing issues arriving out of order
            dest_path = lazylibrarian.MAG_DEST_FOLDER.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])
            if lazylibrarian.MAG_RELATIVE:
                # startswith() also copes with an empty folder pattern.
                if not dest_path.startswith(('.', '_')):
                    dest_path = '_' + dest_path
                dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING)
            else:
                dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING)
            authorname = None
            bookname = None
            global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace('$Title', book['BookID'])

        dest_path = formatter.latinToAscii(formatter.replace_all(dest_path, dic))
        try:
            os.chmod(dest_path, 0o777)
        except Exception:
            # Best effort only; failure is not fatal.
            logger.debug("Could not chmod post-process directory: " + str(dest_path))

        processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name, book['BookID'])

        if not processBook:
            logger.error('Postprocessing for %s has failed.' % global_name)
            logger.error('Warning - Residual files remain in %s' % pp_path)
            continue

        ppcount = ppcount + 1

        # update nzbs
        controlValueDict = {"NZBurl": book['NZBurl']}
        newValueDict = {"Status": "Processed", "NZBDate": formatter.today()}  # say when we processed it
        myDB.upsert("wanted", newValueDict, controlValueDict)

        if bookname is not None:  # it's a book, if None it's a magazine
            processExtras(myDB, dest_path, global_name, data)
        else:
            # update mags
            controlValueDict = {"Title": book['BookID']}
            if mostrecentissue > book['AuxInfo']:  # check this in case processing issues arriving out of order
                newValueDict = {"LastAcquired": formatter.today(), "IssueStatus": "Open"}
            else:
                newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": formatter.today(),
                                "IssueStatus": "Open"}
            myDB.upsert("magazines", newValueDict, controlValueDict)

            # dest_path is where we put the magazine after processing, but we don't
            # have the full filename, so look for any "book" in that directory
            dest_file = book_file(dest_path)
            controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']}
            newValueDict = {"IssueAcquired": formatter.today(), "IssueFile": dest_file}
            myDB.upsert("issues", newValueDict, controlValueDict)

        logger.info('Successfully processed: %s' % global_name)
        notifiers.notify_download(formatter.latinToAscii(global_name) + ' at ' + formatter.now())

    #
    # TODO Seems to be duplication here. Can we just scan once for snatched books
    # instead of scan for snatched and then scan for directories with "LL.(bookID)" in?
    # Should there be any directories with "LL.(bookID)" that aren't in snatched?
    # Maybe this was put in for manually downloaded books?
    #
    try:
        downloads = os.listdir(processpath)  # check in case we processed/deleted some above
    except OSError:
        logger.error('Could not access [%s] directory ' % processpath)
        downloads = []

    for directory in downloads:
        if "LL.(" not in directory:
            continue
        bookID = str(directory).split("LL.(")[1].split(")")[0]
        logger.debug("Book with id: " + str(bookID) + " is in downloads")
        pp_path = os.path.join(processpath, directory)

        # A loose file tagged with LL.(id): import from the download directory itself.
        if os.path.isfile(pp_path):
            pp_path = os.path.join(processpath)

        if os.path.isdir(pp_path):
            logger.debug('Found LL folder %s.' % pp_path)
            if import_book(pp_path, bookID):
                ppcount = ppcount + 1

    if ppcount:
        logger.info('%s books/mags have been processed.' % ppcount)
    else:
        logger.info('No snatched books/mags have been found')
def find_book(self, bookid=None, queue=None):
    """Fetch one Google Books volume by id and add it to the books table as Wanted.

    bookid -- Google Books volume id appended to the volumes API URL.
    queue  -- accepted for interface compatibility; not used here.

    The volume is skipped (nothing written) when the request returns no
    data, the volume has no author, or GoodReads cannot supply an author id.
    After the upsert, the cover, series information and work page are
    refreshed through the bookwork helpers. Returns None.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()
    if not lazylibrarian.GB_API:
        logger.warn('No GoogleBooks API key, check config')
    URL = 'https://www.googleapis.com/books/v1/volumes/' + \
        str(bookid) + "?key=" + lazylibrarian.GB_API
    jsonresults, in_cache = self.get_request(URL)

    if jsonresults is None:
        # The title is not known yet at this point, so report the id.
        logger.debug('No results found for %s' % bookid)
        return

    volume = jsonresults['volumeInfo']

    bookname = volume['title']
    bookname = bookname.replace(':', '').replace('"', '').replace("'", "")
    bookname = unidecode(u'%s' % bookname)
    bookname = bookname.strip()  # strip whitespace

    try:
        authorname = volume['authors'][0]
    except (KeyError, IndexError):
        logger.debug(
            'Book %s does not contain author field, skipping' % bookname)
        return

    try:
        # warn if language is in ignore list, but user said they wanted
        # this book
        booklang = volume['language']
        valid_langs = [valid_lang.strip()
                       for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug(
                'Book %s language does not match preference' % bookname)
    except KeyError:
        logger.debug('Book does not have language field')
        booklang = "Unknown"

    bookpub = volume.get('publisher')

    # Series data is parsed out of subtitles shaped like "(<name> Series #<n>)".
    # Both values must exist even when there is no subtitle at all.
    booksub = volume.get('subtitle')
    series = None
    seriesNum = None
    if booksub is not None:
        try:
            series = booksub.split('(')[1].split(' Series ')[0]
        except IndexError:
            series = None
        try:
            seriesNum = booksub.split('(')[1].split(' Series ')[1].split(')')[0]
            if seriesNum[0] == '#':
                seriesNum = seriesNum[1:]
        except IndexError:
            seriesNum = None

    bookdate = volume.get('publishedDate', '0000-00-00')
    bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
    bookrate = float(volume.get('averageRating', 0))
    bookpages = volume.get('pageCount', 0)
    bookdesc = volume.get('description')

    categories = volume.get('categories')
    bookgenre = categories[0] if categories else None

    # Only an ISBN-10 in the first identifier slot is stored.
    bookisbn = None
    identifiers = volume.get('industryIdentifiers')
    if identifiers and identifiers[0].get('type') == 'ISBN_10':
        bookisbn = identifiers[0].get('identifier')

    booklink = volume['canonicalVolumeLink']

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # Without an author id the book row cannot be linked to an author.
        logger.warn('No author id found for %s, book %s not added' % (authorname, bookname))
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today(),
        "Series": series,
        "SeriesNum": seriesNum
    }
    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = bookwork.getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            myDB.upsert("books", {"BookImg": workcover}, {"BookID": bookid})
    elif bookimg.startswith('http'):
        link = bookwork.cache_cover(bookid, bookimg)
        if link is not None:
            myDB.upsert("books", {"BookImg": link}, {"BookID": bookid})

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = bookwork.getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            myDB.upsert("books",
                        {"Series": series, "SeriesNum": seriesNum},
                        {"BookID": bookid})

    worklink = bookwork.getWorkPage(bookid)
    if worklink:
        myDB.upsert("books", {"WorkPage": worklink}, {"BookID": bookid})
def get_capabilities(provider):
    """
    query provider for caps if none loaded yet, or if config entry is too old and not set manually.

    provider -- dict describing one provider; the keys read here are
                'UPDATED', 'MANUAL', 'HOST' and 'API'.

    When fresh capabilities are fetched and parsed, the keys GENERALSEARCH,
    EXTENDED, BOOKCAT, MAGCAT, BOOKSEARCH, MAGSEARCH and UPDATED are
    overwritten in place. On any request or parse failure the dict is left
    as it was. The (possibly updated) provider dict is always returned.
    """
    match = False
    if len(provider['UPDATED']) == 10:  # any stored values?
        match = True
        if (formatter.age(provider['UPDATED']) > lazylibrarian.CACHE_AGE) and not provider['MANUAL']:
            logger.debug('Stored capabilities for %s are too old' % provider['HOST'])
            match = False

    if match:
        logger.debug('Using stored capabilities for %s' % provider['HOST'])
        return provider

    host = provider['HOST']
    if not str(host).startswith("http"):
        host = 'http://' + host
    URL = host + '/api?t=caps&apikey=' + provider['API']
    logger.debug('Requesting capabilities for %s' % URL)
    request = urllib2.Request(URL)
    if lazylibrarian.PROXY_HOST:
        request.set_proxy(lazylibrarian.PROXY_HOST, lazylibrarian.PROXY_TYPE)
    request.add_header('User-Agent', common.USER_AGENT)

    try:
        resp = urllib2.urlopen(request, timeout=30)  # don't get stuck
    except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
        logger.debug("Error getting capabilities: %s" % e)
        return provider

    if not resp:
        return provider

    if not str(resp.getcode()).startswith("2"):  # (200 OK etc)
        logger.warn(u"Unable to get capabilities for %s: Got %s" % (URL, resp.getcode()))
        return provider

    logger.debug(u"Got capabilities for %s" % request.get_full_url())
    try:
        source_xml = resp.read()
        data = ElementTree.fromstring(source_xml)
    except Exception:
        logger.debug(u"Error getting xml from %s" % URL)
        data = None

    # data is None after a parse failure; len(None) would raise TypeError.
    if data is None or not len(data):
        logger.warn(u"Unable to get capabilities for %s: No data returned" % URL)
        return provider

    logger.debug(u"Parsing xml for capabilities of %s" % URL)

    #############################################################################
    # book search isn't mentioned in the caps xml returned by
    # nzbplanet,jackett,oznzb,usenet-crawler, so we can't use it as a test
    # but the newznab+ ones usually support t=book and categories in 7000 range
    # whereas nZEDb ones don't support t=book and use categories in 8000 range
    # also some providers give searchtype but no supportedparams, so we still
    # can't tell what queries will be accepted
    # also category names can be lowercase or Mixed, magazine subcat name isn't
    # consistent, and subcat can be just subcat or category/subcat subcat > lang
    # eg "Magazines" "Mags" or "Books/Magazines" "Mags > French"
    # Load all languages for now as we don't know which the user might want
    #############################################################################
    #
    # set some defaults
    #
    provider['GENERALSEARCH'] = ''
    provider['EXTENDED'] = '1'
    provider['BOOKCAT'] = ''
    provider['MAGCAT'] = ''
    provider['BOOKSEARCH'] = ''
    provider['MAGSEARCH'] = ''

    search = data.find('searching/search')
    if search is not None and search.attrib.get('available') == 'yes':
        provider['GENERALSEARCH'] = 'search'

    for cat in data.getiterator('category'):
        if 'name' not in cat.attrib or cat.attrib['name'].lower() != 'books':
            continue

        bookcat = cat.attrib['id']  # keep main bookcat for later
        provider['BOOKCAT'] = bookcat
        provider['MAGCAT'] = ''

        # Category 7000 looks like newznab+ (should support book-search);
        # anything else looks like nZEDb (probably does not). Either way an
        # explicit 'available' attribute in the caps overrides the guess.
        provider['BOOKSEARCH'] = 'book' if bookcat == '7000' else ''
        search = data.find('searching/book-search')
        if search is not None and 'available' in search.attrib:
            if search.attrib['available'] == 'yes':
                provider['BOOKSEARCH'] = 'book'
            else:
                provider['BOOKSEARCH'] = ''

        for subcat in cat.getiterator('subcat'):
            subcat_name = subcat.attrib['name'].lower()
            if 'ebook' in subcat_name:
                provider['BOOKCAT'] = "%s,%s" % (provider['BOOKCAT'], subcat.attrib['id'])
            if 'magazines' in subcat_name or 'mags' in subcat_name:
                if provider['MAGCAT']:
                    provider['MAGCAT'] = "%s,%s" % (provider['MAGCAT'], subcat.attrib['id'])
                else:
                    provider['MAGCAT'] = subcat.attrib['id']

        # if no specific magazine subcategory, use books
        if not provider['MAGCAT']:
            provider['MAGCAT'] = bookcat

    logger.debug("Categories: Books %s : Mags %s" % (provider['BOOKCAT'], provider['MAGCAT']))
    provider['UPDATED'] = formatter.today()
    return provider
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Load an author's books from the Google Books API into the books table.

    authorid   -- AuthorID used as the key in the authors/books tables.
    authorname -- author name used in the 'inauthor:' query and stored per book.
    refresh    -- only affects the wording of the final summary log line.

    Results are paged 40 at a time via self.params['startIndex']. Items
    without an author or a language, or with a language outside
    lazylibrarian.IMP_PREFLANG, are skipped, as are titles starting with a
    character other than a word character or '-'. Books already marked
    "Ignored" are left untouched. The author row is set to "Loading" at the
    start and to "Active" with refreshed totals at the end.

    Returns books_dict, which this method never populates (always []).
    """
    books_dict = []
    set_url = self.url + urllib.quote('inauthor:' + '"' + authorname + '"')
    api_hits = 0
    logger.info('[%s] Now processing books with Google Books API' % authorname)

    # Author is loading
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    # Language preferences do not change while we page through results.
    valid_langs = [valid_lang.strip()
                   for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

    startindex = 0
    resultcount = 0
    removedResults = 0
    ignored = 0
    added_count = 0
    updated_count = 0
    book_ignore_count = 0
    total_count = 0

    try:
        while True:
            self.params['startIndex'] = startindex
            URL = set_url + '&' + urllib.urlencode(self.params)

            try:
                jsonresults = json.JSONDecoder().decode(
                    urllib2.urlopen(URL, timeout=30).read())
            except HTTPError as err:
                logger.error(
                    'Google API returned HTTP Error - probably time/rate limiting - [%s]' % err.msg)
                # No usable page was fetched, so stop paging instead of
                # re-reading a previous (or non-existent) result set.
                break

            api_hits = api_hits + 1
            number_results = jsonresults['totalItems']
            logger.debug('[%s] Searching url: %s' % (authorname, URL))
            if number_results == 0:
                logger.info('Found no results for %s' % authorname)
                break

            startindex = startindex + 40

            for item in jsonresults['items']:
                total_count = total_count + 1
                volume = item.get('volumeInfo') or {}

                # skip if no author, no author is no book.
                if not volume.get('authors'):
                    logger.debug('Skipped a result without authorfield.')
                    continue

                # skip if language is missing or not in the preferred list
                if 'language' not in volume:
                    ignored = ignored + 1
                    logger.debug('Skipped a result where no language is found')
                    continue
                booklang = volume['language']
                if booklang not in valid_langs:
                    logger.debug('Skipped a book with language %s' % booklang)
                    ignored = ignored + 1
                    continue

                bookpub = volume.get('publisher')
                booksub = volume.get('subtitle')
                bookdate = volume.get('publishedDate', '0000-00-00')
                bookimg = volume.get('imageLinks', {}).get('thumbnail', 'images/nocover.png')
                bookrate = float(volume.get('averageRating', 0))
                bookpages = volume.get('pageCount', 0)
                bookdesc = volume.get('description')

                categories = volume.get('categories')
                bookgenre = categories[0] if categories else None

                # Only an ISBN-10 in the first identifier slot is stored.
                bookisbn = None
                identifiers = volume.get('industryIdentifiers')
                if identifiers and identifiers[0].get('type') == 'ISBN_10':
                    bookisbn = identifiers[0].get('identifier')

                bookid = item['id']
                bookname = volume['title']
                booklink = volume['canonicalVolumeLink']

                find_book_status = myDB.select(
                    "SELECT * FROM books WHERE BookID = ?", [bookid])
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                else:
                    book_status = "Skipped"

                # remove books with bad characters at the start of the title
                if re.match(r'[^\w-]', bookname):
                    removedResults = removedResults + 1
                    continue

                if book_status == "Ignored":
                    book_ignore_count = book_ignore_count + 1
                    continue

                controlValueDict = {"BookID": bookid}
                newValueDict = {
                    "AuthorName": authorname,
                    "AuthorID": authorid,
                    "AuthorLink": "",
                    "BookName": bookname,
                    "BookSub": booksub,
                    "BookDesc": bookdesc,
                    "BookIsbn": bookisbn,
                    "BookPub": bookpub,
                    "BookGenre": bookgenre,
                    "BookImg": bookimg,
                    "BookLink": booklink,
                    "BookRate": bookrate,
                    "BookPages": bookpages,
                    "BookDate": bookdate,
                    "BookLang": booklang,
                    "Status": book_status,
                    "BookAdded": formatter.today()
                }
                resultcount = resultcount + 1

                myDB.upsert("books", newValueDict, controlValueDict)
                logger.debug(u"book found " + bookname + " " + bookdate)
                if not find_book_status:
                    logger.info("[%s] Added book: %s" % (authorname, bookname))
                    added_count = added_count + 1
                else:
                    updated_count = updated_count + 1
                    logger.info("[%s] Updated book: %s" % (authorname, bookname))

            if startindex >= number_results:
                break
    except KeyError as e:
        # A response missing an expected key ends paging; totals are still updated below.
        logger.debug('[%s] Stopped processing results, missing key %s' % (authorname, e))

    logger.info(
        '[%s] The Google Books API was hit %s times to populate book list' % (authorname, str(api_hits)))

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? AND Status != 'Ignored' order by BookDate DESC",
        [authorid]).fetchone()
    unignoredbooks = myDB.select(
        "SELECT COUNT(BookName) as unignored FROM books WHERE AuthorID=? AND Status != 'Ignored'",
        [authorid])
    bookCount = myDB.select(
        "SELECT COUNT(BookName) as counter FROM books WHERE AuthorID=?", [authorid])

    controlValueDict = {"AuthorID": authorid}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookCount[0]['counter'],
        "UnignoredBooks": unignoredbooks[0]['unignored'],
        # fetchone() returns None when the author has no un-ignored books.
        "LastBook": lastbook['BookName'] if lastbook else None,
        "LastLink": lastbook['BookLink'] if lastbook else None,
        "LastDate": lastbook['BookDate'] if lastbook else None
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.debug("Found %s total books for author" % total_count)
    logger.debug("Removed %s bad language results for author" % ignored)
    logger.debug("Removed %s bad character results for author" % removedResults)
    logger.debug("Ignored %s books by author marked as Ignored" % book_ignore_count)
    logger.debug("Imported/Updated %s books for author" % resultcount)

    if refresh:
        logger.info(
            "[%s] Book processing complete: Added %s books / Updated %s books" %
            (authorname, str(added_count), str(updated_count)))
    else:
        logger.info(
            "[%s] Book processing complete: Added %s books to the database" %
            (authorname, str(added_count)))
    return books_dict
def searchbook(books=None):
    """Search usenet providers for wanted books and snatch every result.

    books -- optional iterable of dicts with a 'bookid' key; when None all
             books with Status="Wanted" are searched.

    Each provider result is upserted into the "wanted" table with status
    "Skipped" and, unless the book is already Snatched, passed to
    DownloadMethod. Returns None.
    """
    # rename this thread
    threading.currentThread().name = "SEARCHBOOKS"
    myDB = database.DBConnection()

    if books is None:
        # Nothing passed in: take every wanted book
        wanted_rows = myDB.select('SELECT BookID, AuthorName, Bookname from books WHERE Status="Wanted"')
    else:
        # Otherwise only the wanted books whose ids were passed in
        wanted_rows = []
        for requested in books:
            found = myDB.select('SELECT BookID, AuthorName, BookName from books WHERE BookID=? AND Status="Wanted"', [requested['bookid']])
            wanted_rows.extend(found)

    # Characters stripped from author and title before searching
    dic = {'...':'', ' & ':' ', ' = ': ' ', '?':'', '$':'s', ' + ':' ', '"':'', ',':'', '*':''}

    searchlist = []
    for row in wanted_rows:
        # Convert author and title to ASCII
        author = formatter.latinToAscii(formatter.replace_all(row[1], dic))
        title = formatter.latinToAscii(formatter.replace_all(row[2], dic))

        # Build the search term
        searchterm = re.sub('[\.\-\/]', ' ', author + ' ' + title).encode('utf-8')
        searchlist.append({"bookid": row[0], "searchterm": searchterm})

    if not (lazylibrarian.SAB_HOST or lazylibrarian.BLACKHOLE):
        logger.info('No downloadmethod is set, use SABnzbd or blackhole')

    if not lazylibrarian.NEWZNAB:
        logger.info('No providers are set.')

    # Contact the usenet providers for each search term
    for book in searchlist:
        resultlist = []
        if lazylibrarian.NEWZNAB:
            logger.info('Searching NZB\'s at provider %s ...' % lazylibrarian.NEWZNAB_HOST)
            resultlist = providers.NewzNab(book)

        # NZBMatrix is only tried when Newznab gave nothing
        if lazylibrarian.NZBMATRIX and not resultlist:
            logger.info('Searching NZB at provider NZBMatrix ...')
            resultlist = providers.NZBMatrix(book)

        if not resultlist:
            logger.info("Search didn't have results. Adding book %s to queue." % book['searchterm'])
            continue

        for nzb in resultlist:
            bookid = nzb['bookid']
            nzbtitle = nzb['nzbtitle']
            nzburl = nzb['nzburl']
            nzbprov = nzb['nzbprov']

            myDB.upsert(
                "wanted",
                {
                    "NZBprov": nzbprov,
                    "BookID": bookid,
                    "NZBdate": formatter.today(),
                    "NZBtitle": nzbtitle,
                    "Status": "Skipped"
                },
                {"NZBurl": nzburl})

            already_snatched = myDB.action('SELECT * from books WHERE BookID=? and Status="Snatched"', [bookid]).fetchone()
            if not already_snatched:
                DownloadMethod(bookid, nzbprov, nzbtitle, nzburl)
            time.sleep(1)
def get_author_books(self, authorid=None, authorname=None, refresh=False):
    """Import the books GoodReads lists for an author into the "books" table.

    authorid   -- GoodReads author id, used to build the author/list URL
    authorname -- author name, used in log messages and the stats row
    refresh    -- when True the XML cache is bypassed (useCache=not refresh)
                  and the final log line reports updates as well as additions

    The author row is set to Status "Loading" on entry and to "Active"
    (together with LastBook/LastLink/LastDate) once processing has finished.
    Result pages are fetched one after another until a page has no books.
    A summary row is written to the "stats" table.

    Returns books_dict, which this function never populates, so callers
    always receive an empty list. If the first page cannot be fetched the
    function returns early and the author is left in "Loading" state.

    All SQL uses "?" placeholders so quotes inside author names, titles or
    ISBN prefixes cannot break (or alter) the statement.
    """
    # Counters are initialised up front so the summary at the end of the
    # function is valid even when the author has no books at all.
    api_hits = 0
    gr_lang_hits = 0
    lt_lang_hits = 0
    gb_lang_change = 0
    cache_hits = 0
    not_cached = 0
    removedResults = 0
    duplicates = 0
    ignored = 0
    added_count = 0
    updated_count = 0
    book_ignore_count = 0
    total_count = 0

    URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + urllib.urlencode(self.params)

    # Flag the author as loading while we work
    myDB = database.DBConnection()
    controlValueDict = {"AuthorID": authorid}
    newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    books_dict = []
    try:
        rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
    except Exception as e:
        logger.error("Error fetching author books: %s" % e)
        return books_dict
    if rootxml is None:
        logger.debug("Error requesting author books")
        return books_dict
    if not in_cache:
        api_hits = api_hits + 1
    resultxml = rootxml.getiterator('book')

    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]

    if not len(resultxml):
        logger.warn('[%s] No books found for author with ID: %s' % (authorname, authorid))
    else:
        logger.debug("[%s] Now processing books with GoodReads API" % authorname)
        logger.debug(u"url " + URL)

        authorNameResult = rootxml.find('./author/name').text
        logger.debug(u"author name " + authorNameResult)
        loopCount = 1

        while resultxml is not None:
            for book in resultxml:
                total_count = total_count + 1

                pubyear = book.find('publication_year').text
                if pubyear is None:
                    pubyear = "0000"

                try:
                    bookimg = book.find('image_url').text
                    if 'nocover' in bookimg:
                        bookimg = 'images/nocover.png'
                except (KeyError, AttributeError, TypeError):
                    # missing element (AttributeError) or empty element (TypeError)
                    bookimg = 'images/nocover.png'

                # PAB: work out the book language.
                # Books without an isbn are no longer skipped; they are kept with language
                # "Unknown" (config.ini decides whether "Unknown" is an accepted language).
                # Both the GoodReads and LibraryThing APIs are limited to one call per second
                # (LibraryThing also to 1000 hits per day), so asking them for every book is slow.
                # To avoid that we cache languages by ISBN prefix: digits 3-5 of a 13 char ISBN
                # or digits 0-2 of a 10 digit ISBN indicate the region/language, so two books
                # sharing that 3 digit code _should_ be in the same language.
                # The cache lives in its own "languages" table (created, if not existing, in
                # init.py) and holds ALL results, including languages we don't want and books
                # we reject, to maximise cache hits.
                # Lookup order: languages table, then LibraryThing
                # ("http://www.librarything.com/api/thingLang.php?isbn=1234567890"), then GoodReads.
                # If both LT and GR return unknown the prefix is cached as "Unknown" so we
                # don't keep asking.
                # If the configured languages include "All" the whole section is skipped, which
                # is _much_ faster but lets unwanted foreign translations into the database.
                bookLanguage = "Unknown"
                find_field = "id"
                isbn = ""
                isbnhead = ""
                if "All" not in valid_langs:  # do we care about language
                    if book.find('isbn').text is not None:
                        find_field = "isbn"
                        isbn = book.find('isbn').text
                        isbnhead = isbn[0:3]
                    elif book.find('isbn13').text is not None:
                        find_field = "isbn13"
                        isbn = book.find('isbn13').text
                        isbnhead = isbn[3:6]

                    if find_field != 'id':  # isbn or isbn13 found
                        match = myDB.action(
                            'SELECT lang FROM languages where isbn = ?', [isbnhead]).fetchone()
                        if match:
                            bookLanguage = match['lang']
                            cache_hits = cache_hits + 1
                            logger.debug("Found cached language [%s] for %s [%s]" %
                                         (bookLanguage, find_field, isbnhead))
                        else:
                            # no match in cache, try searching librarything for a language code using the isbn
                            # if no language found, librarything return value is "invalid" or "unknown"
                            # returns plain text, not xml
                            BOOK_URL = 'http://www.librarything.com/api/thingLang.php?isbn=' + isbn
                            try:
                                librarything_wait()
                                resp = urllib2.urlopen(BOOK_URL, timeout=30).read()
                                lt_lang_hits = lt_lang_hits + 1
                                logger.debug("LibraryThing reports language [%s] for %s" % (resp, isbnhead))

                                if 'invalid' in resp or 'Unknown' in resp:
                                    find_field = "id"  # reset the field to force search on goodreads
                                else:
                                    bookLanguage = resp  # found a language code
                                    myDB.action('insert into languages values (?, ?)',
                                                [isbnhead, bookLanguage])
                                    logger.debug(u"LT language %s: %s" % (isbnhead, bookLanguage))
                            except Exception as e:
                                logger.error("Error finding LT language result for [%s], %s" % (isbn, e))
                                find_field = "id"  # reset the field to search on goodreads

                    if find_field == 'id':
                        # [or bookLanguage == "Unknown"] no earlier match, we'll have to search the goodreads api
                        try:
                            if book.find(find_field).text is not None:
                                BOOK_URL = 'http://www.goodreads.com/book/show?id=' + \
                                    book.find(find_field).text + '&' + urllib.urlencode(self.params)
                                logger.debug(u"Book URL: " + BOOK_URL)

                                try:
                                    # crude rate limit: wait if we already hit GoodReads this second
                                    time_now = int(time.time())
                                    if time_now <= lazylibrarian.LAST_GOODREADS:
                                        time.sleep(1)

                                    BOOK_rootxml, in_cache = get_xml_request(BOOK_URL)
                                    if BOOK_rootxml is None:
                                        logger.debug('Error requesting book language code')
                                        bookLanguage = ""
                                    else:
                                        if not in_cache:
                                            # only update last_goodreads if the result wasn't found in the cache
                                            lazylibrarian.LAST_GOODREADS = time_now
                                        bookLanguage = BOOK_rootxml.find('./book/language_code').text
                                except Exception as e:
                                    logger.error("Error finding book results: %s" % e)
                                if not in_cache:
                                    gr_lang_hits = gr_lang_hits + 1
                                if not bookLanguage:
                                    bookLanguage = "Unknown"

                                if isbnhead != "":
                                    # we have an isbn prefix, so remember the answer for next time
                                    myDB.action('insert into languages values (?, ?)',
                                                [isbnhead, bookLanguage])
                                    logger.debug("GoodReads reports language [%s] for %s" %
                                                 (bookLanguage, isbnhead))
                                else:
                                    # GR didn't give an isbn so we can't cache it, just use language for this book
                                    not_cached = not_cached + 1

                                logger.debug(u"GR language: " + bookLanguage)
                            else:
                                logger.debug("No %s provided for [%s]" % (find_field, book.find('title').text))
                                # continue

                        except Exception as e:
                            logger.debug(u"An error has occured: %s" % e)

                    if bookLanguage not in valid_langs:
                        logger.debug('Skipped a book with language %s' % bookLanguage)
                        ignored = ignored + 1
                        continue

                bookname = book.find('title').text
                bookid = book.find('id').text
                bookdesc = book.find('description').text
                bookisbn = book.find('isbn').text
                bookpub = book.find('publisher').text
                booklink = book.find('link').text
                bookrate = float(book.find('average_rating').text)
                bookpages = book.find('num_pages').text

                # "Title: Subtitle" is split at the first ': '
                bookname = unaccented(bookname)
                if ': ' in bookname:
                    parts = bookname.split(': ', 1)
                    bookname = parts[0]
                    booksub = parts[1]
                else:
                    booksub = ''

                dic = {':': '', '"': '', '\'': ''}
                bookname = replace_all(bookname, dic)
                bookname = bookname.strip()  # strip whitespace
                booksub = replace_all(booksub, dic)
                booksub = booksub.strip()  # strip whitespace
                if booksub:
                    series, seriesNum = bookSeries(booksub)
                else:
                    series, seriesNum = bookSeries(bookname)

                # GoodReads sometimes has multiple bookids for the same book (same author/title, different editions)
                # and sometimes uses the same bookid if the book is the same but the title is slightly different
                # We use bookid, then reject if another author/title has a different bookid so we just keep one...
                find_book_status = myDB.select('SELECT * FROM books WHERE BookID = ?', [bookid])
                if find_book_status:
                    for resulted in find_book_status:
                        book_status = resulted['Status']
                        locked = resulted['Manual']
                else:
                    book_status = lazylibrarian.NEWBOOK_STATUS
                    locked = False

                rejected = False

                if re.match(r'[^\w-]', bookname):  # reject books with bad characters in title
                    logger.debug(u"removed result [" + bookname + "] for bad characters")
                    removedResults = removedResults + 1
                    rejected = True

                if not rejected and not bookname:
                    logger.debug('Rejecting bookid %s for %s, no bookname' % (bookid, authorNameResult))
                    removedResults = removedResults + 1
                    rejected = True

                if not rejected:
                    find_books = myDB.select(
                        'SELECT * FROM books WHERE BookName = ? and AuthorName = ?',
                        [bookname, authorNameResult])
                    if find_books:
                        for find_book in find_books:
                            if find_book['BookID'] != bookid:
                                # we have a book with this author/title already
                                logger.debug('Rejecting bookid %s for [%s][%s] already got %s' %
                                             (find_book['BookID'], authorNameResult, bookname, bookid))
                                duplicates = duplicates + 1
                                rejected = True
                                break

                if not rejected:
                    # GoodReads sometimes has several entries with the same BookID
                    find_books = myDB.select('SELECT * FROM books WHERE BookID = ?', [bookid])
                    if find_books:
                        # we have a book with this bookid already
                        logger.debug('Rejecting bookid %s for [%s][%s] already got this bookid in database' %
                                     (bookid, authorNameResult, bookname))
                        duplicates = duplicates + 1
                        rejected = True
                        # NOTE(review): this break leaves the "for book" loop, so the rest of
                        # the current page is not processed once a known BookID is met.
                        # Behaviour kept as found - confirm whether "continue" was intended.
                        break

                if not rejected:
                    if book_status != "Ignored":
                        if not locked:
                            controlValueDict = {"BookID": bookid}
                            newValueDict = {
                                "AuthorName": authorNameResult,
                                "AuthorID": authorid,
                                "AuthorLink": None,
                                "BookName": bookname,
                                "BookSub": booksub,
                                "BookDesc": bookdesc,
                                "BookIsbn": bookisbn,
                                "BookPub": bookpub,
                                "BookGenre": None,
                                "BookImg": bookimg,
                                "BookLink": booklink,
                                "BookRate": bookrate,
                                "BookPages": bookpages,
                                "BookDate": pubyear,
                                "BookLang": bookLanguage,
                                "Status": book_status,
                                "BookAdded": today(),
                                "Series": series,
                                "SeriesNum": seriesNum
                            }

                            myDB.upsert("books", newValueDict, controlValueDict)
                            logger.debug(u"Book found: " + book.find('title').text + " " + pubyear)

                            if 'nocover' in bookimg or 'nophoto' in bookimg:
                                # try to get a cover from librarything
                                workcover = getBookCover(bookid)
                                if workcover:
                                    logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": workcover}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            elif bookimg and bookimg.startswith('http'):
                                link = cache_cover(bookid, bookimg)
                                if link is not None:
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {"BookImg": link}
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            if seriesNum is None:
                                # try to get series info from librarything
                                series, seriesNum = getWorkSeries(bookid)
                                if seriesNum:
                                    logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
                                    controlValueDict = {"BookID": bookid}
                                    newValueDict = {
                                        "Series": series,
                                        "SeriesNum": seriesNum
                                    }
                                    myDB.upsert("books", newValueDict, controlValueDict)

                            worklink = getWorkPage(bookid)
                            if worklink:
                                controlValueDict = {"BookID": bookid}
                                newValueDict = {"WorkPage": worklink}
                                myDB.upsert("books", newValueDict, controlValueDict)

                            if not find_book_status:
                                logger.debug(u"[%s] Added book: %s" % (authorname, bookname))
                                added_count = added_count + 1
                            else:
                                logger.debug(u"[%s] Updated book: %s" % (authorname, bookname))
                                updated_count = updated_count + 1
                    else:
                        book_ignore_count = book_ignore_count + 1

            # Fetch the next page; an error or an empty page ends the while loop
            loopCount = loopCount + 1
            URL = 'http://www.goodreads.com/author/list/' + authorid + '.xml?' + \
                urllib.urlencode(self.params) + '&page=' + str(loopCount)
            resultxml = None
            try:
                rootxml, in_cache = get_xml_request(URL, useCache=not refresh)
                if rootxml is None:
                    logger.debug('Error requesting next page of results')
                else:
                    resultxml = rootxml.getiterator('book')
                    if not in_cache:
                        api_hits = api_hits + 1
            except Exception as e:
                resultxml = None
                logger.error("Error finding next page of results: %s" % e)

            if resultxml is not None:
                if all(False for book in resultxml):  # returns True if iterator is empty
                    resultxml = None

    # Record the author's most recent non-ignored book and mark the author active
    lastbook = myDB.action(
        'SELECT BookName, BookLink, BookDate from books WHERE AuthorID=? '
        'AND Status != "Ignored" order by BookDate DESC', [authorid]).fetchone()
    if lastbook:
        lastbookname = lastbook['BookName']
        lastbooklink = lastbook['BookLink']
        lastbookdate = lastbook['BookDate']
    else:
        lastbookname = None
        lastbooklink = None
        lastbookdate = None

    controlValueDict = {"AuthorID": authorid}
    newValueDict = {
        "Status": "Active",
        "LastBook": lastbookname,
        "LastLink": lastbooklink,
        "LastDate": lastbookdate
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    modified_count = added_count + updated_count

    logger.debug("Found %s total book%s for author" % (total_count, plural(total_count)))
    logger.debug("Removed %s bad language result%s for author" % (ignored, plural(ignored)))
    logger.debug("Removed %s bad character or no-name result%s for author" %
                 (removedResults, plural(removedResults)))
    logger.debug("Removed %s duplicate result%s for author" % (duplicates, plural(duplicates)))
    logger.debug("Ignored %s book%s by author marked as Ignored" %
                 (book_ignore_count, plural(book_ignore_count)))
    logger.debug("Imported/Updated %s book%s for author" % (modified_count, plural(modified_count)))

    myDB.action('insert into stats values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
                [authorname, api_hits, gr_lang_hits, lt_lang_hits, gb_lang_change,
                 cache_hits, ignored, removedResults, not_cached, duplicates])

    if refresh:
        logger.info("[%s] Book processing complete: Added %s book%s / Updated %s book%s" %
                    (authorname, added_count, plural(added_count), updated_count, plural(updated_count)))
    else:
        logger.info("[%s] Book processing complete: Added %s book%s to the database" %
                    (authorname, added_count, plural(added_count)))
    return books_dict
"AuthorID": authorid, "AuthorLink": None, "BookName": bookname, "BookSub": None, "BookDesc": bookdesc, "BookIsbn": bookisbn, "BookPub": bookpub, "BookGenre": None, "BookImg": bookimg, "BookLink": booklink, "BookRate": bookrate, "BookPages": bookpages, "BookDate": pubyear, "BookLang": bookLanguage, "Status": book_status, "BookAdded": formatter.today(), "Series": series, "SeriesOrder": seriesOrder } resultsCount = resultsCount + 1 myDB.upsert("books", newValueDict, controlValueDict) logger.debug(u"book found " + book.find('title').text + " " + pubyear) if not find_book_status: logger.info("[%s] Added book: %s" % (authorname, bookname)) added_count = added_count + 1 else: logger.info("[%s] Updated book: %s" % (authorname, bookname)) updated_count = updated_count + 1 else:
def find_book(self, bookid=None, queue=None):
    """Fetch a single book from GoodReads and store it with Status "Wanted".

    bookid -- GoodReads book id (string); appended to the book/show URL and
              used as the BookID key of the upsert
    queue  -- accepted for interface compatibility; not used here

    The book is stored even when its language is not one of the preferred
    languages (only a debug message is logged). After the upsert the cover,
    series information and work page are filled in where available.

    Returns None. The function returns early, without storing anything, if
    the book cannot be fetched or if no GoodReads author id can be found
    for the book's author.
    """
    myDB = database.DBConnection()

    URL = 'https://www.goodreads.com/book/show/' + bookid + '?' + urllib.urlencode(self.params)

    try:
        rootxml, in_cache = get_xml_request(URL)
        if rootxml is None:
            logger.debug("Error requesting book")
            return
    except Exception as e:
        logger.error("Error finding book: %s" % e)
        return

    bookLanguage = rootxml.find('./book/language_code').text
    bookname = rootxml.find('./book/title').text

    if not bookLanguage:
        bookLanguage = "Unknown"
    #
    # PAB user has said they want this book, don't block for bad language, just warn
    #
    valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
    if bookLanguage not in valid_langs:
        logger.debug('Book %s language does not match preference' % bookname)

    bookdate = rootxml.find('./book/publication_year').text
    if bookdate is None:
        bookdate = "0000"

    # Cover url: the author listing uses <image_url>, so try that first and keep
    # the previously used <img_url> as a fallback.
    # NOTE(review): confirm which element the book/show response really carries.
    bookimg = 'images/nocover.png'
    for img_path in ('./book/image_url', './book/img_url'):
        img_node = rootxml.find(img_path)
        if img_node is not None and img_node.text:
            if 'assets/nocover' not in img_node.text:
                bookimg = img_node.text
            break

    authorname = rootxml.find('./book/authors/author/name').text
    bookdesc = rootxml.find('./book/description').text
    bookisbn = rootxml.find('./book/isbn').text
    bookpub = rootxml.find('./book/publisher').text
    booklink = rootxml.find('./book/link').text
    bookrate = float(rootxml.find('./book/average_rating').text)
    bookpages = rootxml.find('./book/num_pages').text

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # Without an author id the book row cannot be linked to an author
        logger.warn(u"Nothing found for %s" % authorname)
        return
    AuthorID = author['authorid']

    # "Title: Subtitle" is split at the first ': '
    booksub = ''
    bookname = unaccented(bookname)
    if ': ' in bookname:
        parts = bookname.split(': ', 1)
        bookname = parts[0]
        booksub = parts[1]

    dic = {':': '', '"': '', '\'': ''}
    bookname = replace_all(bookname, dic)
    bookname = bookname.strip()  # strip whitespace
    booksub = replace_all(booksub, dic)
    booksub = booksub.strip()  # strip whitespace
    if booksub:
        series, seriesNum = bookSeries(booksub)
    else:
        series, seriesNum = bookSeries(bookname)

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": None,
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": None,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": bookLanguage,
        "Status": "Wanted",
        "BookAdded": today(),
        "Series": series,
        "SeriesNum": seriesNum
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.debug("%s added to the books database" % bookname)

    if 'nocover' in bookimg or 'nophoto' in bookimg:
        # try to get a cover from librarything
        workcover = getBookCover(bookid)
        if workcover:
            logger.debug(u'Updated cover for %s to %s' % (bookname, workcover))
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": workcover}
            myDB.upsert("books", newValueDict, controlValueDict)

    elif bookimg and bookimg.startswith('http'):
        link = cache_cover(bookid, bookimg)
        if link is not None:
            controlValueDict = {"BookID": bookid}
            newValueDict = {"BookImg": link}
            myDB.upsert("books", newValueDict, controlValueDict)

    if seriesNum is None:
        # try to get series info from librarything
        series, seriesNum = getWorkSeries(bookid)
        if seriesNum:
            logger.debug(u'Updated series: %s [%s]' % (series, seriesNum))
            controlValueDict = {"BookID": bookid}
            newValueDict = {
                "Series": series,
                "SeriesNum": seriesNum
            }
            myDB.upsert("books", newValueDict, controlValueDict)

    worklink = getWorkPage(bookid)
    if worklink:
        controlValueDict = {"BookID": bookid}
        newValueDict = {"WorkPage": worklink}
        myDB.upsert("books", newValueDict, controlValueDict)
def find_book(self, bookid=None, queue=None):
    """Fetch a single volume from the Google Books API and store it as "Wanted".

    bookid -- Google Books volume id; appended to the volumes URL and used as
              the BookID key of the upsert
    queue  -- accepted for interface compatibility; not used here

    German umlauts in the title are transliterated and ':' is removed.
    Optional volume fields fall back to defaults when absent (None, 0,
    '0000-00-00', 'images/nocover.png', language "Unknown"). The author id
    is looked up through GoodReads by the first listed author name.

    Returns None. Nothing is stored when the volume lists no author or when
    no GoodReads author id can be found. Errors from the HTTP request, the
    JSON decoding, or a missing 'volumeInfo'/'title'/'canonicalVolumeLink'
    entry propagate to the caller.
    """
    threading.currentThread().name = "GB-ADD-BOOK"
    myDB = database.DBConnection()

    URL = 'https://www.googleapis.com/books/v1/volumes/' + str(bookid) + "?key=" + lazylibrarian.GB_API
    response = urllib2.urlopen(URL, timeout=30)
    try:
        raw = response.read()
    finally:
        # always release the connection, even if the read fails
        response.close()
    jsonresults = json.JSONDecoder().decode(raw)
    volume = jsonresults['volumeInfo']

    # Darkie67:
    # replacing German Umlauts and filtering out ":"
    bookname = volume['title']
    for old, new in ((u'\xf6', u'oe'), (u'\xe4', u'ae'), (u'\xdf', u'ss'),
                     (u'\xc4', u'Ae'), (u'\xdc', u'Ue'), (u'\xd6', u'Oe'),
                     (':', ''), (u'\xfc', u'ue')):
        bookname = bookname.replace(old, new)
    # Darkie67 end

    authors = volume.get('authors') or []
    if not authors:
        # No author means no AuthorName/AuthorID for the row, so give up here
        logger.debug('Book %s does not contain author field' % bookname)
        return
    authorname = authors[0]

    # The user asked for this book, so a language mismatch is only reported
    booklang = volume.get('language')
    if booklang is None:
        logger.debug('Book does not have language field')
        booklang = "Unknown"
    else:
        valid_langs = [valid_lang.strip() for valid_lang in lazylibrarian.IMP_PREFLANG.split(',')]
        if booklang not in valid_langs:
            logger.debug('Book %s language does not match preference' % bookname)

    bookpub = volume.get('publisher')
    booksub = volume.get('subtitle')
    bookdate = volume.get('publishedDate', '0000-00-00')
    bookrate = float(volume.get('averageRating', 0))
    bookpages = volume.get('pageCount', 0)
    bookdesc = volume.get('description')

    try:
        bookimg = volume['imageLinks']['thumbnail']
    except KeyError:
        bookimg = 'images/nocover.png'

    try:
        bookgenre = volume['categories'][0]
    except (KeyError, IndexError):
        bookgenre = None

    # Only the first identifier is inspected, and only an ISBN_10 is kept
    try:
        first_identifier = volume['industryIdentifiers'][0]
        if first_identifier['type'] == 'ISBN_10':
            bookisbn = first_identifier['identifier']
        else:
            bookisbn = None
    except (KeyError, IndexError):
        bookisbn = None

    booklink = volume['canonicalVolumeLink']

    GR = GoodReads(authorname)
    author = GR.find_author_id()
    if not author:
        # Without an author id the book row cannot be linked to an author
        logger.debug('No GoodReads author id found for %s' % authorname)
        return
    AuthorID = author['authorid']

    controlValueDict = {"BookID": bookid}
    newValueDict = {
        "AuthorName": authorname,
        "AuthorID": AuthorID,
        "AuthorLink": "",
        "BookName": bookname,
        "BookSub": booksub,
        "BookDesc": bookdesc,
        "BookIsbn": bookisbn,
        "BookPub": bookpub,
        "BookGenre": bookgenre,
        "BookImg": bookimg,
        "BookLink": booklink,
        "BookRate": bookrate,
        "BookPages": bookpages,
        "BookDate": bookdate,
        "BookLang": booklang,
        "Status": "Wanted",
        "BookAdded": formatter.today()
    }

    myDB.upsert("books", newValueDict, controlValueDict)
    logger.info("%s added to the books database" % bookname)