def search(self, name):
    """Search the configured book API for ``name`` and render the results page.

    The lookup runs ``find_results(name, queue)`` of either ``GoogleBooks``
    or ``GoodReads`` (selected by ``lazylibrarian.BOOK_API``) in a worker
    thread, waits for it, and reads one item from the queue. That item is
    sorted by ``highest_fuzz`` then ``num_reviews``, highest first.

    Raises:
        cherrypy.HTTPRedirect: to ``config`` when ``name`` is empty or when
            ``BOOK_API`` is not one of the two supported values.
    """
    # Validate first: previously the check ran only after the API thread had
    # already been started, so an empty search still triggered a remote lookup.
    if not name:
        raise cherrypy.HTTPRedirect("config")

    if lazylibrarian.BOOK_API == "GoogleBooks":
        api = GoogleBooks(name)
    elif lazylibrarian.BOOK_API == "GoodReads":
        api = GoodReads(name)
    else:
        # An unrecognised setting used to fall through and fail on an
        # unbound ``search_api``; send the user to the config page instead.
        raise cherrypy.HTTPRedirect("config")

    myDB = database.DBConnection()

    queue = Queue.Queue()
    search_api = threading.Thread(target=api.find_results, args=[name, queue])
    search_api.start()
    search_api.join()
    searchresults = queue.get()

    authorsearch = myDB.select("SELECT * from authors")
    authorlist = [item['AuthorName'] for item in authorsearch]

    booksearch = myDB.select("SELECT * from books")
    booklist = [item['BookID'] for item in booksearch]

    sortedlist_final = sorted(searchresults,
                              key=itemgetter('highest_fuzz', 'num_reviews'),
                              reverse=True)

    # NOTE(review): this function has no ``type`` parameter, so ``type`` below
    # is the builtin. Kept unchanged; confirm what the template expects.
    return serve_template(templatename="searchresults.html",
                          title='Search Results for: "' + name + '"',
                          searchresults=sortedlist_final,
                          authorlist=authorlist,
                          booklist=booklist,
                          booksearch=booksearch,
                          type=type)
def addAuthorToDB(authorname=None, refresh=False):
    """Add or refresh an author by name, then load their books.

    Marks the author row as "Loading", looks the author up with
    ``GoodReads.find_author_id``, stores the returned details (replacing a
    'nophoto' image and caching remote images), fetches the author's books
    through the API selected by ``lazylibrarian.BOOK_API`` and finally calls
    ``update_totals``. If the lookup returns nothing the author row is
    deleted and the function returns early.

    Args:
        authorname: author name used both as database key and search term.
        refresh: passed through to the API calls.
    """
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # Bound parameters instead of string formatting: the original escaped
    # single quotes by hand here but not in the DELETE below.
    dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
    controlValueDict = {"AuthorName": authorname}

    # ``not dbauthor`` covers both None and an empty result, matching the
    # newer variants of this function.
    if not dbauthor:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    if 'nophoto' in authorimg:
        authorimg = getAuthorImage(authorid)
    if authorimg and authorimg.startswith('http'):
        # keep the remote URL if caching did not produce a replacement
        newimg = cache_cover(authorid, authorimg)
        if newimg:
            authorimg = newimg

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    update_totals(authorid)
    logger.debug("[%s] Author update complete" % authorname)
def import_book(bookid):
    """Look up ``bookid`` on GoodReads or GoogleBooks and import the book.

    The provider is chosen by ``lazylibrarian.CONFIG['BOOK_API']``; anything
    other than "GoogleBooks" uses GoodReads. ``find_book(bookid, "Wanted")``
    runs in a background thread that is started but not joined.
    """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(bookid)
        thread_name = 'GB-IMPORT'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(bookid)
        thread_name = 'GR-IMPORT'

    worker = threading.Thread(target=provider.find_book,
                              name=thread_name,
                              args=[bookid, "Wanted"])
    worker.start()
def search(self, name, type):
    """Run a GoogleBooks search for ``name`` and render the results page.

    Raises:
        cherrypy.HTTPRedirect: to ``config`` when ``name`` is empty.
    """
    # The API object is built before the empty-name check, as in the original.
    google_api = GoogleBooks(name, type)

    if len(name) != 0:
        found = google_api.find_results()
        page_title = 'Search Results for: "' + name + '"'
        return serve_template(templatename="searchresults.html",
                              title=page_title,
                              searchresults=found,
                              type=type)

    raise cherrypy.HTTPRedirect("config")
def _addBook(self, **kwargs):
    """API command: start a background lookup/import for the book ``id``.

    Sets ``self.data`` to an error string and returns when ``id`` is missing.
    Otherwise starts ``find_book(id)`` on the provider selected by
    ``lazylibrarian.CONFIG['BOOK_API']`` in a thread that is not joined.
    """
    if 'id' not in kwargs:
        self.data = 'Missing parameter: id'
        return

    book_id = kwargs['id']
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(book_id)
        thread_name = 'API-GBRESULTS'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(book_id)
        thread_name = 'API-GRRESULTS'

    worker = threading.Thread(target=provider.find_book,
                              name=thread_name,
                              args=[book_id])
    worker.start()
def addAuthorToDB(authorname=None, refresh=False):
    """Add or refresh an author by name, then load their books.

    Renames the current thread to "DBIMPORT", marks the author row as
    "Loading", looks the author up with ``GoodReads.find_author_id`` and
    stores the returned details, then fetches the author's books through the
    API selected by ``lazylibrarian.BOOK_API``. If the lookup returns nothing
    the author row is deleted and the function returns early.

    Args:
        authorname: author name used both as database key and search term.
        refresh: passed through to the API calls.
    """
    threading.currentThread().name = "DBIMPORT"
    myDB = database.DBConnection()
    GR = GoodReads(authorname)

    # Bound parameters instead of string formatting: the original escaped
    # single quotes by hand here but not in the DELETE below.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
        logger.debug("Now adding new author: %s to database" % authorname)
    else:
        newValueDict = {"Status": "Loading"}
        logger.debug("Now updating author: %s" % authorname)
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id(refresh=refresh)
    if not author:
        logger.warn(u"Nothing found for %s" % authorname)
        myDB.action('DELETE from authors WHERE AuthorName=?', [authorname])
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    if lazylibrarian.BOOK_API == "GoogleBooks":
        book_api = GoogleBooks()
        book_api.get_author_books(authorid, authorname, refresh=refresh)
    elif lazylibrarian.BOOK_API == "GoodReads":
        GR.get_author_books(authorid, authorname, refresh=refresh)

    logger.debug("[%s] Author update complete" % authorname)
def addBook(self, bookid=None):
    """Mark a book as Wanted (importing it first if unknown) and search for it.

    If ``bookid`` is already in the ``books`` table its status is set to
    "Wanted" and the totals of its author (when that author has a row in
    ``authors``) are recalculated. Otherwise the book is fetched via
    ``find_book`` of the API selected by ``lazylibrarian.BOOK_API``; that
    lookup runs in a thread which is joined before continuing. In both cases
    a ``searchbook`` thread is started for the book.

    Raises:
        cherrypy.HTTPRedirect: to ``config`` when ``bookid`` is empty or
            ``BOOK_API`` is unsupported, otherwise always to ``books``.
    """
    # Validate first: previously this was checked only after the lookup
    # thread had been started, and ``len(None)`` failed for the default value.
    if not bookid:
        raise cherrypy.HTTPRedirect("config")

    myDB = database.DBConnection()

    booksearch = myDB.select("SELECT * from books WHERE BookID=?", [bookid])
    if booksearch:
        myDB.upsert("books", {'Status': 'Wanted'}, {'BookID': bookid})
        for book in booksearch:
            AuthorName = book['AuthorName']
            authorsearch = myDB.select("SELECT * from authors WHERE AuthorName=?",
                                       [AuthorName])
            if not authorsearch:
                continue

            # update authors needs to be updated every time a book is marked differently
            # All author queries use bound parameters so names containing
            # quotes no longer break the SQL.
            lastbook = myDB.action(
                "SELECT BookName, BookLink, BookDate from books "
                "WHERE AuthorName=? AND Status != 'Ignored' order by BookDate DESC",
                [AuthorName]).fetchone()
            unignoredbooks = myDB.select(
                "SELECT COUNT(BookName) as unignored FROM books "
                "WHERE AuthorName=? AND Status != 'Ignored'",
                [AuthorName])
            bookCount = myDB.select(
                "SELECT COUNT(BookName) as counter FROM books WHERE AuthorName=?",
                [AuthorName])
            countbooks = myDB.action(
                "SELECT COUNT(*) FROM books "
                "WHERE AuthorName=? AND (Status='Have' OR Status='Open')",
                [AuthorName]).fetchone()
            havebooks = int(countbooks[0])

            controlValueDict = {"AuthorName": AuthorName}
            newValueDict = {
                "TotalBooks": bookCount[0]['counter'],
                "UnignoredBooks": unignoredbooks[0]['unignored'],
                "HaveBooks": havebooks
            }
            # fetchone() yields None when no un-ignored book exists
            if lastbook:
                newValueDict["LastBook"] = lastbook['BookName']
                newValueDict["LastLink"] = lastbook['BookLink']
                newValueDict["LastDate"] = lastbook['BookDate']
            myDB.upsert("authors", newValueDict, controlValueDict)
    else:
        if lazylibrarian.BOOK_API == "GoogleBooks":
            api = GoogleBooks(bookid)
        elif lazylibrarian.BOOK_API == "GoodReads":
            api = GoodReads(bookid)
        else:
            # An unrecognised setting used to fail on an unbound ``find_book``.
            raise cherrypy.HTTPRedirect("config")

        queue = Queue.Queue()
        find_book = threading.Thread(target=api.find_book, args=[bookid, queue])
        find_book.start()
        find_book.join()

    books = [{"bookid": bookid}]
    mags = False
    threading.Thread(target=searchbook, args=[books, mags]).start()
    raise cherrypy.HTTPRedirect("books")
def _findBook(self, **kwargs):
    """API command: search the configured book API for ``name``.

    Sets ``self.data`` to an error string and returns when ``name`` is
    missing. Otherwise runs ``find_results(name, queue)`` in a thread, waits
    for it, and stores the first queue item in ``self.data``.
    """
    if 'name' not in kwargs:
        self.data = 'Missing parameter: name'
        return

    term = kwargs['name']
    results = Queue.Queue()
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(term)
        thread_name = 'API-GBRESULTS'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(term)
        thread_name = 'API-GRRESULTS'

    worker = threading.Thread(target=provider.find_results,
                              name=thread_name,
                              args=[term, results])
    worker.start()
    worker.join()
    self.data = results.get()
def search_for(searchterm):
    """Search GoodReads or GoogleBooks for ``searchterm``.

    The provider is chosen by ``lazylibrarian.CONFIG['BOOK_API']``. The
    lookup runs in a thread that is joined before the result is read.

    Returns:
        The results sorted by ``highest_fuzz`` then ``num_reviews``,
        highest first.
    """
    results = queue.Queue()
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(searchterm)
        thread_name = 'GB-RESULTS'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(searchterm)
        thread_name = 'GR-RESULTS'

    worker = threading.Thread(target=provider.find_results,
                              name=thread_name,
                              args=[searchterm, results])
    worker.start()
    worker.join()

    found = results.get()
    ranking = itemgetter('highest_fuzz', 'num_reviews')
    return sorted(found, key=ranking, reverse=True)
def _findAuthor(self, **kwargs):
    """API command: search the configured book API for an author ``name``.

    Sets ``self.data`` to an error string and returns when ``name`` is
    missing. Otherwise runs ``find_results(name, queue)`` in a thread, waits
    for it, and stores the first queue item in ``self.data``.
    """
    if 'name' not in kwargs:
        self.data = 'Missing parameter: name'
        return

    # Opened as in the original, although nothing below uses it.
    myDB = database.DBConnection()

    term = kwargs['name']
    api_choice = lazylibrarian.BOOK_API
    if api_choice == "GoogleBooks":
        provider = GoogleBooks(term)
        results = Queue.Queue()
        search_api = threading.Thread(target=provider.find_results,
                                      name='API-GBRESULTS',
                                      args=[term, results])
        search_api.start()
    elif api_choice == "GoodReads":
        results = Queue.Queue()
        provider = GoodReads(term)
        search_api = threading.Thread(target=provider.find_results,
                                      name='API-GRRESULTS',
                                      args=[term, results])
        search_api.start()

    # As in the original, there is no fallback for any other BOOK_API value.
    search_api.join()
    self.data = results.get()
def import_book(bookid, ebook=None, audio=None, wait=False):
    """Look up ``bookid`` on GoodReads or GoogleBooks and import the book.

    ebook/audio=None makes find_book use configured default.

    Args:
        bookid: provider id of the book.
        ebook: passed to ``find_book``.
        audio: passed to ``find_book``.
        wait: when true, run ``find_book`` in the calling thread; otherwise
            start it in a background thread that is not joined.
    """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(bookid)
        thread_name = 'GB-IMPORT'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(bookid)
        thread_name = 'GR-IMPORT'

    if wait:
        provider.find_book(bookid, ebook, audio)
        return

    worker = threading.Thread(target=provider.find_book,
                              name=thread_name,
                              args=[bookid, ebook, audio])
    worker.start()
def _findAuthor(self, **kwargs):
    """API command: search the configured book API for an author ``name``.

    Sets ``self.data`` to an error string and returns when ``name`` is
    missing. The name is passed through ``formatAuthorName`` before being
    searched; the first queue item produced by ``find_results`` is stored in
    ``self.data``.
    """
    if 'name' not in kwargs:
        self.data = 'Missing parameter: name'
        return

    authorname = formatAuthorName(kwargs['name'])

    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(authorname)
        thread_name = 'API-GBRESULTS'
    else:
        # lazylibrarian.CONFIG['BOOK_API'] == "GoodReads"
        provider = GoodReads(authorname)
        thread_name = 'API-GRRESULTS'

    results = queue.Queue()
    worker = threading.Thread(target=provider.find_results,
                              name=thread_name,
                              args=[authorname, results])
    worker.start()
    worker.join()
    self.data = results.get()
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    Args:
        authorname: author name; used when no ``authorid`` is given or the
            id lookup did not match.
        refresh: passed through to the API and image-cache calls.
        authorid: author id, tried first when supplied.
        addbooks: when true load the author's books, otherwise set the
            author's status to "Ignored".

    Returns:
        A status message on completion, a message containing the traceback
        if any exception was raised, or None when no author could be matched.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh

        # All SQL below uses bound parameters; the original built the
        # statements with string formatting, which breaks on quotes.
        if authorid:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Now adding new author id: %s to database" % authorid)
                new_author = True
            else:
                authorname = dbauthor['authorname']
                logger.debug("Now updating author %s " % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # details of a manually edited author are left alone
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" placeholder created above
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
            controlValueDict = {"AuthorName": authorname}

            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            author = GR.find_author_id(refresh=refresh)
            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; caching is assumed to be independent of the Manual check.
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                GR = GoodReads(authorname)
                GR.get_author_books(authorid, authorname, bookstatus, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)
        else:
            # if we're not loading any books, mark author as ignored
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Ignored"}
            myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete" % authorname
        logger.debug(msg)
        return msg
    except Exception:
        # top-level boundary: report the traceback to the caller and the log
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def addBookToDB(bookid, authorname):
    """Fetch one book and its author from GoodReads and store both.

    The book row is first marked "Loading", then replaced with the details
    returned by ``GoodReads.find_book`` (status "Skipped"). The author's
    details from ``GoodReads.find_author_id`` are then written to the
    ``authors`` table with status "Loading". A failed lookup is logged and
    that part is skipped.

    Args:
        bookid: id of the book; also used as the ``books`` table key.
        authorname: author name; search term and ``authors`` table key.
    """
    search_type = 'book'  # renamed from ``type`` to stop shadowing the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    # Constructed as in the original although the instance is never used.
    # NOTE(review): confirm the constructor has no required side effects
    # before removing this call.
    GoogleBooks(bookid, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}
    if dbbook is None:
        newValueDict = {"BookID": "BookID: %s" % (bookid), "Status": "Loading"}
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()
    if not book:
        logger.warn("Error fetching bookinfo for BookID: " + bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # The placeholder must not be quoted: '?' inside quotes is a string
    # literal, so the statement had no parameter marker for the bound value.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}
    # NOTE(review): unlike the book section, no upsert follows this
    # placeholder, so it is never written - confirm whether one was intended.
    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()
    if not author:
        logger.warn("Error fetching authorinfo with name: " + authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }
        myDB.upsert("authors", newValueDict, controlValueDict)
def addAuthorToDB(authorname=None):
    """Add or refresh an author by name and import their GoogleBooks results.

    Marks the author row as "Loading", stores the details returned by
    ``GoodReads.find_author_id``, upserts every GoogleBooks result whose
    ``authorname`` equals ``authorname`` exactly (status "Skipped"), and
    finally sets the author to "Active" with the book count and latest book.

    Args:
        authorname: author name; search term and ``authors`` table key.
    """
    search_type = 'author'  # renamed from ``type`` to stop shadowing the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    GB = GoogleBooks(authorname, search_type)

    # Bound parameter: the original interpolated the raw name, which broke
    # on names containing an apostrophe.
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?",
                           [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}
    if dbauthor is None:
        newValueDict = {
            "AuthorID": "0: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("authors", newValueDict, controlValueDict)

    author = GR.find_author_id()
    if not author:
        logger.error("Nothing found")
        # Without author details ``authorid``/``authorlink`` are undefined;
        # the original carried on and failed in the book loop below.
        return

    authorid = author['authorid']
    authorlink = author['authorlink']
    authorimg = author['authorimg']
    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "AuthorID": authorid,
        "AuthorLink": authorlink,
        "AuthorImg": authorimg,
        "AuthorBorn": author['authorborn'],
        "AuthorDeath": author['authordeath'],
        "DateAdded": formatter.today(),
        "Status": "Loading"
    }
    myDB.upsert("authors", newValueDict, controlValueDict)

    # process books
    bookscount = 0
    books = GB.find_results()
    for book in books:
        # this is for rare cases where google returns multiple authors who share nameparts
        if book['authorname'] != authorname:
            continue
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "AuthorID": authorid,
            # was ``authorimg``: the link column received the image URL
            "AuthorLink": authorlink,
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }
        myDB.upsert("books", newValueDict, controlValueDict)
        bookscount += 1

    lastbook = myDB.action(
        "SELECT BookName, BookLink, BookDate from books "
        "WHERE AuthorName=? order by BookDate DESC",
        [authorname]).fetchone()

    controlValueDict = {"AuthorName": authorname}
    newValueDict = {
        "Status": "Active",
        "TotalBooks": bookscount
    }
    # fetchone() yields None when the author has no books stored
    if lastbook:
        newValueDict["LastBook"] = lastbook['BookName']
        newValueDict["LastLink"] = lastbook['BookLink']
        newValueDict["LastDate"] = lastbook['BookDate']
    myDB.upsert("authors", newValueDict, controlValueDict)

    logger.info("Processing complete: Added %s books to the database" % bookscount)
def addAuthorToDB(authorname=None, refresh=False, authorid=None, addbooks=True):
    """
    Add an author to the database by name or id, and optionally get a list of all their books
    If author already exists in database, refresh their details and optionally booklist

    Args:
        authorname: author name; used when no ``authorid`` is given or the
            id lookup did not match.
        refresh: passed through to the API and image-cache calls.
        authorid: author id, tried first when supplied.
        addbooks: when true load the author's books, otherwise set the
            author's status to "Ignored".

    Returns:
        A status message on completion, a message containing the traceback
        if any exception was raised, or None when no author could be matched.
    """
    threadname = threading.currentThread().name
    if "Thread-" in threadname:
        threading.currentThread().name = "AddAuthorToDB"
    try:
        myDB = database.DBConnection()
        match = False
        authorimg = ''
        new_author = not refresh
        entry_status = ''

        if authorid:
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?", (authorid,))
            if not dbauthor:
                authorname = 'unknown author'
                logger.debug("Adding new author id %s to database" % authorid)
                new_author = True
            else:
                entry_status = dbauthor['Status']
                authorname = dbauthor['authorname']
                logger.debug("Updating author %s " % authorname)
                new_author = False

            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"Status": "Loading"}
            myDB.upsert("authors", newValueDict, controlValueDict)

            GR = GoodReads(authorname)
            author = GR.get_author_info(authorid=authorid)
            if author:
                authorname = author['authorname']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorID": authorid}
                newValueDict = {
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today()
                }
                # details of a manually edited author are left alone
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorName"] = author['authorname']
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorid)
                if not dbauthor:
                    # remove the "Loading" placeholder created above
                    myDB.action('DELETE from authors WHERE AuthorID=?', (authorid,))

        if authorname and not match:
            authorname = ' '.join(authorname.split())  # ensure no extra whitespace
            GR = GoodReads(authorname)
            author = GR.find_author_id(refresh=refresh)

            # Bound parameters must not be escaped by hand: doubling the
            # apostrophes made the query look for the doubled text, so names
            # such as O'Brien were never found.
            dbauthor = myDB.match("SELECT * from authors WHERE AuthorName=?", (authorname,))
            if author and not dbauthor:
                # may have different name for same authorid (spelling?)
                dbauthor = myDB.match("SELECT * from authors WHERE AuthorID=?",
                                      (author['authorid'],))
                # Only adopt the stored name when that lookup matched; the
                # original subscripted the result even when it was empty.
                if dbauthor:
                    authorname = dbauthor['AuthorName']

            controlValueDict = {"AuthorName": authorname}
            if not dbauthor:
                newValueDict = {
                    "AuthorID": "0: %s" % authorname,
                    "Status": "Loading"
                }
                logger.debug("Now adding new author: %s to database" % authorname)
                entry_status = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
                new_author = True
            else:
                newValueDict = {"Status": "Loading"}
                logger.debug("Now updating author: %s" % authorname)
                entry_status = dbauthor['Status']
                new_author = False
            myDB.upsert("authors", newValueDict, controlValueDict)

            if author:
                authorid = author['authorid']
                authorimg = author['authorimg']
                controlValueDict = {"AuthorName": authorname}
                newValueDict = {
                    "AuthorID": author['authorid'],
                    "AuthorLink": author['authorlink'],
                    "DateAdded": today(),
                    "Status": "Loading"
                }
                if not dbauthor or not dbauthor['manual']:
                    newValueDict["AuthorImg"] = author['authorimg']
                    newValueDict["AuthorBorn"] = author['authorborn']
                    newValueDict["AuthorDeath"] = author['authordeath']
                myDB.upsert("authors", newValueDict, controlValueDict)
                match = True
            else:
                logger.warn(u"Nothing found for %s" % authorname)
                if not dbauthor:
                    myDB.action('DELETE from authors WHERE AuthorName=?', (authorname,))
                return

        if not match:
            logger.error("AddAuthorToDB: No matching result for authorname or authorid")
            return

        # if author is set to manual, should we allow replacing 'nophoto' ?
        new_img = False
        match = myDB.match("SELECT Manual from authors WHERE AuthorID=?", (authorid,))
        if not match or not match['Manual']:
            if authorimg and 'nophoto' in authorimg:
                newimg = getAuthorImage(authorid)
                if newimg:
                    authorimg = newimg
                    new_img = True

        # allow caching
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; caching is assumed to be independent of the Manual check.
        if authorimg and authorimg.startswith('http'):
            newimg, success = cache_img("author", authorid, authorimg, refresh=refresh)
            if success:
                authorimg = newimg
                new_img = True
            else:
                logger.debug('Failed to cache image for %s' % authorimg)

        if new_img:
            controlValueDict = {"AuthorID": authorid}
            newValueDict = {"AuthorImg": authorimg}
            myDB.upsert("authors", newValueDict, controlValueDict)

        if addbooks:
            if new_author:
                bookstatus = lazylibrarian.CONFIG['NEWAUTHOR_STATUS']
            else:
                bookstatus = lazylibrarian.CONFIG['NEWBOOK_STATUS']

            if entry_status not in ['Active', 'Wanted', 'Ignored', 'Paused']:
                entry_status = 'Active'  # default for invalid/unknown or "loading"

            # process books
            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                book_api = GoogleBooks()
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)
            elif lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                book_api = GoodReads(authorname)
                book_api.get_author_books(authorid, authorname, bookstatus,
                                          entrystatus=entry_status, refresh=refresh)

            # update totals works for existing authors only.
            # New authors need their totals updating after libraryscan or import of books.
            if not new_author:
                update_totals(authorid)

            if new_author and lazylibrarian.CONFIG['GR_FOLLOWNEW']:
                res = grfollow(authorid, True)
                if res.startswith('Unable'):
                    logger.warn(res)
                try:
                    # a response without "followid=" stores an empty follow id
                    followid = res.split("followid=")[1]
                    logger.debug('%s marked followed' % authorname)
                except IndexError:
                    followid = ''
                myDB.action('UPDATE authors SET GRfollow=? WHERE AuthorID=?',
                            (followid, authorid))
        else:
            # if we're not loading any books, mark author as ignored
            entry_status = 'Ignored'

        # replace the temporary "Loading" status with the final one
        controlValueDict = {"AuthorID": authorid}
        newValueDict = {"Status": entry_status}
        myDB.upsert("authors", newValueDict, controlValueDict)

        msg = "[%s] Author update complete, status %s" % (authorname, entry_status)
        logger.info(msg)
        return msg
    except Exception:
        # top-level boundary: report the traceback to the caller and the log
        msg = 'Unhandled exception in addAuthorToDB: %s' % traceback.format_exc()
        logger.error(msg)
        return msg
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ try: destdir = lazylibrarian.DIRECTORY('Destination') if not startdir: if not destdir: logger.warn('Cannot find destination directory: %s. Not scanning' % destdir) return 0 startdir = destdir if not os.path.isdir(startdir): logger.warn('Cannot find directory: %s. Not scanning' % startdir) return 0 if not internet(): logger.warn('Libraryscan: No internet connection') return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == destdir: myDB.action('DELETE from stats') try: # remove any extra whitespace in authornames authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"') if authors: logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors)))) for author in authors: authorid = author["AuthorID"] authorname = ' '.join(author['AuthorName'].split()) # Have we got author name both with-and-without extra spaces? 
If so, merge them duplicate = myDB.match( 'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname) if duplicate: myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName']) if author['AuthorID'] != duplicate['AuthorID']: myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' % (duplicate['AuthorID'], author['AuthorID'])) else: myDB.action( 'UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid)) except Exception as e: logger.info('Error: ' + str(e)) logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 modified_count = 0 rescan_count = 0 rescan_hits = 0 file_count = 0 author = "" if lazylibrarian.CONFIG['FULL_SCAN']: cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors' cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"' if not startdir == destdir: cmd += ' and BookFile like "' + startdir + '%"' books = myDB.select(cmd) status = lazylibrarian.CONFIG['NOTFOUND_STATUS'] logger.info('Missing books will be marked as %s' % status) for book in books: bookID = book['BookID'] bookfile = book['BookFile'] if not (bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName'])) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] warned = False # have we warned about no new authors setting matchString = '' for char in lazylibrarian.CONFIG['EBOOK_DEST_FILE']: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE']) for 
book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. # Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. 
User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" gr_id = "" gb_id = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING) try: res = get_book_info(book_filename) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) if not match: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. 
# Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (metafile, str(e))) res = {} # title and creator are the minimum we need if 'title' in res and 'creator' in res: book = res['title'] author = res['creator'] if book and len(book) > 2 and author and len(author) > 2: match = 1 if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'gr_id' in res: gr_id = res['gr_id'] logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id)) if not match: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") if len(book) <= 2 or len(author) <= 2: match = 0 if not match: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead) if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) author, authorid, new = addAuthorNameToDB(author) # get the author name as we know it... 
if author: # author exists, check if this book by this author is in our database # metadata might have quotes in book name # some books might be stored under a different author name # eg books by multiple authors, books where author is "writing as" # or books we moved to "merge" authors book = book.replace("'", "") # First try and find it under author and bookname # as we may have it under a different bookid or isbn to goodreads/googlebooks # which might have several bookid/isbn for the same book bookid = find_book_in_db(myDB, author, book) if not bookid: # Title or author name might not match or multiple authors # See if the gr_id, gb_id is already in our database if gr_id: bookid = gr_id elif gb_id: bookid = gb_id else: bookid = "" if bookid: match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid) if not match: msg = 'Unable to find book %s by %s in database, trying to add it using ' if bookid == gr_id: msg += "GoodReads ID " + gr_id if bookid == gb_id: msg += "GoogleBooks ID " + gb_id logger.debug(msg % (book, author)) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id: GR_ID = GoodReads(gr_id) GR_ID.find_book(gr_id, None) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id: GB_ID = GoogleBooks(gb_id) GB_ID.find_book(gb_id, None) # see if it's there now... match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid) if not match: logger.debug("Unable to add bookid %s to database" % bookid) bookid = "" if not bookid and isbn: # See if the isbn is in our database match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn) if match: bookid = match['BookID'] if not bookid: # get author name from parent directory of this book directory newauthor = os.path.basename(os.path.dirname(r)) # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_ if newauthor.endswith('_'): newauthor = newauthor[:-1] + '.' 
if author.lower() != newauthor.lower(): logger.debug("Trying authorname [%s]" % newauthor) bookid = find_book_in_db(myDB, newauthor, book) if bookid: logger.warn("%s not found under [%s], found under [%s]" % (book, author, newauthor)) # at this point if we still have no bookid, it looks like we # have author and book title but no database entry for it if not bookid: if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": # Either goodreads doesn't have the book or it didn't match language prefs # Since we have the book anyway, try and reload it ignoring language prefs rescan_count += 1 base_url = 'http://www.goodreads.com/search.xml?q=' params = {"key": lazylibrarian.CONFIG['GR_API']} if author[1] in '. ': surname = author forename = '' while surname[1] in '. ': forename = forename + surname[0] + '.' surname = surname[2:].strip() if author != forename + ' ' + surname: logger.debug('Stripped authorname [%s] to [%s %s]' % (author, forename, surname)) author = forename + ' ' + surname author = ' '.join(author.split()) # ensure no extra whitespace searchname = author + ' ' + book searchname = cleanName(unaccented(searchname)) searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING)) set_url = base_url + searchterm + '&' + urllib.urlencode(params) try: rootxml, in_cache = get_xml_request(set_url) if not len(rootxml): logger.debug("Error requesting results from GoodReads") else: resultxml = rootxml.getiterator('work') for item in resultxml: booktitle = item.find('./best_book/title').text book_fuzz = fuzz.token_set_ratio(booktitle, book) if book_fuzz >= 98: logger.debug("Rescan found %s : %s" % (booktitle, language)) rescan_hits += 1 bookid = item.find('./best_book/id').text GR_ID = GoodReads(bookid) GR_ID.find_book(bookid, None) if language and language != "Unknown": # set language from book metadata logger.debug("Setting language from metadata %s : %s" % (booktitle, language)) myDB.action('UPDATE books SET BookLang="%s" WHERE BookID="%s"' % (language, 
bookid)) break if not bookid: logger.warn("GoodReads doesn't know about %s" % book) except Exception as e: logger.error("Error finding rescan results: %s" % str(e)) elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": # if we get here using googlebooks it's because googlebooks # doesn't have the book. No point in looking for it again. logger.warn("GoogleBooks doesn't know about %s" % book) # see if it's there now... if bookid: cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors ' cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid check_status = myDB.match(cmd) if not check_status: logger.debug('Unable to find bookid %s in database' % bookid) else: if check_status['Status'] != 'Open': # we found a new book new_book_count += 1 myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) # store book location so we can check if it gets removed book_filename = os.path.join(r, files) if not check_status['BookFile']: # no previous location myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # location may have changed since last scan elif book_filename != check_status['BookFile']: modified_count += 1 logger.warn("Updating book location for %s %s from %s to %s" % (author, book, check_status['BookFile'], book_filename)) logger.debug("%s %s matched %s BookID %s, [%s][%s]" % (author, book, check_status['Status'], bookid, check_status['AuthorName'], check_status['BookName'])) myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = os.path.dirname(book_filename) coverimg = os.path.join(bookdir, 'cover.jpg') if os.path.isfile(coverimg): cachedir = lazylibrarian.CACHEDIR cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg') copyfile(coverimg, cacheimg) else: logger.warn( "Failed to match book [%s] by [%s] in database" % (book, author)) else: if not warned and not 
lazylibrarian.CONFIG['ADD_AUTHOR']: logger.warn("Add authors to database is disabled") warned = True logger.info("%s/%s new/modified book%s found and added to the database" % (new_book_count, modified_count, plural(new_book_count + modified_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) if startdir == destdir: # On full library scans, check for missing workpages setWorkPages() # and books with unknown language nolang = myDB.match( "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'") nolang = nolang['counter'] if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) # show stats if new books were added stats = myDB.match( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats") st= {'GR_book_hits': stats['sum(GR_book_hits)'], 'GB_book_hits': stats['sum(GR_book_hits)'], 'GR_lang_hits': stats['sum(GR_lang_hits)'], 'LT_lang_hits': stats['sum(LT_lang_hits)'], 'GB_lang_change': stats['sum(GB_lang_change)'], 'cache_hits': stats['sum(cache_hits)'], 'bad_lang': stats['sum(bad_lang)'], 'bad_char': stats['sum(bad_char)'], 'uncached': stats['sum(uncached)'], 'duplicates': stats['sum(duplicates)']} for item in st.keys(): if st[item] is None: st[item] = 0 if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoogleBooks language was changed %s time%s" % (st['GB_lang_change'], plural(st['GB_lang_change']))) if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (st['GR_book_hits'], plural(st['GR_book_hits']))) logger.debug("GoodReads was hit %s time%s for languages" % (st['GR_lang_hits'], plural(st['GR_lang_hits']))) logger.debug("LibraryThing was hit %s time%s for 
languages" % (st['LT_lang_hits'], plural(st['LT_lang_hits']))) logger.debug("Language cache was hit %s time%s" % (st['cache_hits'], plural(st['cache_hits']))) logger.debug("Unwanted language removed %s book%s" % (st['bad_lang'], plural(st['bad_lang']))) logger.debug("Unwanted characters removed %s book%s" % (st['bad_char'], plural(st['bad_char']))) logger.debug("Unable to cache language for %s book%s with missing ISBN" % (st['uncached'], plural(st['uncached']))) logger.debug("Found %s duplicate book%s" % (st['duplicates'], plural(st['duplicates']))) logger.debug("Rescan %s hit%s, %s miss" % (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits)) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.match("select count('ISBN') as counter from languages") logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) # Cache any covers and images images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] # bookname = item['bookname'] newimg, success = cache_img("book", bookid, bookimg) if success: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] # authorname = item['authorname'] newimg, success = cache_img("author", authorid, authorimg) if success: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) # On full scan, update bookcounts for all authors, not just new ones - 
refresh may have located # new books for existing authors especially if switched provider gb/gr or changed wanted languages authors = myDB.select('select AuthorID from authors') else: # On single author/book import, just update bookcount for that author authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""')) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) logger.info('Library scan complete') return new_book_count except Exception: logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())