def import_book(bookid, wait=False):
    """
    Search goodreads or googlebooks for a bookid and import the book.

    bookid: identifier passed to the provider constructor and to find_book
    wait:   if True, find_book runs in the calling thread; otherwise it is
            started on a new thread and this function returns straight away

    The provider is chosen from lazylibrarian.CONFIG['BOOK_API'];
    find_book is always called with "Wanted" as its second argument.
    """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(bookid)
        thread_name = 'GB-IMPORT'
    else:
        # any other BOOK_API value is handled as "GoodReads"
        provider = GoodReads(bookid)
        thread_name = 'GR-IMPORT'

    if wait:
        provider.find_book(bookid, "Wanted")
        return

    worker = threading.Thread(target=provider.find_book, name=thread_name,
                              args=[bookid, "Wanted"])
    worker.start()
def import_book(bookid, ebook=None, audio=None, wait=False):
    """
    Search goodreads or googlebooks for a bookid and import the book.

    bookid: identifier passed to the provider constructor and to find_book
    ebook:  forwarded to find_book; None makes find_book use configured default
    audio:  forwarded to find_book; None makes find_book use configured default
    wait:   if True, find_book runs in the calling thread; otherwise it is
            started on a new thread and this function returns straight away
    """
    if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
        provider = GoogleBooks(bookid)
        thread_name = 'GB-IMPORT'
    else:
        # any other BOOK_API value is handled as "GoodReads"
        provider = GoodReads(bookid)
        thread_name = 'GR-IMPORT'

    if wait:
        provider.find_book(bookid, ebook, audio)
        return

    worker = threading.Thread(target=provider.find_book, name=thread_name,
                              args=[bookid, ebook, audio])
    worker.start()
def grsync(status, shelf):
    """
    Sync lazylibrarian books having the given status with a goodreads shelf.

    Books in the database with this status (for "Open", also "Have") that are
    not on the shelf are added to the shelf. Books on the shelf that do not
    have this status are either updated in the database, moved from the
    'to-read' shelf to the 'owned' shelf (when we already have them), or
    looked up on goodreads and added to the database.

    status: lazylibrarian book status to sync
    shelf:  name of the goodreads shelf

    Returns a tuple (to_shelf, to_ll, moved):
        to_shelf  books successfully added to the goodreads shelf
        to_ll     books passed to GoodReads.find_book for adding to the database
        moved     books successfully added to 'owned' after removal from 'to-read'
    Returns (0, 0, 0) if an unhandled exception occurs (the traceback is logged).
    """

    def _book_to_list(ga, book, *args):
        # Rate-limited wrapper round ga.BookToList: sleep a second if we have
        # already talked to goodreads during this clock second. A failing call
        # is logged and reported as a falsy result instead of aborting the
        # whole sync.
        time_now = int(time.time())
        if time_now <= lazylibrarian.LAST_GOODREADS:
            time.sleep(1)
        lazylibrarian.LAST_GOODREADS = time_now
        try:
            res, _ = ga.BookToList(book, *args)
        except Exception as e:
            logger.debug("Error in BookToList: %s" % str(e))
            res = None
        return res

    try:
        logger.debug('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        # NOTE(review): status and book ids are interpolated into the SQL text;
        # switch to bound parameters if select()/match() accept them
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = [terms['bookid'] for terms in results]

        GA = grauth()
        GR = None
        gr_shelf = GA.get_gr_shelf(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.debug("There are %s %s books, %s books on goodreads %s shelf" %
                     (len(ll_list), dstatus, len(gr_shelf), shelf))

        # use sets for the membership tests, but keep the original ordering
        # of the books we are going to process
        on_shelf = set(gr_shelf)
        in_ll = set(ll_list)
        not_on_shelf = [book for book in ll_list if book not in on_shelf]
        not_in_ll = [book for book in gr_shelf if book not in in_ll]

        to_shelf = 0
        to_ll = 0
        moved = 0

        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                if _book_to_list(GA, book, shelf):
                    logger.debug("%10s added to %s shelf" % (book, shelf))
                    to_shelf += 1
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))

        # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
        # if they are already Open/Have, remove from goodreads to-read shelf, add to owned shelf
        # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open
        for book in not_in_ll:
            cmd = 'select Status from books where bookid="%s"' % book
            result = myDB.match(cmd)
            if not result:
                # add book to database as wanted
                logger.debug('Adding new book %s to database' % book)
                if not GR:
                    GR = GoodReads(book)
                GR.find_book(book)
                to_ll += 1
                continue

            if result['Status'] in ['Have', 'Open']:
                # don't change status if we have it
                if shelf == 'to-read':
                    # need to move it from to-read shelf to owned shelf
                    if _book_to_list(GA, book, 'to-read', 'remove'):
                        logger.debug("%10s removed from to-read shelf" % book)
                    else:
                        logger.debug("Failed to remove %s from to-read shelf" % book)

                    if _book_to_list(GA, book, 'owned', 'add'):
                        logger.debug("%10s added to owned shelf" % book)
                        moved += 1
                    else:
                        logger.debug("Failed to add %s to owned shelf" % book)
                else:
                    logger.debug("%10s is already marked %s" % (book, result['Status']))
            elif shelf == 'owned':
                myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
            else:
                myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll, moved

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0, 0
def grsync(status, shelf):
    """
    Two-way sync between lazylibrarian books of one status and a goodreads shelf.

    The shelf name is lowercased and the shelf is created on goodreads if it
    is not in the user's shelf list. Changes on either side are detected by
    comparing both current lists with the list saved in the "sync" table at
    the end of the previous run, then the new lazylibrarian list is saved.

    status: lazylibrarian status to sync ("Open" also selects "Have")
    shelf:  goodreads shelf name

    Returns (shelf_changed, ll_changed): the number of successful shelf
    add/remove calls and the number of database status updates.
    Returns (0, 0) if the shelf cannot be created or on an unhandled
    exception (the traceback is logged).
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()

        def _ll_bookids():
            # ids of all books currently holding the status we are syncing
            # NOTE(review): values are interpolated into the SQL text here and
            # in the other select/match queries below
            query = 'select bookid from books where status="%s"' % status
            if status == 'Open':
                query += ' or status="Have"'
            return [row['bookid'] for row in myDB.select(query)]

        # (exception message, success message, failure message) per action
        shelf_msgs = {
            'remove': ("Error removing %s from %s: %s %s",
                       "%10s removed from %s shelf",
                       "Failed to remove %s from %s shelf: %s"),
            'add': ("Error adding %s to %s: %s %s",
                    "%10s added to %s shelf",
                    "Failed to add %s to %s shelf: %s"),
        }

        def _update_shelf(book, action):
            # add/remove one book on the goodreads shelf, return 1 if it worked
            err_msg, ok_msg, fail_msg = shelf_msgs[action]
            try:
                ok, content = GA.BookToList(book, shelf, action=action)
            except Exception as e:
                logger.debug(err_msg % (book, shelf, type(e).__name__, str(e)))
                ok = None
                content = ''
            if ok:
                logger.debug(ok_msg % (book, shelf))
                return 1
            logger.warn(fail_msg % (book, shelf, content))
            return 0

        def _book_status(book, gr_api):
            # fetch the book's Status row, asking goodreads to import the book
            # first if we don't know it. Returns (row, gr_api) so the caller
            # can keep reusing the same GoodReads object
            query = 'select Status from books where BookID="%s"' % book
            row = myDB.match(query)
            if not row:
                logger.debug('Adding new book %s to database' % book)
                if not gr_api:
                    gr_api = GoodReads(book)
                gr_api.find_book(book)
                row = myDB.match(query)
            if not row:
                logger.warn('Book %s not found in database' % book)
            return row, gr_api

        ll_list = _ll_bookids()

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        if not any(item['name'] == shelf for item in shelves):
            created, msg = GA.create_shelf(shelf=shelf)
            if not created:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status + "/Have" if status == "Open" else status

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # How the sync works for WANTED:
        #   in last_sync but not in ll_list   -> deleted here, remove from gr_shelf
        #   in last_sync but not in gr_shelf  -> deleted there, mark Skipped here
        #   in ll_list but not in last_sync   -> added here, add to gr_shelf
        #   in gr_shelf but not in last_sync  -> added there, mark Wanted here
        #   finally save the ll list as the new last_sync
        # HAVE/OPEN works the same way, but only changes status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        sync_row = myDB.match(cmd)
        last_sync = getList(sync_row['SyncList']) if sync_row else []
        shelf_changed = 0
        ll_changed = 0

        shelf_ids = set(gr_shelf)
        ll_ids = set(ll_list)
        synced_ids = set(last_sync)
        added_to_shelf = list(shelf_ids - synced_ids)
        removed_from_shelf = list(synced_ids - shelf_ids)
        added_to_ll = list(ll_ids - synced_ids)
        removed_from_ll = list(synced_ids - ll_ids)

        # first the deletions since last sync...
        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            shelf_changed += _update_shelf(book, 'remove')

        # deleted from goodreads
        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            row, GR = _book_status(book, GR)
            if not row:
                continue
            if row['Status'] in ['Have', 'Wanted']:
                myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                ll_changed += 1
                logger.debug("%10s set to Skipped" % book)
            else:
                logger.warn("Not removing %s, book is marked %s" % (book, row['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            shelf_changed += _update_shelf(book, 'add')

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            row, GR = _book_status(book, GR)
            if not row:
                continue
            if status == 'Open':
                if row['Status'] == 'Open':
                    logger.warn("Book %s is already marked Open" % book)
                else:
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Have" % book)
            elif status == 'Wanted':
                # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                # (depending on user prefs, to-read and wanted might not be the same thing)
                if lazylibrarian.CONFIG['GR_UNIQUE'] and row['Status'] in ['Open', 'Have']:
                    shelf_changed += _update_shelf(book, 'remove')
                elif row['Status'] != 'Open':
                    myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Wanted" % book)
                else:
                    logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        ll_list = _ll_bookids()

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
def addBookToDB(bookid, authorname):
    """
    Look up a book and its author on goodreads and store both in the database.

    bookid:     id of the book row to create/update
    authorname: author name used for the goodreads lookups and as the key of
                the authors row

    The books row is first marked "Loading", then overwritten with the
    details returned by GoodReads.find_book() (status "Skipped"). The authors
    row is written from GoodReads.find_author_id(). A failed lookup is logged
    as a warning. Returns None.
    """
    search_type = 'book'  # was called "type", which shadowed the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)
    # NOTE(review): GB is never used below; kept in case the constructor has
    # side effects - confirm and remove
    GB = GoogleBooks(bookid, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID": "BookID: %s" % (bookid),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise TypeError here
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

    # NOTE(review): runs on both paths, so a failed lookup re-stores the
    # "Loading" placeholder - confirm this is intended
    myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # the placeholder must not be quoted: '?' inside quotes is a string
    # literal, so sqlite sees no parameter to bind authorname to
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }

    # NOTE(review): as for books, a failed lookup stores the "Loading"
    # placeholder row - confirm this is intended
    myDB.upsert("authors", newValueDict, controlValueDict)
def grsync(status, shelf):
    """
    Two-way sync between lazylibrarian books of one status and a goodreads shelf.

    The shelf name is lowercased and the shelf is created on goodreads if it
    is not in the user's shelf list. Changes on either side are detected by
    comparing both current lists with the list saved in the "sync" table at
    the end of the previous run, then the new lazylibrarian list is saved.

    status: lazylibrarian status to sync ("Open" also selects "Have")
    shelf:  goodreads shelf name

    Returns (shelf_changed, ll_changed): the number of successful shelf
    add/remove calls and the number of database status updates.
    Returns (0, 0) if the shelf cannot be created or on an unhandled
    exception (the traceback is logged).
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()

        def _ll_bookids():
            # ids of all books currently holding the status we are syncing
            # NOTE(review): values are interpolated into the SQL text here and
            # in the other select/match queries below
            query = 'select bookid from books where status="%s"' % status
            if status == 'Open':
                query += ' or status="Have"'
            return [row['bookid'] for row in myDB.select(query)]

        # (exception message, success message, failure message) per action
        shelf_msgs = {
            'remove': ("Error removing %s from %s: %s %s",
                       "%10s removed from %s shelf",
                       "Failed to remove %s from %s shelf: %s"),
            'add': ("Error adding %s to %s: %s %s",
                    "%10s added to %s shelf",
                    "Failed to add %s to %s shelf: %s"),
        }

        def _update_shelf(book, action):
            # add/remove one book on the goodreads shelf, return 1 if it worked
            err_msg, ok_msg, fail_msg = shelf_msgs[action]
            try:
                ok, content = GA.BookToList(book, shelf, action=action)
            except Exception as e:
                logger.error(err_msg % (book, shelf, type(e).__name__, str(e)))
                ok = None
                content = ''
            if ok:
                logger.debug(ok_msg % (book, shelf))
                return 1
            logger.warn(fail_msg % (book, shelf, content))
            return 0

        def _book_status(book, gr_api):
            # fetch the book's Status row, asking goodreads to import the book
            # first if we don't know it. Returns (row, gr_api) so the caller
            # can keep reusing the same GoodReads object
            query = 'select Status from books where BookID="%s"' % book
            row = myDB.match(query)
            if not row:
                logger.debug('Adding new book %s to database' % book)
                if not gr_api:
                    gr_api = GoodReads(book)
                gr_api.find_book(book)
                row = myDB.match(query)
            if not row:
                logger.warn('Book %s not found in database' % book)
            return row, gr_api

        ll_list = _ll_bookids()

        GA = grauth()
        GR = None
        shelves = GA.get_shelf_list()
        if not any(item['name'] == shelf for item in shelves):
            created, msg = GA.create_shelf(shelf=shelf)
            if not created:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                return 0, 0
            logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status + "/Have" if status == "Open" else status

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        # How the sync works for WANTED:
        #   in last_sync but not in ll_list   -> deleted here, remove from gr_shelf
        #   in last_sync but not in gr_shelf  -> deleted there, mark Skipped here
        #   in ll_list but not in last_sync   -> added here, add to gr_shelf
        #   in gr_shelf but not in last_sync  -> added there, mark Wanted here
        #   finally save the ll list as the new last_sync
        # HAVE/OPEN works the same way, but only changes status if HAVE, not OPEN

        cmd = 'select SyncList from sync where UserID="%s" and Label="%s"' % ("goodreads", shelf)
        sync_row = myDB.match(cmd)
        last_sync = getList(sync_row['SyncList']) if sync_row else []
        shelf_changed = 0
        ll_changed = 0

        shelf_ids = set(gr_shelf)
        ll_ids = set(ll_list)
        synced_ids = set(last_sync)
        added_to_shelf = list(shelf_ids - synced_ids)
        removed_from_shelf = list(synced_ids - shelf_ids)
        added_to_ll = list(ll_ids - synced_ids)
        removed_from_ll = list(synced_ids - ll_ids)

        # first the deletions since last sync...
        logger.info("%s missing from lazylibrarian %s" % (len(removed_from_ll), shelf))
        for book in removed_from_ll:
            shelf_changed += _update_shelf(book, 'remove')

        # deleted from goodreads
        logger.info("%s missing from goodreads %s" % (len(removed_from_shelf), shelf))
        for book in removed_from_shelf:
            row, GR = _book_status(book, GR)
            if not row:
                continue
            if row['Status'] in ['Have', 'Wanted']:
                myDB.action('UPDATE books SET Status="Skipped" WHERE BookID=?', (book,))
                ll_changed += 1
                logger.debug("%10s set to Skipped" % book)
            else:
                logger.warn("Not removing %s, book is marked %s" % (book, row['Status']))

        # new additions to lazylibrarian
        logger.info("%s new in lazylibrarian %s" % (len(added_to_ll), shelf))
        for book in added_to_ll:
            shelf_changed += _update_shelf(book, 'add')

        # new additions to goodreads shelf
        logger.info("%s new in goodreads %s" % (len(added_to_shelf), shelf))
        for book in added_to_shelf:
            row, GR = _book_status(book, GR)
            if not row:
                continue
            if status == 'Open':
                if row['Status'] == 'Open':
                    logger.warn("Book %s is already marked Open" % book)
                else:
                    myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Have" % book)
            elif status == 'Wanted':
                # if in "wanted" and already marked "Open/Have", optionally delete from "wanted"
                # (depending on user prefs, to-read and wanted might not be the same thing)
                if lazylibrarian.CONFIG['GR_UNIQUE'] and row['Status'] in ['Open', 'Have']:
                    shelf_changed += _update_shelf(book, 'remove')
                elif row['Status'] != 'Open':
                    myDB.action('UPDATE books SET Status="Wanted" WHERE BookID=?', (book,))
                    ll_changed += 1
                    logger.debug("%10s set to Wanted" % book)
                else:
                    logger.warn("Not setting %s as Wanted, already marked Open" % book)

        # get new definitive list from ll
        ll_list = _ll_bookids()

        # store as comparison for next sync
        controlValueDict = {"UserID": "goodreads", "Label": shelf}
        newValueDict = {"Date": str(time.time()), "Synclist": ', '.join(ll_list)}
        myDB.upsert("sync", newValueDict, controlValueDict)

        logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return shelf_changed, ll_changed

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        return 0, 0
def addBookToDB(bookid, authorname):
    """
    Look up a book and its author on goodreads and store both in the database.

    bookid:     id of the book row to create/update
    authorname: author name used for the goodreads lookups and as the key of
                the authors row

    The books row is first marked "Loading", then overwritten with the
    details returned by GoodReads.find_book() (status "Skipped"). The authors
    row is written from GoodReads.find_author_id(). A failed lookup is logged
    as a warning. Returns None.
    """
    search_type = 'book'  # was called "type", which shadowed the builtin
    myDB = database.DBConnection()
    GR = GoodReads(authorname, search_type)

    # process book
    dbbook = myDB.action('SELECT * from books WHERE BookID=?', [bookid]).fetchone()
    controlValueDict = {"BookID": bookid}

    if dbbook is None:
        newValueDict = {
            "BookID": "BookID: %s" % (bookid),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}
    myDB.upsert("books", newValueDict, controlValueDict)

    book = GR.find_book()

    if not book:
        # %-formatting so a non-string bookid cannot raise TypeError here
        logger.warn("Error fetching bookinfo for BookID: %s" % bookid)
    else:
        controlValueDict = {"BookID": book['bookid']}
        newValueDict = {
            "AuthorName": book['authorname'],
            "BookName": book['bookname'],
            "BookDesc": book['bookdesc'],
            "BookIsbn": book['bookisbn'],
            "BookImg": book['bookimg'],
            "BookLink": book['booklink'],
            "BookRate": book['bookrate'],
            "BookPages": book['bookpages'],
            "BookDate": book['bookdate'],
            "BookLang": book['booklang'],
            "Status": "Skipped",
            "BookAdded": formatter.today()
        }

    # NOTE(review): runs on both paths, so a failed lookup re-stores the
    # "Loading" placeholder - confirm this is intended
    myDB.upsert("books", newValueDict, controlValueDict)

    # process author
    # the placeholder must not be quoted: '?' inside quotes is a string
    # literal, so sqlite sees no parameter to bind authorname to
    dbauthor = myDB.action("SELECT * from authors WHERE AuthorName=?", [authorname]).fetchone()
    controlValueDict = {"AuthorName": authorname}

    if dbauthor is None:
        newValueDict = {
            "AuthorName": "Authorname: %s" % (authorname),
            "Status": "Loading"
        }
    else:
        newValueDict = {"Status": "Loading"}

    author = GR.find_author_id()

    if not author:
        logger.warn("Error fetching authorinfo with name: %s" % authorname)
    else:
        controlValueDict = {"AuthorName": authorname}
        newValueDict = {
            "AuthorID": author['authorid'],
            "AuthorLink": author['authorlink'],
            "AuthorImg": author['authorimg'],
            "AuthorBorn": author['authorborn'],
            "AuthorDeath": author['authordeath'],
            "DateAdded": formatter.today(),
            "Status": "Loading"
        }

    # NOTE(review): as for books, a failed lookup stores the "Loading"
    # placeholder row - confirm this is intended
    myDB.upsert("authors", newValueDict, controlValueDict)
def LibraryScan(startdir=None): """ Scan a directory tree adding new books into database Return how many books you added """ try: destdir = lazylibrarian.DIRECTORY('Destination') if not startdir: if not destdir: return 0 startdir = destdir if not os.path.isdir(startdir): logger.warn('Cannot find directory: %s. Not scanning' % startdir) return 0 myDB = database.DBConnection() # keep statistics of full library scans if startdir == destdir: myDB.action('DELETE from stats') try: # remove any extra whitespace in authornames authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "% %"') if authors: logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors)))) for author in authors: authorid = author["AuthorID"] authorname = ' '.join(author['AuthorName'].split()) # Have we got author name both with-and-without extra spaces? If so, merge them duplicate = myDB.match('Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' % authorname) if duplicate: myDB.action('DELETE from authors where authorname="%s"' % author['AuthorName']) myDB.action('UPDATE books set AuthorName="%s" WHERE AuthorName="%s"' % (duplicate['AuthorName'], author['AuthorName'])) if author['AuthorID'] != duplicate['AuthorID']: myDB.action('UPDATE books set AuthorID="%s" WHERE AuthorID="%s"' % (duplicate['AuthorID'], author['AuthorID'])) else: myDB.action('UPDATE authors set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid)) myDB.action('UPDATE books set AuthorName="%s" WHERE AuthorID="%s"' % (authorname, authorid)) except Exception as e: logger.info('Error: ' + str(e)) logger.info('Scanning ebook directory: %s' % startdir) new_book_count = 0 modified_count = 0 file_count = 0 author = "" if lazylibrarian.FULL_SCAN: if startdir == destdir: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where Status="Open"') else: books = myDB.select( 'select AuthorName, BookName, BookFile, BookID from books where 
Status="Open"' + \ ' and BookFile like "' + startdir + '%"') status = lazylibrarian.NOTFOUND_STATUS logger.info('Missing books will be marked as %s' % status) for book in books: bookID = book['BookID'] bookfile = book['BookFile'] if not(bookfile and os.path.isfile(bookfile)): myDB.action('update books set Status="%s" where BookID="%s"' % (status, bookID)) myDB.action('update books set BookFile="" where BookID="%s"' % bookID) logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName'])) # to save repeat-scans of the same directory if it contains multiple formats of the same book, # keep track of which directories we've already looked at processed_subdirectories = [] matchString = '' for char in lazylibrarian.EBOOK_DEST_FILE: matchString = matchString + '\\' + char # massage the EBOOK_DEST_FILE config parameter into something we can use # with regular expression matching booktypes = '' count = -1 booktype_list = getList(lazylibrarian.EBOOK_TYPE) for book_type in booktype_list: count += 1 if count == 0: booktypes = book_type else: booktypes = booktypes + '|' + book_type matchString = matchString.replace("\\$\\A\\u\\t\\h\\o\\r", "(?P<author>.*?)").replace( "\\$\\T\\i\\t\\l\\e", "(?P<book>.*?)") + '\.[' + booktypes + ']' pattern = re.compile(matchString, re.VERBOSE) for r, d, f in os.walk(startdir): for directory in d[:]: # prevent magazine being scanned if directory.startswith("_") or directory.startswith("."): d.remove(directory) for files in f: file_count += 1 if isinstance(r, str): r = r.decode(lazylibrarian.SYS_ENCODING) subdirectory = r.replace(startdir, '') # Added new code to skip if we've done this directory before. 
# Made this conditional with a switch in config.ini # in case user keeps multiple different books in the same subdirectory if (lazylibrarian.IMP_SINGLEBOOK) and (subdirectory in processed_subdirectories): logger.debug("[%s] already scanned" % subdirectory) else: # If this is a book, try to get author/title/isbn/language # if epub or mobi, read metadata from the book # If metadata.opf exists, use that allowing it to override # embedded metadata. User may have edited metadata.opf # to merge author aliases together # If all else fails, try pattern match for author/title # and look up isbn/lang from LT or GR later match = 0 if is_valid_booktype(files): logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory)) language = "Unknown" isbn = "" book = "" author = "" gr_id = "" gb_id = "" extn = os.path.splitext(files)[1] # if it's an epub or a mobi we can try to read metadata from it if (extn == ".epub") or (extn == ".mobi"): book_filename = os.path.join( r.encode(lazylibrarian.SYS_ENCODING), files.encode(lazylibrarian.SYS_ENCODING)) try: res = get_book_info(book_filename) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e))) res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'type' in res: extn = res['type'] logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, extn)) else: logger.debug("Book meta incomplete in %s" % book_filename) # calibre uses "metadata.opf", LL uses "bookname - authorname.opf" # just look for any .opf file in the current directory since we don't know # LL preferred authorname/bookname at this point. 
# Allow metadata in file to override book contents as may be users pref metafile = opf_file(r) try: res = get_book_info(metafile) except Exception as e: logger.debug('get_book_info failed for %s, %s' % (metafile, str(e))) res = {} if 'title' in res and 'creator' in res: # this is the minimum we need match = 1 book = res['title'] author = res['creator'] if 'language' in res: language = res['language'] if 'identifier' in res: isbn = res['identifier'] if 'gr_id' in res: gr_id = res['gr_id'] logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" % (isbn, language, author, book, gr_id)) else: logger.debug("File meta incomplete in %s" % metafile) if not match: # no author/book from metadata file, and not embedded either match = pattern.match(files) if match: author = match.group("author") book = match.group("book") else: logger.debug("Pattern match failed [%s]" % files) if match: # flag that we found a book in this subdirectory processed_subdirectories.append(subdirectory) # If we have a valid looking isbn, and language != "Unknown", add it to cache if language != "Unknown" and is_valid_isbn(isbn): logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn)) # we need to add it to language cache if not already # there, is_valid_isbn has checked length is 10 or 13 if len(isbn) == 10: isbnhead = isbn[0:3] else: isbnhead = isbn[3:6] match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % (isbnhead)) if not match: myDB.action('insert into languages values ("%s", "%s")' % (isbnhead, language)) logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead)) else: logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead)) # get authors name in a consistent format if "," in author: # guess its "surname, forename" or "surname, initial(s)" so swap them round words = author.split(',') forename = words[1].strip() surname = words[0].strip() author = forename + ' ' + surname # reformat any initials, we want to end up with A.B. 
van Smith if author[1] == ' ' or author[1] == '.': forename = '' while author[1] == ' ' or author[1] == '.': forename = forename + author[0] + '.' author = author[2:].strip() author = forename + ' ' + author author = ' '.join(author.split()) # ensure no extra whitespace # Check if the author exists, and import the author if not, # before starting any complicated book-name matching to save repeating the search # check_exist_author = myDB.match( 'SELECT * FROM authors where AuthorName="%s"' % author.replace('"', '""')) if not check_exist_author and lazylibrarian.ADD_AUTHOR: # no match for supplied author, but we're allowed to add new ones GR = GoodReads(author) try: author_gr = GR.find_author_id() except Exception as e: logger.warn("Error finding author id for [%s] %s" % (author, str(e))) continue # only try to add if GR data matches found author data if author_gr: authorname = author_gr['authorname'] # "J.R.R. Tolkien" is the same person as "J. R. R. Tolkien" and "J R R Tolkien" match_auth = author.replace('.', ' ') match_auth = ' '.join(match_auth.split()) match_name = authorname.replace('.', ' ') match_name = ' '.join(match_name.split()) match_name = unaccented(match_name) match_auth = unaccented(match_auth) # allow a degree of fuzziness to cater for different accented character handling. 
# some author names have accents, # filename may have the accented or un-accented version of the character # The currently non-configurable value of fuzziness might need to go in config # We stored GoodReads unmodified author name in # author_gr, so store in LL db under that # fuzz.ratio doesn't lowercase for us match_fuzz = fuzz.ratio(match_auth.lower(), match_name.lower()) if match_fuzz < 90: logger.debug("Failed to match author [%s] to authorname [%s] fuzz [%d]" % (author, match_name, match_fuzz)) # To save loading hundreds of books by unknown authors at GR or GB, ignore unknown if (author != "Unknown") and (match_fuzz >= 90): # use "intact" name for author that we stored in # GR author_dict, not one of the various mangled versions # otherwise the books appear to be by a different author! author = author_gr['authorname'] # this new authorname may already be in the # database, so check again check_exist_author = myDB.match( 'SELECT * FROM authors where AuthorName="%s"' % author.replace('"', '""')) if not check_exist_author: logger.info("Adding new author [%s]" % author) try: addAuthorToDB(author, refresh=False) check_exist_author = myDB.match( 'SELECT * FROM authors where AuthorName="%s"' % author.replace('"', '""')) except Exception: continue # check author exists in db, either newly loaded or already there if not check_exist_author: logger.debug("Failed to match author [%s] in database" % author) else: # author exists, check if this book by this author is in our database # metadata might have quotes in book name # some books might be stored under a different author name # eg books by multiple authors, books where author is "writing as" # or books we moved to "merge" authors book = book.replace("'", "") match = False # See if the gr_id, gb_id or isbn is already in our database if gr_id: bookid = gr_id elif gb_id: bookid = gb_id else: bookid = "" if bookid: match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid) if not match: 
logger.debug('Unable to find book %s by %s in database, trying to add it' % (book, author)) if lazylibrarian.BOOK_API == "GoodReads" and gr_id: GR_ID = GoodReads(gr_id) GR_ID.find_book(gr_id, None) elif lazylibrarian.BOOK_API == "GoogleBooks" and gb_id: GB_ID = GoogleBooks(gb_id) GB_ID.find_book(gb_id, None) # see if it's there now... match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid) if not match: logger.debug("Unable to add bookid %s to database" % bookid) bookid = "" if not bookid and isbn: match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn) if match: bookid = match['BookID'] if not bookid: # Try and find it under metadata authorname bookid = find_book_in_db(myDB, author, book) if not bookid: # get author name from parent directory of this book directory newauthor = os.path.basename(os.path.dirname(r)) if author.lower() != newauthor.lower(): bookid = find_book_in_db(myDB, newauthor, book) if bookid: logger.warn("%s not found under [%s], found under [%s]" % (book, author, newauthor)) if bookid: check_status = myDB.match( 'SELECT Status, BookFile, AuthorName, BookName from books where BookID="%s"' % bookid) if not check_status: logger.debug('Unable to find bookid %s in database' % bookid) else: if check_status['Status'] != 'Open': # we found a new book new_book_count += 1 myDB.action( 'UPDATE books set Status="Open" where BookID="%s"' % bookid) # store book location so we can check if it gets removed book_filename = os.path.join(r, files) if not check_status['BookFile']: # no previous location myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # location may have changed since last scan elif book_filename != check_status['BookFile']: modified_count += 1 logger.warn("Updating book location for %s %s from %s to %s" % (author, book, check_status['BookFile'], book_filename)) logger.debug("%s %s matched BookID %s, [%s][%s]" % (author, book, bookid, check_status['AuthorName'], 
check_status['BookName'])) myDB.action('UPDATE books set BookFile="%s" where BookID="%s"' % (book_filename, bookid)) # update cover file to cover.jpg in book folder (if exists) bookdir = os.path.dirname(book_filename) coverimg = os.path.join(bookdir, 'cover.jpg') if os.path.isfile(coverimg): cachedir = lazylibrarian.CACHEDIR cacheimg = os.path.join(cachedir, bookid + '.jpg') copyfile(coverimg, cacheimg) else: logger.warn( "Failed to match book [%s] by [%s] in database" % (book, author)) logger.info("%s/%s new/modified book%s found and added to the database" % (new_book_count, modified_count, plural(new_book_count + modified_count))) logger.info("%s file%s processed" % (file_count, plural(file_count))) if startdir == destdir: # On full library scans, check for missing workpages setWorkPages() # and books with unknown language nolang = myDB.match("select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'") nolang = nolang['counter'] if nolang: logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang))) # show stats if new books were added stats = myDB.match( "SELECT sum(GR_book_hits), sum(GR_lang_hits), sum(LT_lang_hits), sum(GB_lang_change), \ sum(cache_hits), sum(bad_lang), sum(bad_char), sum(uncached), sum(duplicates) FROM stats") if stats and stats['sum(GR_book_hits)']: if lazylibrarian.BOOK_API == "GoogleBooks": logger.debug("GoogleBooks was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoogleBooks language was changed %s time%s" % (stats['sum(GB_lang_change)'], plural(stats['sum(GB_lang_change)']))) if lazylibrarian.BOOK_API == "GoodReads": logger.debug("GoodReads was hit %s time%s for books" % (stats['sum(GR_book_hits)'], plural(stats['sum(GR_book_hits)']))) logger.debug("GoodReads was hit %s time%s for languages" % (stats['sum(GR_lang_hits)'], plural(stats['sum(GR_lang_hits)']))) logger.debug("LibraryThing was hit %s time%s for 
languages" % (stats['sum(LT_lang_hits)'], plural(stats['sum(LT_lang_hits)']))) logger.debug("Language cache was hit %s time%s" % (stats['sum(cache_hits)'], plural(stats['sum(cache_hits)']))) logger.debug("Unwanted language removed %s book%s" % (stats['sum(bad_lang)'], plural(stats['sum(bad_lang)']))) logger.debug("Unwanted characters removed %s book%s" % (stats['sum(bad_char)'], plural(stats['sum(bad_char)']))) logger.debug("Unable to cache language for %s book%s with missing ISBN" % (stats['sum(uncached)'], plural(stats['sum(uncached)']))) logger.debug("Found %s duplicate book%s" % (stats['sum(duplicates)'], plural(stats['sum(duplicates)']))) logger.debug("Cache %s hit%s, %s miss" % (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS)) cachesize = myDB.match("select count('ISBN') as counter from languages") logger.debug("ISBN Language cache holds %s entries" % cachesize['counter']) # Cache any covers and images images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"') if len(images): logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: bookid = item['bookid'] bookimg = item['bookimg'] bookname = item['bookname'] newimg = cache_cover(bookid, bookimg) if newimg: myDB.action('update books set BookImg="%s" where BookID="%s"' % (newimg, bookid)) images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"') if len(images): logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images)))) for item in images: authorid = item['authorid'] authorimg = item['authorimg'] authorname = item['authorname'] newimg = cache_cover(authorid, authorimg) if newimg: myDB.action('update authors set AuthorImg="%s" where AuthorID="%s"' % (newimg, authorid)) # On full scan, update bookcounts for all authors, not just new ones - refresh may have located # new books for 
existing authors especially if switched provider gb/gr or changed wanted languages authors = myDB.select('select AuthorID from authors') else: # On single author/book import, just update bookcount for that author authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' % author.replace('"', '""')) logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors)))) for author in authors: update_totals(author['AuthorID']) logger.info('Library scan complete') return new_book_count except Exception as e: logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())
def grsync(status, shelf):
    """ Synchronise books of one lazylibrarian status with one goodreads shelf.

    status: lazylibrarian book status to sync ("Open" also includes "Have")
    shelf:  goodreads shelf name (lowercased before use, created if missing)

    Books with this status that are not on the shelf are added to the shelf.
    Books on the shelf that do not have this status are either updated in the
    database, removed from the shelf (if already Open/Have and GR_UNIQUE is set)
    or, if unknown to the database, imported via GoodReads.find_book.

    Returns a tuple (to_shelf, to_ll): number of books added to the shelf and
    number of new books added to the database. Returns (0, 0) on any failure,
    so callers can always unpack two values.
    """
    # noinspection PyBroadException
    try:
        shelf = shelf.lower()
        logger.info('Syncing %s to %s shelf' % (status, shelf))
        myDB = database.DBConnection()
        cmd = 'select bookid from books where status="%s"' % status
        if status == 'Open':
            cmd += ' or status="Have"'
        results = myDB.select(cmd)
        ll_list = [terms['bookid'] for terms in results]

        GA = grauth()
        GR = None  # created lazily, only if we need to import a book
        shelves = GA.get_shelf_list()
        found = any(item['name'] == shelf for item in shelves)
        if not found:
            res, msg = GA.create_shelf(shelf=shelf)
            if not res:
                logger.debug("Unable to create shelf %s: %s" % (shelf, msg))
                # same shape as the success return value
                return 0, 0
            else:
                logger.debug("Created new goodreads shelf: %s" % shelf)

        gr_shelf = GA.get_gr_shelf_contents(shelf=shelf)
        dstatus = status
        if dstatus == "Open":
            dstatus += "/Have"

        logger.info("There are %s %s books, %s books on goodreads %s shelf" %
                    (len(ll_list), dstatus, len(gr_shelf), shelf))

        not_on_shelf = [book for book in ll_list if book not in gr_shelf]
        not_in_ll = [book for book in gr_shelf if book not in ll_list]

        to_shelf = 0
        to_ll = 0
        # these need adding to shelf
        if not lazylibrarian.CONFIG['GR_OAUTH_SECRET']:
            logger.debug('Not connected to goodreads')
        else:
            for book in not_on_shelf:
                try:
                    res, content = GA.BookToList(book, shelf)
                except Exception as e:
                    logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                    res = None

                if res:
                    if lazylibrarian.LOGLEVEL > 2:
                        logger.debug("%10s added to %s shelf" % (book, shelf))
                    to_shelf += 1
                else:
                    logger.debug("Failed to add %s to %s shelf" % (book, shelf))

            # "to-read" books need adding to lazylibrarian as "wanted" if not already Open/Have,
            # if in "to-read" and already marked "Open/Have", optionally delete from "to-read"
            # (depending on user prefs, to-read and wanted might not be the same thing)
            # "owned" need adding as "Have" as librarysync will pick up "Open" ones or change Have to Open
            for book in not_in_ll:
                cmd = 'select Status from books where bookid="%s"' % book
                result = myDB.match(cmd)
                if result:
                    if result['Status'] in ['Have', 'Open']:  # don't change status if we have it
                        logger.debug("%10s is already marked %s" % (book, result['Status']))
                        if lazylibrarian.CONFIG['GR_UNIQUE']:
                            try:
                                res, content = GA.BookToList(book, shelf, action='remove')
                            except Exception as e:
                                logger.debug("Error in BookToList: %s %s" % (type(e).__name__, str(e)))
                                res = None
                            if res:
                                logger.debug("%10s removed from %s shelf" % (book, shelf))
                            else:
                                logger.debug("Failed to remove %s from %s shelf" % (book, shelf))
                    elif shelf == 'owned':
                        myDB.action('UPDATE books SET Status="Have" WHERE BookID=?', (book,))
                    else:
                        myDB.action('UPDATE books SET Status=? WHERE BookID=?', (status, book))
                else:  # add book to database as wanted
                    logger.debug('Adding new book %s to database' % book)
                    if not GR:
                        GR = GoodReads(book)
                    GR.find_book(book)
                    to_ll += 1

            logger.debug('Sync %s to %s shelf complete' % (status, shelf))
        return to_shelf, to_ll

    except Exception:
        logger.error('Unhandled exception in grsync: %s' % traceback.format_exc())
        # same shape as the success return value
        return 0, 0
def LibraryScan(startdir=None):
    """ Scan a directory tree adding new books into database.

    startdir: directory to scan. If empty, the configured 'Destination'
              directory is scanned and the scan is treated as a full library
              scan (stats reset, author names tidied, images cached, all
              author book counts refreshed).

    For every valid book file, author/title/isbn/language are read from
    embedded metadata (epub/mobi), then from any .opf file in the directory
    (which overrides embedded data), and finally from a filename pattern
    derived from the EBOOK_DEST_FILE config value. Matching books are marked
    "Open" and their file location is stored.

    Returns how many new books were added. Returns 0 if the scan could not
    start, and None if an unhandled exception was logged.
    """
    try:
        destdir = lazylibrarian.DIRECTORY('Destination')
        if not startdir:
            if not destdir:
                logger.warn('Cannot find destination directory: %s. Not scanning' % destdir)
                return 0
            startdir = destdir

        if not os.path.isdir(startdir):
            logger.warn('Cannot find directory: %s. Not scanning' % startdir)
            return 0

        if not internet():
            logger.warn('Libraryscan: No internet connection')
            return 0

        myDB = database.DBConnection()

        # keep statistics of full library scans
        if startdir == destdir:
            myDB.action('DELETE from stats')
            try:  # remove any extra whitespace in authornames
                # two consecutive spaces: a single-space pattern would select every
                # multi-word name, and the merge step below would then delete it
                authors = myDB.select('SELECT AuthorID,AuthorName FROM authors WHERE AuthorName like "%  %"')
                if authors:
                    logger.info('Removing extra spaces from %s authorname%s' % (len(authors), plural(len(authors))))
                    for author in authors:
                        authorid = author["AuthorID"]
                        authorname = ' '.join(author['AuthorName'].split())
                        if authorname == author['AuthorName']:
                            # already normalised, never treat a row as its own duplicate
                            continue
                        # Have we got author name both with-and-without extra spaces? If so, merge them
                        duplicate = myDB.match(
                            'Select AuthorID,AuthorName FROM authors WHERE AuthorName="%s"' %
                            authorname.replace('"', '""'))
                        if duplicate:
                            myDB.action('DELETE from authors where authorname=?', (author['AuthorName'],))
                            if author['AuthorID'] != duplicate['AuthorID']:
                                myDB.action('UPDATE books set AuthorID=? WHERE AuthorID=?',
                                            (duplicate['AuthorID'], author['AuthorID']))
                        else:
                            myDB.action('UPDATE authors set AuthorName=? WHERE AuthorID=?', (authorname, authorid))
            except Exception as e:
                logger.info('Error: ' + str(e))

        logger.info('Scanning ebook directory: %s' % startdir)

        new_book_count = 0
        modified_count = 0
        rescan_count = 0
        rescan_hits = 0
        file_count = 0
        author = ""

        if lazylibrarian.CONFIG['FULL_SCAN']:
            cmd = 'select AuthorName, BookName, BookFile, BookID from books,authors'
            cmd += ' where books.AuthorID = authors.AuthorID and books.Status="Open"'
            if not startdir == destdir:
                cmd += ' and BookFile like "' + startdir + '%"'
            books = myDB.select(cmd)
            status = lazylibrarian.CONFIG['NOTFOUND_STATUS']
            logger.info('Missing books will be marked as %s' % status)
            for book in books:
                bookID = book['BookID']
                bookfile = book['BookFile']

                if not (bookfile and os.path.isfile(bookfile)):
                    myDB.action('update books set Status=? where BookID=?', (status, bookID))
                    myDB.action('update books set BookFile="" where BookID=?', (bookID,))
                    logger.warn('Book %s - %s updated as not found on disk' % (book['AuthorName'], book['BookName']))

        # to save repeat-scans of the same directory if it contains multiple formats of the same book,
        # keep track of which directories we've already looked at
        processed_subdirectories = []
        warned = False  # have we warned about no new authors setting

        # massage the EBOOK_DEST_FILE config parameter into something we can use
        # with regular expression matching. re.escape keeps letters literal
        # (a blanket backslash prefix would turn e.g. "b" into the \b word boundary)
        booktype_list = getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
        matchString = re.escape(lazylibrarian.CONFIG['EBOOK_DEST_FILE'])
        matchString = matchString.replace(re.escape("$Author"), "(?P<author>.*?)")
        matchString = matchString.replace(re.escape("$Title"), "(?P<book>.*?)")
        # the extension is an alternation of whole booktypes, not a character class
        matchString += r'\.(?:' + '|'.join(re.escape(book_type) for book_type in booktype_list) + ')$'
        pattern = re.compile(matchString, re.VERBOSE)

        for r, d, f in os.walk(startdir):
            for directory in d[:]:
                # prevent magazine being scanned
                if directory.startswith("_") or directory.startswith("."):
                    d.remove(directory)

            for files in f:
                file_count += 1

                if isinstance(r, str):
                    r = r.decode(lazylibrarian.SYS_ENCODING)

                subdirectory = r.replace(startdir, '')
                # Added new code to skip if we've done this directory before.
                # Made this conditional with a switch in config.ini
                # in case user keeps multiple different books in the same subdirectory
                if lazylibrarian.CONFIG['IMP_SINGLEBOOK'] and (subdirectory in processed_subdirectories):
                    logger.debug("[%s] already scanned" % subdirectory)
                else:
                    # If this is a book, try to get author/title/isbn/language
                    # if epub or mobi, read metadata from the book
                    # If metadata.opf exists, use that allowing it to override
                    # embedded metadata. User may have edited metadata.opf
                    # to merge author aliases together
                    # If all else fails, try pattern match for author/title
                    # and look up isbn/lang from LT or GR later
                    match = 0
                    if is_valid_booktype(files):

                        logger.debug("[%s] Now scanning subdirectory %s" % (startdir, subdirectory))
                        language = "Unknown"
                        isbn = ""
                        book = ""
                        author = ""
                        gr_id = ""
                        gb_id = ""
                        extn = os.path.splitext(files)[1]

                        # if it's an epub or a mobi we can try to read metadata from it
                        if (extn == ".epub") or (extn == ".mobi"):
                            book_filename = os.path.join(r, files).encode(lazylibrarian.SYS_ENCODING)

                            try:
                                res = get_book_info(book_filename)
                            except Exception as e:
                                logger.debug('get_book_info failed for %s, %s' % (book_filename, str(e)))
                                res = {}
                            # title and creator are the minimum we need
                            if 'title' in res and 'creator' in res:
                                book = res['title']
                                author = res['creator']
                                if book and len(book) > 2 and author and len(author) > 2:
                                    match = 1
                                if 'language' in res:
                                    language = res['language']
                                if 'identifier' in res:
                                    isbn = res['identifier']
                                if 'type' in res:
                                    extn = res['type']
                                logger.debug("book meta [%s] [%s] [%s] [%s] [%s]" %
                                             (isbn, language, author, book, extn))
                            if not match:
                                logger.debug("Book meta incomplete in %s" % book_filename)

                        # calibre uses "metadata.opf", LL uses "bookname - authorname.opf"
                        # just look for any .opf file in the current directory since we don't know
                        # LL preferred authorname/bookname at this point.
                        # Allow metadata in file to override book contents as may be users pref
                        metafile = opf_file(r)
                        try:
                            res = get_book_info(metafile)
                        except Exception as e:
                            logger.debug('get_book_info failed for %s, %s' % (metafile, str(e)))
                            res = {}
                        # title and creator are the minimum we need
                        if 'title' in res and 'creator' in res:
                            book = res['title']
                            author = res['creator']
                            if book and len(book) > 2 and author and len(author) > 2:
                                match = 1
                            if 'language' in res:
                                language = res['language']
                            if 'identifier' in res:
                                isbn = res['identifier']
                            if 'gr_id' in res:
                                gr_id = res['gr_id']
                            logger.debug("file meta [%s] [%s] [%s] [%s] [%s]" %
                                         (isbn, language, author, book, gr_id))
                        if not match:
                            logger.debug("File meta incomplete in %s" % metafile)

                        if not match:  # no author/book from metadata file, and not embedded either
                            match = pattern.match(files)
                            if match:
                                author = match.group("author")
                                book = match.group("book")
                                if len(book) <= 2 or len(author) <= 2:
                                    match = 0
                            if not match:
                                logger.debug("Pattern match failed [%s]" % files)

                        if match:
                            # flag that we found a book in this subdirectory
                            processed_subdirectories.append(subdirectory)

                            # If we have a valid looking isbn, and language != "Unknown", add it to cache
                            if language != "Unknown" and is_valid_isbn(isbn):
                                logger.debug("Found Language [%s] ISBN [%s]" % (language, isbn))
                                # we need to add it to language cache if not already
                                # there, is_valid_isbn has checked length is 10 or 13
                                if len(isbn) == 10:
                                    isbnhead = isbn[0:3]
                                else:
                                    isbnhead = isbn[3:6]
                                match = myDB.match('SELECT lang FROM languages where isbn = "%s"' % isbnhead)
                                if not match:
                                    myDB.action('insert into languages values (?, ?)', (isbnhead, language))
                                    logger.debug("Cached Lang [%s] ISBN [%s]" % (language, isbnhead))
                                else:
                                    logger.debug("Already cached Lang [%s] ISBN [%s]" % (language, isbnhead))

                            author, authorid, new = addAuthorNameToDB(author)  # get the author name as we know it...

                            if author:
                                # author exists, check if this book by this author is in our database
                                # metadata might have quotes in book name
                                # some books might be stored under a different author name
                                # eg books by multiple authors, books where author is "writing as"
                                # or books we moved to "merge" authors
                                book = book.replace("'", "")

                                # First try and find it under author and bookname
                                # as we may have it under a different bookid or isbn to goodreads/googlebooks
                                # which might have several bookid/isbn for the same book
                                bookid = find_book_in_db(myDB, author, book)

                                if not bookid:
                                    # Title or author name might not match or multiple authors
                                    # See if the gr_id, gb_id is already in our database
                                    if gr_id:
                                        bookid = gr_id
                                    elif gb_id:
                                        bookid = gb_id
                                    else:
                                        bookid = ""

                                    if bookid:
                                        match = myDB.match('SELECT BookID FROM books where BookID = "%s"' % bookid)
                                        if not match:
                                            msg = 'Unable to find book %s by %s in database, trying to add it using '
                                            if bookid == gr_id:
                                                msg += "GoodReads ID " + gr_id
                                            if bookid == gb_id:
                                                msg += "GoogleBooks ID " + gb_id
                                            logger.debug(msg % (book, author))
                                            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads" and gr_id:
                                                GR_ID = GoodReads(gr_id)
                                                GR_ID.find_book(gr_id, None)
                                            elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks" and gb_id:
                                                GB_ID = GoogleBooks(gb_id)
                                                GB_ID.find_book(gb_id, None)
                                            # see if it's there now...
                                            match = myDB.match('SELECT BookID from books where BookID="%s"' % bookid)
                                            if not match:
                                                logger.debug("Unable to add bookid %s to database" % bookid)
                                                bookid = ""

                                if not bookid and isbn:
                                    # See if the isbn is in our database
                                    match = myDB.match('SELECT BookID FROM books where BookIsbn = "%s"' % isbn)
                                    if match:
                                        bookid = match['BookID']

                                if not bookid:
                                    # get author name from parent directory of this book directory
                                    newauthor = os.path.basename(os.path.dirname(r))
                                    # calibre replaces trailing periods with _ eg Smith Jr. -> Smith Jr_
                                    if newauthor.endswith('_'):
                                        newauthor = newauthor[:-1] + '.'
                                    if author.lower() != newauthor.lower():
                                        logger.debug("Trying authorname [%s]" % newauthor)
                                        bookid = find_book_in_db(myDB, newauthor, book)
                                        if bookid:
                                            logger.warn("%s not found under [%s], found under [%s]" %
                                                        (book, author, newauthor))

                                # at this point if we still have no bookid, it looks like we
                                # have author and book title but no database entry for it
                                if not bookid:
                                    if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                                        # Either goodreads doesn't have the book or it didn't match language prefs
                                        # Since we have the book anyway, try and reload it ignoring language prefs
                                        rescan_count += 1
                                        base_url = 'http://www.goodreads.com/search.xml?q='
                                        params = {"key": lazylibrarian.CONFIG['GR_API']}
                                        # collapse leading initials, eg "J. K. Rowling" -> "J.K. Rowling"
                                        # length checks stop a name made only of initials ("J.K.")
                                        # from raising IndexError and aborting the whole scan
                                        if len(author) > 1 and author[1] in '. ':
                                            surname = author
                                            forename = ''
                                            while len(surname) > 1 and surname[1] in '. ':
                                                forename = forename + surname[0] + '.'
                                                surname = surname[2:].strip()
                                            if author != forename + ' ' + surname:
                                                logger.debug('Stripped authorname [%s] to [%s %s]' %
                                                             (author, forename, surname))
                                                author = forename + ' ' + surname

                                        author = ' '.join(author.split())  # ensure no extra whitespace
                                        searchname = author + ' ' + book
                                        searchname = cleanName(unaccented(searchname))
                                        searchterm = urllib.quote_plus(searchname.encode(lazylibrarian.SYS_ENCODING))
                                        set_url = base_url + searchterm + '&' + urllib.urlencode(params)
                                        try:
                                            rootxml, in_cache = get_xml_request(set_url)
                                            if not len(rootxml):
                                                logger.debug("Error requesting results from GoodReads")
                                            else:
                                                resultxml = rootxml.getiterator('work')
                                                for item in resultxml:
                                                    booktitle = item.find('./best_book/title').text
                                                    book_fuzz = fuzz.token_set_ratio(booktitle, book)
                                                    if book_fuzz >= 98:
                                                        logger.debug("Rescan found %s : %s" % (booktitle, language))
                                                        rescan_hits += 1
                                                        bookid = item.find('./best_book/id').text
                                                        GR_ID = GoodReads(bookid)
                                                        GR_ID.find_book(bookid, None)
                                                        if language and language != "Unknown":
                                                            # set language from book metadata
                                                            logger.debug("Setting language from metadata %s : %s" %
                                                                         (booktitle, language))
                                                            myDB.action('UPDATE books SET BookLang=? WHERE BookID=?',
                                                                        (language, bookid))
                                                        break
                                                if not bookid:
                                                    logger.warn("GoodReads doesn't know about %s" % book)
                                        except Exception as e:
                                            logger.error("Error finding rescan results: %s" % str(e))

                                    elif lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                                        # if we get here using googlebooks it's because googlebooks
                                        # doesn't have the book. No point in looking for it again.
                                        logger.warn("GoogleBooks doesn't know about %s" % book)

                                # see if it's there now...
                                if bookid:
                                    cmd = 'SELECT books.Status, BookFile, AuthorName, BookName from books,authors '
                                    cmd += 'where books.AuthorID = authors.AuthorID and BookID="%s"' % bookid
                                    check_status = myDB.match(cmd)

                                    if not check_status:
                                        logger.debug('Unable to find bookid %s in database' % bookid)
                                    else:
                                        if check_status['Status'] != 'Open':
                                            # we found a new book
                                            new_book_count += 1
                                            myDB.action('UPDATE books set Status="Open" where BookID=?', (bookid,))

                                        # store book location so we can check if it gets removed
                                        # (placeholders: file paths may legitimately contain quotes)
                                        book_filename = os.path.join(r, files)
                                        if not check_status['BookFile']:  # no previous location
                                            myDB.action('UPDATE books set BookFile=? where BookID=?',
                                                        (book_filename, bookid))
                                        # location may have changed since last scan
                                        elif book_filename != check_status['BookFile']:
                                            modified_count += 1
                                            logger.warn("Updating book location for %s %s from %s to %s" %
                                                        (author, book, check_status['BookFile'], book_filename))
                                            logger.debug("%s %s matched %s BookID %s, [%s][%s]" %
                                                         (author, book, check_status['Status'], bookid,
                                                          check_status['AuthorName'], check_status['BookName']))
                                            myDB.action('UPDATE books set BookFile=? where BookID=?',
                                                        (book_filename, bookid))

                                        # update cover file to cover.jpg in book folder (if exists)
                                        bookdir = os.path.dirname(book_filename)
                                        coverimg = os.path.join(bookdir, 'cover.jpg')
                                        if os.path.isfile(coverimg):
                                            cachedir = lazylibrarian.CACHEDIR
                                            cacheimg = os.path.join(cachedir, 'book', bookid + '.jpg')
                                            copyfile(coverimg, cacheimg)
                                else:
                                    logger.warn(
                                        "Failed to match book [%s] by [%s] in database" % (book, author))
                            else:
                                if not warned and not lazylibrarian.CONFIG['ADD_AUTHOR']:
                                    logger.warn("Add authors to database is disabled")
                                    warned = True

        logger.info("%s/%s new/modified book%s found and added to the database" %
                    (new_book_count, modified_count, plural(new_book_count + modified_count)))
        logger.info("%s file%s processed" % (file_count, plural(file_count)))

        if startdir == destdir:
            # On full library scans, check for missing workpages
            setWorkPages()
            # and books with unknown language
            nolang = myDB.match(
                "select count('BookID') as counter from Books where status='Open' and BookLang='Unknown'")
            nolang = nolang['counter']
            if nolang:
                logger.warn("Found %s book%s in your library with unknown language" % (nolang, plural(nolang)))
            # show stats if new books were added
            stat_names = ('GR_book_hits', 'GR_lang_hits', 'LT_lang_hits', 'GB_lang_change',
                          'cache_hits', 'bad_lang', 'bad_char', 'uncached', 'duplicates')
            stats = myDB.match('SELECT ' + ', '.join('sum(%s)' % name for name in stat_names) + ' FROM stats')
            # sum() over an empty stats table yields None, report those as 0
            st = dict((name, stats['sum(%s)' % name] or 0) for name in stat_names)

            if lazylibrarian.CONFIG['BOOK_API'] == "GoogleBooks":
                logger.debug("GoogleBooks was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoogleBooks language was changed %s time%s" %
                             (st['GB_lang_change'], plural(st['GB_lang_change'])))
            if lazylibrarian.CONFIG['BOOK_API'] == "GoodReads":
                logger.debug("GoodReads was hit %s time%s for books" %
                             (st['GR_book_hits'], plural(st['GR_book_hits'])))
                logger.debug("GoodReads was hit %s time%s for languages" %
                             (st['GR_lang_hits'], plural(st['GR_lang_hits'])))
            logger.debug("LibraryThing was hit %s time%s for languages" %
                         (st['LT_lang_hits'], plural(st['LT_lang_hits'])))
            logger.debug("Language cache was hit %s time%s" %
                         (st['cache_hits'], plural(st['cache_hits'])))
            logger.debug("Unwanted language removed %s book%s" %
                         (st['bad_lang'], plural(st['bad_lang'])))
            logger.debug("Unwanted characters removed %s book%s" %
                         (st['bad_char'], plural(st['bad_char'])))
            logger.debug("Unable to cache language for %s book%s with missing ISBN" %
                         (st['uncached'], plural(st['uncached'])))
            logger.debug("Found %s duplicate book%s" %
                         (st['duplicates'], plural(st['duplicates'])))
            logger.debug("Rescan %s hit%s, %s miss" %
                         (rescan_hits, plural(rescan_hits), rescan_count - rescan_hits))
            logger.debug("Cache %s hit%s, %s miss" %
                         (lazylibrarian.CACHE_HIT, plural(lazylibrarian.CACHE_HIT), lazylibrarian.CACHE_MISS))
            cachesize = myDB.match("select count('ISBN') as counter from languages")
            logger.debug("ISBN Language cache holds %s entries" % cachesize['counter'])

            # Cache any covers and images
            images = myDB.select('select bookid, bookimg, bookname from books where bookimg like "http%"')
            if len(images):
                logger.info("Caching cover%s for %i book%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    bookid = item['bookid']
                    bookimg = item['bookimg']
                    newimg, success = cache_img("book", bookid, bookimg)
                    if success:
                        myDB.action('update books set BookImg=? where BookID=?', (newimg, bookid))

            images = myDB.select('select AuthorID, AuthorImg, AuthorName from authors where AuthorImg like "http%"')
            if len(images):
                logger.info("Caching image%s for %i author%s" % (plural(len(images)), len(images), plural(len(images))))
                for item in images:
                    authorid = item['authorid']
                    authorimg = item['authorimg']
                    newimg, success = cache_img("author", authorid, authorimg)
                    if success:
                        myDB.action('update authors set AuthorImg=? where AuthorID=?', (newimg, authorid))

            # On full scan, update bookcounts for all authors, not just new ones - refresh may have located
            # new books for existing authors especially if switched provider gb/gr or changed wanted languages
            authors = myDB.select('select AuthorID from authors')
        else:
            # On single author/book import, just update bookcount for that author
            # author may be None here if the last file's author could not be added
            authors = myDB.select('select AuthorID from authors where AuthorName = "%s"' %
                                  (author or "").replace('"', '""'))

        logger.debug('Updating bookcounts for %i author%s' % (len(authors), plural(len(authors))))
        for author in authors:
            update_totals(author['AuthorID'])

        logger.info('Library scan complete')
        return new_book_count

    except Exception:
        logger.error('Unhandled exception in libraryScan: %s' % traceback.format_exc())